author     Christian König <[email protected]>   2010-10-28 20:24:56 +0200
committer  Christian König <[email protected]>   2010-10-28 20:24:56 +0200
commit     41ed47d6b8fb6c032e2907ef2e49e414c26f35c1 (patch)
tree       8cf267ee3ac5d8b530dd70a28a0d568344aa8304 /src/gallium/auxiliary
parent     97a7cf230a70c64fff300931ae7c00aa00449c97 (diff)
parent     5479fa34d9acebd55f68c23a278cf382d0e84248 (diff)
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts:
	src/gallium/include/pipe/p_format.h
Diffstat (limited to 'src/gallium/auxiliary')
49 files changed, 2563 insertions, 1157 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 05096b12a86..49ff1653e0e 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -173,6 +173,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ gallivm/lp_bld_tgsi_aos.c \ + gallivm/lp_bld_tgsi_info.c \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ @@ -207,16 +208,16 @@ include ../Makefile.template indices/u_indices_gen.c: indices/u_indices_gen.py - python $< > $@ + $(PYTHON2) $< > $@ indices/u_unfilled_gen.c: indices/u_unfilled_gen.py - python $< > $@ + $(PYTHON2) $< > $@ util/u_format_srgb.c: util/u_format_srgb.py - python $< > $@ + $(PYTHON2) $< > $@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv - python util/u_format_table.py util/u_format.csv > $@ + $(PYTHON2) util/u_format_table.py util/u_format.csv > $@ util/u_half.c: util/u_half.py - python util/u_half.py > $@ + $(PYTHON2) util/u_half.py > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index a18f7c0b2a3..f22c8b96123 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -225,6 +225,7 @@ if env['llvm']: 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_aos.c', + 'gallivm/lp_bld_tgsi_info.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 032fcbbc70a..39d82f32892 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -335,6 +335,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; draw->pt.user.vs_constants_size[slot] = size; + draw->pt.user.planes = (float (*) [12][4]) &(draw->plane[0]); draw_vs_set_constants(draw, slot, buffer, size); break; case PIPE_SHADER_GEOMETRY: @@ -721,9 +722,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { #ifdef HAVE_LLVM if(draw->llvm) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 1f27cbf488a..ff4f753604f 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -49,7 +49,6 @@ struct draw_geometry_shader; struct draw_fragment_shader; struct tgsi_sampler; -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); @@ -120,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); /* diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 7fb86d7cb27..140e596f994 100644 --- 
a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,6 +31,9 @@ #include "draw_vs.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_flow.h" @@ -43,7 +46,6 @@ #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" -#include "util/u_cpu_detect.h" #include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" @@ -72,12 +74,12 @@ init_globals(struct draw_llvm *llvm) elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); + PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); @@ -128,12 +130,14 @@ init_globals(struct draw_llvm *llvm) /* struct draw_jit_context */ { - LLVMTypeRef elem_types[3]; + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ + elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ + elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -142,6 +146,8 @@ init_globals(struct draw_llvm *llvm) llvm->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, + llvm->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, llvm->target, context_type, DRAW_JIT_CTX_TEXTURES); @@ -267,13 +273,7 @@ draw_llvm_create(struct draw_context *draw) LLVMAddConstantPropagationPass(llvm->pass); } - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. 
- */ - LLVMAddInstructionCombiningPass(llvm->pass); - } + LLVMAddInstructionCombiningPass(llvm->pass); LLVMAddGVNPass(llvm->pass); } else { /* We need at least this pass to prevent the backends to fail in @@ -421,7 +421,7 @@ generate_fetch(LLVMBuilderRef builder, "instance_divisor"); } - /* limit index to min(inex, vb_max_index) */ + /* limit index to min(index, vb_max_index) */ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); @@ -550,19 +550,28 @@ static void store_aos(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value) + LLVMValueRef value, + LLVMValueRef clipmask) { LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); LLVMValueRef indices[3]; + LLVMValueRef val, shift; indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = index; indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); - /* undefined vertex */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), - 0xffff, 0), id_ptr); + /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ + val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); + shift = LLVMConstInt(LLVMInt32Type(), 12, 0); + val = LLVMBuildShl(builder, val, shift, ""); + /* add clipmask:12 */ + val = LLVMBuildOr(builder, val, clipmask, ""); + + /* store vertex header */ + LLVMBuildStore(builder, val, id_ptr); + #if DEBUG_STORE lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -617,7 +626,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], int attrib, - int num_outputs) + int num_outputs, + LLVMValueRef clipmask) { LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); @@ -625,7 +635,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - + LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + debug_assert(NUM_CHANNELS == 4); io0_ptr = LLVMBuildGEP(builder, io_ptr, @@ -637,21 +648,31 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + clipmask0 = LLVMBuildExtractElement(builder, clipmask, + ind0, ""); + clipmask1 = LLVMBuildExtractElement(builder, clipmask, + ind1, ""); + clipmask2 = LLVMBuildExtractElement(builder, clipmask, + ind2, ""); + clipmask3 = LLVMBuildExtractElement(builder, clipmask, + ind3, ""); + #if DEBUG_STORE - lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", - io_ptr, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", + io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif - - store_aos(builder, io0_ptr, attr_index, aos[0]); - store_aos(builder, io1_ptr, attr_index, aos[1]); - store_aos(builder, io2_ptr, attr_index, aos[2]); - store_aos(builder, io3_ptr, attr_index, aos[3]); + /* store for each of the 4 vertices */ + store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); + store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); + store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); + store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); } static void convert_to_aos(LLVMBuilderRef builder, LLVMValueRef io, LLVMValueRef 
(*outputs)[NUM_CHANNELS], + LLVMValueRef clipmask, int num_outputs, int max_vertices) { @@ -680,13 +701,305 @@ convert_to_aos(LLVMBuilderRef builder, io, aos, attrib, - num_outputs); + num_outputs, + clipmask); } #if DEBUG_STORE lp_build_printf(builder, " # storing end\n"); #endif } +/* + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + */ +static void +store_clip(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + LLVMValueRef out[4]; + LLVMValueRef indices[2]; + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; + LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; + LLVMValueRef out0elem, out1elem, out2elem, out3elem; + int i; + + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + + clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); + clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); + clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); + clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + + for (i = 0; i<4; i++){ + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, + indices, 2, ""); //x0 + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, + indices, 2, ""); //x1 + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, + indices, 2, ""); //x2 + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, + indices, 2, ""); //x3 + + out0elem = LLVMBuildExtractElement(builder, out[i], + ind0, ""); //x0 + out1elem = LLVMBuildExtractElement(builder, out[i], + ind1, ""); //x1 + out2elem = LLVMBuildExtractElement(builder, out[i], + ind2, ""); //x2 + out3elem = LLVMBuildExtractElement(builder, out[i], + ind3, ""); //x3 + + LLVMBuildStore(builder, out0elem, clip0_ptr); + LLVMBuildStore(builder, out1elem, clip1_ptr); + LLVMBuildStore(builder, out2elem, clip2_ptr); + LLVMBuildStore(builder, out3elem, clip3_ptr); + + indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + } + +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? 
name : ""); + } + + return res; +} + +/* + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + int i; + struct lp_type f32_type = lp_type_float_vec(32); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); + + /* for 1/w convention*/ + out3 = LLVMBuildFDiv(builder, const1, out3, ""); + LLVMBuildStore(builder, out3, outputs[0][3]); + + /* Viewport Mapping */ + for (i=0; i<3; i++){ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef scale; + LLVMValueRef trans; + LLVMValueRef scale_i; + LLVMValueRef trans_i; + LLVMValueRef index; + + index = LLVMConstInt(LLVMInt32Type(), i, 0); + scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + index = LLVMConstInt(LLVMInt32Type(), i+4, 0); + trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); + trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); + + /* divide by w */ + out = LLVMBuildFMul(builder, out, out3, ""); + /* mult by scale */ + out = LLVMBuildFMul(builder, out, scale, ""); + /* add translation */ + out = LLVMBuildFAdd(builder, out, trans, ""); + + /* store transformed outputs */ + LLVMBuildStore(builder, out, outputs[0][i]); + } + +} + + +/* + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef +generate_clipmask(LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + boolean clip_xy, + boolean clip_z, + boolean clip_user, + boolean clip_halfz, + unsigned nr, + LLVMValueRef context_ptr) +{ + LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef test, temp; + LLVMValueRef zero, shift; + LLVMValueRef pos_x, pos_y, pos_z, pos_w; + LLVMValueRef plane1, planes, plane_ptr, sum; + + unsigned i; + + struct lp_type f32_type = lp_type_float_vec(32); + + mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); + zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ + + /* Assuming position stored at output[0] */ + pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + /* Cliptest, for hardwired planes */ + if (clip_xy){ + /* plane 1 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + temp = shift; + test = LLVMBuildAnd(builder, test, temp, ""); + mask = test; + + /* plane 2 */ + test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 3 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 4 */ + test = 
LLVMBuildFAdd(builder, pos_y, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_z){ + temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); + if (clip_halfz){ + /* plane 5 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + else{ + /* plane 5 */ + test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + /* plane 6 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_user){ + LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); + LLVMValueRef indices[3]; + temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); + + /* userclip planes */ + for (i = 6; i < nr; i++) { + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); + planes = vec4f_from_scalar(builder, plane1, "plane4_x"); + sum = LLVMBuildFMul(builder, planes, pos_x, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); + planes = vec4f_from_scalar(builder, plane1, "plane4_y"); + test = LLVMBuildFMul(builder, planes, pos_y, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); + planes = vec4f_from_scalar(builder, plane1, "plane4_z"); + test = LLVMBuildFMul(builder, planes, pos_z, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); + planes = vec4f_from_scalar(builder, plane1, "plane4_w"); + test = LLVMBuildFMul(builder, planes, pos_w, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + } + return mask; +} + +/* + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean + */ +static void +clipmask_bool(LLVMBuilderRef builder, + LLVMValueRef clipmask, + LLVMValueRef ret_ptr) +{ + LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMValueRef temp; + int i; + + for (i=0; i<4; i++){ + temp = LLVMBuildExtractElement(builder, clipmask, + LLVMConstInt(LLVMInt32Type(), i, 0) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); + } + + LLVMBuildStore(builder, ret, ret_ptr); +} + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { @@ -706,7 
+1019,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -716,7 +1034,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); @@ -756,6 +1074,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* function will return non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( draw_llvm_variant_key_samplers(&variant->key), @@ -770,6 +1092,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); @@ -806,10 +1129,37 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header and positions in data */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -819,8 +1169,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -870,7 +1221,12 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef fetch_max; void 
*code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -880,10 +1236,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", - func_type); + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -929,11 +1284,16 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); + /* function returns non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -980,10 +1340,40 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -993,8 +1383,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -1038,6 +1429,16 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) */ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + /* will have to rig this up properly later 
*/ + key->clip_xy = llvm->draw->clip_xy; + key->clip_z = llvm->draw->clip_z; + key->clip_user = llvm->draw->clip_user; + key->bypass_viewport = llvm->draw->identity_viewport; + key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; + key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); + key->nr_planes = llvm->draw->nr_planes; + key->pad = 0; + /* All variants of this shader will have the same value for * nr_samplers. Not yet trying to compact away holes in the * sampler array. @@ -1066,9 +1467,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index d0a68ae412d..c3c30c07c64 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -41,7 +41,6 @@ #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_llvm; struct llvm_vertex_shader; @@ -52,9 +51,9 @@ struct draw_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; float min_lod; float max_lod; float lod_bias; @@ -97,7 +96,8 @@ struct draw_jit_context { const float *vs_constants; const float *gs_constants; - + float (*planes) [12][4]; + float *viewport; struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS]; }; @@ -109,18 +109,22 @@ struct draw_jit_context #define draw_jit_context_gs_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "gs_constants") -#define DRAW_JIT_CTX_TEXTURES 2 +#define draw_jit_context_planes(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "planes") -#define draw_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") +#define draw_jit_context_viewport(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 3, "viewport") +#define DRAW_JIT_CTX_TEXTURES 4 +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") #define draw_jit_header_id(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 0, "id") #define draw_jit_header_clip(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "clip") + lp_build_struct_get_ptr(_builder, _ptr, 1, "clip") #define draw_jit_header_data(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 2, "data") @@ -136,7 +140,7 @@ struct draw_jit_context lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") -typedef void +typedef int (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -147,7 +151,7 @@ typedef void unsigned instance_id); -typedef void +typedef int (*draw_jit_vert_func_elts)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -159,8 
+163,16 @@ typedef void struct draw_llvm_variant_key { - unsigned nr_vertex_elements:16; - unsigned nr_samplers:16; + unsigned nr_vertex_elements:8; + unsigned nr_samplers:8; + unsigned clip_xy:1; + unsigned clip_z:1; + unsigned clip_user:1; + unsigned clip_halfz:1; + unsigned bypass_viewport:1; + unsigned need_edgeflags:1; + unsigned nr_planes:4; + unsigned pad:6; /* Variable number of vertex elements: */ @@ -290,8 +302,8 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); #endif diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index d417f825a0f..54163d7f9eb 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -169,6 +169,9 @@ struct draw_context unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; + + /* pointer to planes */ + float (*planes)[12][4]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f44bf2507c6..4078b2a07d0 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -287,6 +287,84 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) } +/** Helper code for below */ +#define PRIM_RESTART_LOOP(elements) \ + do { \ + for (i = start; i < end; i++) { \ + if (elements[i] == info->restart_index) { \ + if (cur_count > 0) { \ + /* draw elts up to prev pos */ \ + draw_pt_arrays(draw, prim, cur_start, cur_count); \ + } \ + /* begin new prim at next elt */ \ + cur_start = i + 1; \ + cur_count = 0; \ + } \ + else { \ + cur_count++; \ + } \ + } \ + if (cur_count > 0) { \ + draw_pt_arrays(draw, prim, cur_start, cur_count); \ + } \ + } while (0) + + +/** + * For drawing prims with primitive restart enabled. + * Scan for restart indexes and draw the runs of elements/vertices between + * the restarts. + */ +static void +draw_pt_arrays_restart(struct draw_context *draw, + const struct pipe_draw_info *info) +{ + const unsigned prim = info->mode; + const unsigned start = info->start; + const unsigned count = info->count; + const unsigned end = start + count; + unsigned i, cur_start, cur_count; + + assert(info->primitive_restart); + + if (draw->pt.user.elts) { + /* indexed prims (draw_elements) */ + cur_start = start; + cur_count = 0; + + switch (draw->pt.user.eltSize) { + case 1: + { + const ubyte *elt_ub = (const ubyte *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_ub); + } + break; + case 2: + { + const ushort *elt_us = (const ushort *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_us); + } + break; + case 4: + { + const uint *elt_ui = (const uint *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_ui); + } + break; + default: + assert(0 && "bad eltSize in draw_arrays()"); + } + } + else { + /* Non-indexed prims (draw_arrays). + * Primitive restart should have been handled in the state tracker. + */ + draw_pt_arrays(draw, prim, start, count); + } +} + + + /** * Non-instanced drawing. 
* \sa draw_arrays_instanced @@ -395,6 +473,12 @@ draw_vbo(struct draw_context *draw, for (instance = 0; instance < info->instance_count; instance++) { draw->instance_id = instance + info->start_instance; - draw_pt_arrays(draw, info->mode, info->start, info->count); + + if (info->primitive_restart) { + draw_pt_arrays_restart(draw, info); + } + else { + draw_pt_arrays(draw, info->mode, info->start, info->count); + } } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 77291e304e1..a53a768d029 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -175,6 +175,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, draw->pt.user.vs_constants[0]; fpme->llvm->jit_context.gs_constants = draw->pt.user.gs_constants[0]; + fpme->llvm->jit_context.planes = + (float (*) [12][4]) draw->pt.user.planes[0]; + fpme->llvm->jit_context.viewport = + (float *)draw->viewport.scale; + } @@ -217,6 +222,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, struct draw_vertex_info gs_vert_info; struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; + unsigned clipped = 0; llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; @@ -230,7 +236,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, } if (fetch_info->linear) - fpme->current_variant->jit_func( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context, llvm_vert_info.verts, (const char **)draw->pt.user.vbuffer, fetch_info->start, @@ -239,7 +245,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer, draw->instance_id); else - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, llvm_vert_info.verts, (const char **)draw->pt.user.vbuffer, fetch_info->elts, @@ -266,6 +272,9 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, FREE(vert_info->verts); vert_info = &gs_vert_info; prim_info = &gs_prim_info; + + clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info ); + } /* stream output needs to be done before clipping */ @@ -273,11 +282,11 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, vert_info, prim_info ); - if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { + if (clipped) { opt |= PT_PIPELINE; } - /* Do we need to run the pipeline? + /* Do we need to run the pipeline? Now will come here if clipped */ if (opt & PT_PIPELINE) { pipeline( fpme, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 64c468c14d4..f9a12a41a1b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -983,6 +983,12 @@ enum lp_build_round_sse41_mode }; +/** + * Helper for SSE4.1's ROUNDxx instructions. + * + * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the + * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. 
+ */ static INLINE LLVMValueRef lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef a, @@ -1053,10 +1059,58 @@ lp_build_round_sse41(struct lp_build_context *bld, } +static INLINE LLVMValueRef +lp_build_iround_nearest_sse2(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef ret_type = lp_build_int_vec_type(type); + const char *intrinsic; + LLVMValueRef res; + + assert(type.floating); + /* using the double precision conversions is a bit more complicated */ + assert(type.width == 32); + + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse2); + + /* This is relying on MXCSR rounding mode, which should always be nearest. */ + if (type.length == 1) { + LLVMTypeRef vec_type; + LLVMValueRef undef; + LLVMValueRef arg; + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + + vec_type = LLVMVectorType(bld->elem_type, 4); + + intrinsic = "llvm.x86.sse.cvtss2si"; + + undef = LLVMGetUndef(vec_type); + + arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, arg); + } + else { + assert(type.width*type.length == 128); + + intrinsic = "llvm.x86.sse2.cvtps2dq"; + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, a); + } + + return res; +} + + /** - * Return the integer part of a float (vector) value. The returned value is - * a float (vector). - * Ex: trunc(-1.5) = 1.0 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is a float (vector). + * Ex: trunc(-1.5) = -1.0 */ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, @@ -1181,9 +1235,9 @@ lp_build_fract(struct lp_build_context *bld, /** - * Return the integer part of a float (vector) value. The returned value is - * an integer (vector). - * Ex: itrunc(-1.5) = 1 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is an integer (vector). + * Ex: itrunc(-1.5) = -1 */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, @@ -1217,7 +1271,11 @@ lp_build_iround(struct lp_build_context *bld, assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && + if (util_cpu_caps.has_sse2 && + ((type.width == 32) && (type.length == 1 || type.length == 4))) { + return lp_build_iround_nearest_sse2(bld, a); + } + else if (util_cpu_caps.has_sse4_1 && (type.length == 1 || type.width*type.length == 128)) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } @@ -1371,8 +1429,6 @@ lp_build_ifloor_fract(struct lp_build_context *bld, LLVMValueRef *out_ipart, LLVMValueRef *out_fpart) { - - const struct lp_type type = bld->type; LLVMValueRef ipart; @@ -2210,6 +2266,71 @@ lp_build_exp2(struct lp_build_context *bld, /** + * Extract the exponent of a IEEE-754 floating point value. + * + * Optionally apply an integer bias. 
+ * + * Result is an integer value with + * + * ifloor(log2(x)) + bias + */ +LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef res; + + assert(type.floating); + + assert(lp_check_value(bld->type, x)); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), ""); + res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), ""); + + return res; +} + + +/** + * Extract the mantissa of the a floating. + * + * Result is a floating point value with + * + * x / floor(log2(x)) + */ +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); + LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type); + LLVMValueRef res; + + assert(lp_check_value(bld->type, x)); + + assert(type.floating); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + /* res = x / 2**ipart */ + res = LLVMBuildAnd(bld->builder, x, mantmask, ""); + res = LLVMBuildOr(bld->builder, res, one, ""); + res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + + return res; +} + + + +/** * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ * These coefficients can be generate with * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html @@ -2333,7 +2454,10 @@ lp_build_log2(struct lp_build_context *bld, /** * Faster (and less accurate) log2. * - * log2(x) = floor(log2(x)) + frac(x) + * log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x)) + * + * Piece-wise linear approximation, with exact results when x is a + * power of two. 
* * See http://www.flipcode.com/archives/Fast_log_Function.shtml */ @@ -2341,35 +2465,21 @@ LLVMValueRef lp_build_fast_log2(struct lp_build_context *bld, LLVMValueRef x) { - const struct lp_type type = bld->type; - LLVMTypeRef vec_type = bld->vec_type; - LLVMTypeRef int_vec_type = bld->int_vec_type; - - unsigned mantissa = lp_mantissa(type); - LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); - LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); - LLVMValueRef ipart; LLVMValueRef fpart; assert(lp_check_value(bld->type, x)); - assert(type.floating); - - x = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); + assert(bld->type.floating); /* ipart = floor(log2(x)) - 1 */ - ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); - ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), ""); - ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 128), ""); - ipart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); + ipart = lp_build_extract_exponent(bld, x, -1); + ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, ""); - /* fpart = 1.0 + frac(x) */ - fpart = LLVMBuildAnd(bld->builder, x, mantmask, ""); - fpart = LLVMBuildOr(bld->builder, fpart, one, ""); - fpart = LLVMBuildBitCast(bld->builder, fpart, vec_type, ""); + /* fpart = x / 2**ipart */ + fpart = lp_build_extract_mantissa(bld, x); - /* floor(log2(x)) + frac(x) */ + /* ipart + fpart */ return LLVMBuildFAdd(bld->builder, ipart, fpart, ""); } @@ -2383,27 +2493,18 @@ LLVMValueRef lp_build_ilog2(struct lp_build_context *bld, LLVMValueRef x) { - const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = bld->int_vec_type; - - unsigned mantissa = lp_mantissa(type); - LLVMValueRef sqrt2 = lp_build_const_vec(type, 1.4142135623730951); - + LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2); LLVMValueRef ipart; - assert(lp_check_value(bld->type, x)); + assert(bld->type.floating); - assert(type.floating); + assert(lp_check_value(bld->type, x)); /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */ x = LLVMBuildFMul(bld->builder, x, sqrt2, ""); - x = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); - /* ipart = floor(log2(x) + 0.5) */ - ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); - ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), ""); - ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); + ipart = lp_build_extract_exponent(bld, x, 0); return ipart; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 8424384f8f7..c78b61decf0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -215,6 +215,15 @@ lp_build_exp2(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias); + +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x); + +LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef a); @@ -226,7 +235,6 @@ LLVMValueRef lp_build_ilog2(struct lp_build_context *bld, LLVMValueRef x); - void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 127b13bc286..6967dd26225 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -97,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; - unsigned n; - unsigned long long ubound; - unsigned long long mask; - double scale; - double bias; assert(src_type.floating); + assert(dst_width <= src_type.width); + src_type.sign = FALSE; mantissa = lp_mantissa(src_type); - /* We cannot carry more bits than the mantissa */ - n = MIN2(mantissa, dst_width); + if (dst_width <= mantissa) { + /* + * Apply magic coefficients that will make the desired result to appear + * in the lowest significant bits of the mantissa, with correct rounding. + * + * This only works if the destination width fits in the mantissa. + */ - /* This magic coefficients will make the desired result to appear in the - * lowest significant bits of the mantissa. - */ - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)mask/ubound; - bias = (double)((unsigned long long)1 << (mantissa - n)); + unsigned long long ubound; + unsigned long long mask; + double scale; + double bias; - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + ubound = (1ULL << dst_width); + mask = ubound - 1; + scale = (double)mask/ubound; + bias = (double)(1ULL << (mantissa - dst_width)); - if(dst_width > n) { - int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); + } + else if (dst_width == (mantissa + 1)) { + /* + * The destination width matches exactly what can be represented in + * floating point (i.e., mantissa + 1 bits). So do a straight + * multiplication followed by casting. No further rounding is necessary. + */ + + double scale; + + scale = (double)((1ULL << dst_width) - 1); - /* TODO: Fill in the empty lower bits for additional precision? */ - /* YES: this fixes progs/trivial/tri-z-eq.c. - * Otherwise vertex Z=1.0 values get converted to something like - * 0xfffffb00 and the test for equality with 0xffffffff fails. + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + } + else { + /* + * The destination exceeds what can be represented in the floating point. + * So multiply by the largest power two we get away with, and when + * subtract the most significant bit to rescale to normalized values. + * + * The largest power of two factor we can get away is + * (1 << (src_type.width - 1)), because we need to use signed . In theory it + * should be (1 << (src_type.width - 2)), but IEEE 754 rules states + * INT_MIN should be returned in FPToSI, which is the correct result for + * values near 1.0! + * + * This means we get (src_type.width - 1) correct bits for values near 0.0, + * and (mantissa + 1) correct bits for values near 1.0. Equally or more + * important, we also get exact results for 0.0 and 1.0. 
*/ -#if 0 - { - LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); - res = LLVMBuildOr(builder, res, msb, ""); - } -#elif 0 - while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); - shift -= n; - n *= 2; + + unsigned n = MIN2(src_type.width - 1, dst_width); + + double scale = (double)(1ULL << n); + unsigned lshift = dst_width - n; + unsigned rshift = n; + LLVMValueRef lshifted; + LLVMValueRef rshifted; + + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + + /* + * Align the most significant bit to its final place. + * + * This will cause 1.0 to overflow to 0, but the later adjustment will + * get it right. + */ + if (lshift) { + lshifted = LLVMBuildShl(builder, res, + lp_build_const_int_vec(src_type, lshift), ""); + } else { + lshifted = res; } -#endif + + /* + * Align the most significant bit to the right. + */ + rshifted = LLVMBuildAShr(builder, res, + lp_build_const_int_vec(src_type, rshift), ""); + + /* + * Subtract the MSB to the LSB, therefore re-scaling from + * (1 << dst_width) to ((1 << dst_width) - 1). + */ + + res = LLVMBuildSub(builder, lshifted, rshifted, ""); } - else - res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -178,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, assert(dst_type.floating); + /* Special-case int8->float, though most cases could be handled + * this way: + */ + if (src_width == 8) { + scale = 1.0/255.0; + res = LLVMBuildSIToFP(builder, src, vec_type, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + return res; + } + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -257,7 +313,9 @@ lp_build_conv(LLVMBuilderRef builder, dst_type.sign == 0 && dst_type.norm == 1 && dst_type.width == 8 && - dst_type.length == 16) + dst_type.length == 16 && + + util_cpu_caps.has_sse2) { int i; @@ -296,23 +354,7 @@ lp_build_conv(LLVMBuilderRef builder, c = LLVMBuildFMul(builder, src[2], const_255f, ""); d = LLVMBuildFMul(builder, src[3], const_255f, ""); - /* lp_build_round generates excessively general code without - * sse4, so do rounding manually. 
- */ - if (!util_cpu_caps.has_sse4_1) { - LLVMValueRef const_half = lp_build_const_vec(src_type, 0.5f); - - a = LLVMBuildFAdd(builder, a, const_half, ""); - b = LLVMBuildFAdd(builder, b, const_half, ""); - c = LLVMBuildFAdd(builder, c, const_half, ""); - d = LLVMBuildFAdd(builder, d, const_half, ""); - - src_int0 = LLVMBuildFPToSI(builder, a, int32_vec_type, ""); - src_int1 = LLVMBuildFPToSI(builder, b, int32_vec_type, ""); - src_int2 = LLVMBuildFPToSI(builder, c, int32_vec_type, ""); - src_int3 = LLVMBuildFPToSI(builder, d, int32_vec_type, ""); - } - else { + { struct lp_build_context bld; bld.builder = builder; @@ -323,13 +365,13 @@ lp_build_conv(LLVMBuilderRef builder, bld.undef = lp_build_undef(src_type); bld.zero = lp_build_zero(src_type); bld.one = lp_build_one(src_type); - + src_int0 = lp_build_iround(&bld, a); src_int1 = lp_build_iround(&bld, b); src_int2 = lp_build_iround(&bld, c); src_int3 = lp_build_iround(&bld, d); } - + /* relying on clamping behavior of sse2 intrinsics here */ lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1); hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3); dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c index d3a5afff8c2..93e56553d7b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c @@ -57,6 +57,8 @@ lp_disassemble(const void* func) #ifdef HAVE_UDIS86 ud_t ud_obj; uint64_t max_jmp_pc; + uint inst_no; + boolean emit_addrs = TRUE, emit_line_nos = FALSE; ud_init(&ud_obj); @@ -76,13 +78,18 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { + if (emit_addrs) { #ifdef PIPE_ARCH_X86 - debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif - + } + else if (emit_line_nos) { + debug_printf("%6d:\t", inst_no); + inst_no++; + } #if 0 debug_printf("%-16s ", ud_insn_hex(&ud_obj)); #endif @@ -115,8 +122,10 @@ lp_disassemble(const void* func) } } - if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || - ud_obj.mnemonic == UD_Iinvalid) + if (ud_obj.mnemonic == UD_Iinvalid || + (ud_insn_off(&ud_obj) >= max_jmp_pc && + (ud_obj.mnemonic == UD_Iret || + ud_obj.mnemonic == UD_Ijmp))) break; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 369c1bbf09a..eb11dcd4ef4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -36,11 +36,12 @@ #include "util/u_string.h" -#define GALLIVM_DEBUG_TGSI 0x1 -#define GALLIVM_DEBUG_IR 0x2 -#define GALLIVM_DEBUG_ASM 0x4 -#define GALLIVM_DEBUG_NO_OPT 0x8 -#define GALLIVM_DEBUG_PERF 0x10 +#define GALLIVM_DEBUG_TGSI (1 << 0) +#define GALLIVM_DEBUG_IR (1 << 1) +#define GALLIVM_DEBUG_ASM (1 << 2) +#define GALLIVM_DEBUG_NO_OPT (1 << 3) +#define GALLIVM_DEBUG_PERF (1 << 4) +#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5) #ifdef DEBUG diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 5bc9c741a88..a2cee199a01 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -38,273 +38,15 @@ #include "lp_bld_flow.h" 
-#define LP_BUILD_FLOW_MAX_VARIABLES 64 -#define LP_BUILD_FLOW_MAX_DEPTH 32 - -/** - * Enumeration of all possible flow constructs. - */ -enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, - LP_BUILD_FLOW_IF -}; - - -/** - * Variable declaration scope. - */ -struct lp_build_flow_scope -{ - /** Number of variables declared in this scope */ - unsigned num_variables; -}; - - -/** - * Early exit. Useful to skip to the end of a function or block when - * the execution mask becomes zero or when there is an error condition. - */ -struct lp_build_flow_skip -{ - /** Block to skip to */ - LLVMBasicBlockRef block; - - /** Number of variables declared at the beginning */ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ -}; - - -/** - * if/else/endif. - */ -struct lp_build_flow_if -{ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ - - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; -}; - - -/** - * Union of all possible flow constructs' data - */ -union lp_build_flow_construct_data -{ - struct lp_build_flow_scope scope; - struct lp_build_flow_skip skip; - struct lp_build_flow_if ifthen; -}; - - -/** - * Element of the flow construct stack. - */ -struct lp_build_flow_construct -{ - enum lp_build_flow_construct_kind kind; - union lp_build_flow_construct_data data; -}; - - /** - * All necessary data to generate LLVM control flow constructs. + * Insert a new block, right where builder is pointing to. * - * Besides keeping track of the control flow construct themselves we also - * need to keep track of variables in order to generate SSA Phi values. - */ -struct lp_build_flow_context -{ - LLVMBuilderRef builder; - - /** - * Control flow stack. - */ - struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; - unsigned num_constructs; - - /** - * Variable stack - */ - LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; - unsigned num_variables; -}; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder) -{ - struct lp_build_flow_context *flow; - - flow = CALLOC_STRUCT(lp_build_flow_context); - if(!flow) - return NULL; - - flow->builder = builder; - - return flow; -} - - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow) -{ - assert(flow->num_constructs == 0); - assert(flow->num_variables == 0); - FREE(flow); -} - - -/** - * Begin/push a new flow control construct, such as a loop, skip block - * or variable scope. - */ -static union lp_build_flow_construct_data * -lp_build_flow_push(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); - if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) - return NULL; - - flow->constructs[flow->num_constructs].kind = kind; - return &flow->constructs[flow->num_constructs++].data; -} - - -/** - * Return the current/top flow control construct on the stack. 
- * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_peek(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[flow->num_constructs - 1].data; -} - - -/** - * End/pop the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_pop(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[--flow->num_constructs].data; -} - - -/** - * Begin a variable scope. + * This is useful important not only for aesthetic reasons, but also for + * performance reasons, as frequently run blocks should be laid out next to + * each other and fall-throughs maximized. * + * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp. * - */ -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - scope->num_variables = 0; -} - - -/** - * Declare a variable. - * - * A variable is a named entity which can have different LLVMValueRef's at - * different points of the program. This is relevant for control flow because - * when there are multiple branches to a same location we need to replace - * the variable's value with a Phi function as explained in - * http://en.wikipedia.org/wiki/Static_single_assignment_form . - * - * We keep track of variables by keeping around a pointer to where they're - * current. - * - * There are a few cautions to observe: - * - * - Variable's value must not be NULL. If there is no initial value then - * LLVMGetUndef() should be used. - * - * - Variable's value must be kept up-to-date. If the variable is going to be - * modified by a function then a pointer should be passed so that its value - * is accurate. Failure to do this will cause some of the variables' - * transient values to be lost, leading to wrong results. - * - * - A program should be written from top to bottom, by always appending - * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or - * modifying existing statements will most likely lead to wrong results. 
- * - */ -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(*variable); - if(!*variable) - return; - - assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); - if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) - return; - - flow->variables[flow->num_variables++] = variable; - ++scope->num_variables; -} - - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(flow->num_variables >= scope->num_variables); - if(flow->num_variables < scope->num_variables) { - flow->num_variables = 0; - return; - } - - flow->num_variables -= scope->num_variables; -} - - -/** * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ @@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) } -static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) -{ - return lp_build_insert_new_block(flow->builder, ""); -} - - /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. */ void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +lp_build_flow_skip_begin(struct lp_build_skip_context *skip, + LLVMBuilderRef builder) { - struct lp_build_flow_skip *skip; - LLVMBuilderRef builder; - unsigned i; - - skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; + skip->builder = builder; /* create new basic block */ - skip->block = lp_build_flow_insert_block(flow); - - skip->num_variables = flow->num_variables; - if(!skip->num_variables) { - skip->phi = NULL; - return; - } - - /* Allocate a Phi node for each variable in this skip scope */ - skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); - if(!skip->phi) { - skip->num_variables = 0; - return; - } - - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, skip->block); - - /* create a Phi node for each variable */ - for(i = 0; i < skip->num_variables; ++i) - skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - LLVMDisposeBuilder(builder); + skip->block = lp_build_insert_new_block(skip->builder, "skip"); } @@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) * skip block if the condition is true. 
*/ void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, LLVMValueRef cond) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; LLVMBasicBlockRef new_block; - unsigned i; - - skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - current_block = LLVMGetInsertBlock(flow->builder); - - new_block = lp_build_flow_insert_block(flow); - - /* for each variable, update the Phi node with a (variable, block) pair */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - } + new_block = lp_build_insert_new_block(skip->builder, ""); /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); - LLVMPositionBuilderAtEnd(flow->builder, new_block); + LLVMPositionBuilderAtEnd(skip->builder, new_block); } void -lp_build_flow_skip_end(struct lp_build_flow_context *flow) +lp_build_flow_skip_end(struct lp_build_skip_context *skip) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - unsigned i; - - skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - current_block = LLVMGetInsertBlock(flow->builder); - - /* add (variable, block) tuples to the phi nodes */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - *flow->variables[i] = skip->phi[i]; - } - /* goto block */ - LLVMBuildBr(flow->builder, skip->block); - LLVMPositionBuilderAtEnd(flow->builder, skip->block); - - FREE(skip->phi); + LLVMBuildBr(skip->builder, skip->block); + LLVMPositionBuilderAtEnd(skip->builder, skip->block); } /** * Check if the mask predicate is zero. If so, jump to the end of the block. 
*/ -static void +void lp_build_mask_check(struct lp_build_mask_context *mask) { - LLVMBuilderRef builder = mask->flow->builder; + LLVMBuilderRef builder = mask->skip.builder; + LLVMValueRef value; LLVMValueRef cond; + value = lp_build_mask_value(mask); + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMBuildBitCast(builder, value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); /* if cond, goto end of block */ - lp_build_flow_skip_cond_break(mask->flow, cond); + lp_build_flow_skip_cond_break(&mask->skip, cond); } @@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) */ void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); - mask->flow = flow; mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + mask->var = lp_build_alloca(builder, + lp_build_int_vec_type(type), + "execution_mask"); - lp_build_flow_scope_begin(flow); - lp_build_flow_scope_declare(flow, &mask->value); - lp_build_flow_skip_begin(flow); + LLVMBuildStore(builder, value, mask->var); - lp_build_mask_check(mask); + lp_build_flow_skip_begin(&mask->skip, builder); +} + + +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask) +{ + return LLVMBuildLoad(mask->skip.builder, mask->var, ""); } @@ -504,9 +185,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); + value = LLVMBuildAnd(mask->skip.builder, + lp_build_mask_value(mask), + value, ""); + LLVMBuildStore(mask->skip.builder, value, mask->var); } @@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { - lp_build_flow_skip_end(mask->flow); - lp_build_flow_scope_end(mask->flow); - return mask->value; + lp_build_flow_skip_end(&mask->skip); + return lp_build_mask_value(mask); } @@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder, LLVMValueRef start, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); + state->block = lp_build_insert_new_block(builder, "loop_begin"); - state->block = LLVMAppendBasicBlock(function, "loop"); + state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter"); + + LLVMBuildStore(builder, start, state->counter_var); LLVMBuildBr(builder, state->block); LLVMPositionBuilderAtEnd(builder, state->block); - state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); - - LLVMAddIncoming(state->counter, &start, &block, 1); - + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); } void -lp_build_loop_end(LLVMBuilderRef builder, - LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMValueRef next; - LLVMValueRef cond; - LLVMBasicBlockRef after_block; - - if (!step) - step = LLVMConstInt(LLVMTypeOf(end), 1, 0); - - next = LLVMBuildAdd(builder, state->counter, step, ""); - - cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); - - after_block = LLVMAppendBasicBlock(function, ""); - - LLVMBuildCondBr(builder, cond, after_block, state->block); - - 
LLVMAddIncoming(state->counter, &next, &block, 1); - - LLVMPositionBuilderAtEnd(builder, after_block); -} - -void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int llvm_cond, + LLVMIntPredicate llvm_cond, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); LLVMValueRef next; LLVMValueRef cond; LLVMBasicBlockRef after_block; @@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, next = LLVMBuildAdd(builder, state->counter, step, ""); + LLVMBuildStore(builder, next, state->counter_var); + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); - after_block = LLVMAppendBasicBlock(function, ""); + after_block = lp_build_insert_new_block(builder, "loop_end"); LLVMBuildCondBr(builder, cond, after_block, state->block); - LLVMAddIncoming(state->counter, &next, &block, 1); - LLVMPositionBuilderAtEnd(builder, after_block); + + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); +} + + +void +lp_build_loop_end(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + struct lp_build_loop_state *state) +{ + lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state); } @@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, Is built with: - LLVMValueRef x = LLVMGetUndef(); // or something else + // x needs an alloca variable + x = lp_build_alloca(builder, type, "x"); - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); + lp_build_if(ctx, builder, cond); + LLVMBuildStore(LLVMBuildAdd(1, 2), x); + lp_build_else(ctx); + LLVMBuildStore(LLVMBuildAdd(2, 3). x); + lp_build_endif(ctx); - // x needs a phi node - lp_build_flow_scope_declare(flow, &x); - - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_endif(ctx); - - lp_build_flow_scope_end(flow); - - lp_build_flow_destroy(flow); */ @@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, * Begin an if/else/endif construct. 
*/ void -lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, +lp_build_if(struct lp_build_if_state *ifthen, LLVMBuilderRef builder, LLVMValueRef condition) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - struct lp_build_flow_if *ifthen; - unsigned i; - - memset(ctx, 0, sizeof(*ctx)); - ctx->builder = builder; - ctx->flow = flow; - /* push/create new scope */ - ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - ifthen->num_variables = flow->num_variables; + memset(ifthen, 0, sizeof *ifthen); + ifthen->builder = builder; ifthen->condition = condition; ifthen->entry_block = block; - /* create a Phi node for each variable in this flow scope */ - ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); - if (!ifthen->phi) { - ifthen->num_variables = 0; - return; - } - /* create endif/merge basic block for the phi functions */ ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); - - /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) { - ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - /* add add the initial value of the var from the entry block */ - if (!LLVMIsUndef(*flow->variables[i])) - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], - &ifthen->entry_block, 1); - } /* create/insert true_block before merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx, * Begin else-part of a conditional */ void -lp_build_else(struct lp_build_if_state *ctx) +lp_build_else(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - unsigned i; - - ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - } + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMBuildBr(ifthen->builder, ifthen->merge_block); /* create/insert false_block before the merge block */ ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block); } @@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx) * End a conditional. 
*/ void -lp_build_endif(struct lp_build_if_state *ctx) +lp_build_endif(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); - unsigned i; - - ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - /* Insert branch to the merge block from current block */ - LLVMBuildBr(ctx->builder, ifthen->merge_block); + LLVMBuildBr(ifthen->builder, ifthen->merge_block); - if (ifthen->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - /* for each variable, update the Phi node with a (variable, block) pair */ - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - else { - /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - - FREE(ifthen->phi); - - /*** - *** Now patch in the various branch instructions. - ***/ + /* + * Now patch in the various branch instructions. + */ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block); if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->merge_block); } - /* Insert branch from end of true_block to merge_block */ - if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); - } - else { - /* No else clause. - * Note that we've already inserted the branch at the end of - * true_block. See the very first LLVMBuildBr() call in this function. - */ - } - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block); } @@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder, } res = LLVMBuildAlloca(first_builder, type, name); + LLVMBuildStore(builder, LLVMConstNull(type), res); LLVMDisposeBuilder(first_builder); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index fffb493a93b..e729ee6eaac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -41,52 +41,49 @@ struct lp_type; -struct lp_build_flow_context; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder); - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable); +/** + * Early exit. 
Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_skip_context +{ + LLVMBuilderRef builder; -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow); + /** Block to skip to */ + LLVMBasicBlockRef block; +}; void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow); +lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, + LLVMBuilderRef builder); void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, LLVMValueRef cond); void -lp_build_flow_skip_end(struct lp_build_flow_context *flow); +lp_build_flow_skip_end(struct lp_build_skip_context *ctx); struct lp_build_mask_context { - struct lp_build_flow_context *flow; + struct lp_build_skip_context skip; LLVMTypeRef reg_type; - LLVMValueRef value; + LLVMValueRef var; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value); +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask); + /** * Bitwise AND the mask with the given value, if a previous mask was set. */ @@ -94,6 +91,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value); +void +lp_build_mask_check(struct lp_build_mask_context *mask); + LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask); @@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask); struct lp_build_loop_state { LLVMBasicBlockRef block; + LLVMValueRef counter_var; LLVMValueRef counter; }; @@ -128,22 +129,28 @@ void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int cond, /* LLVM condition */ + LLVMIntPredicate cond, struct lp_build_loop_state *state); +/** + * if/else/endif. 
+ */ struct lp_build_if_state { LLVMBuilderRef builder; - struct lp_build_flow_context *flow; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; }; void lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, LLVMBuilderRef builder, LLVMValueRef condition); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 761f33b578d..5598ca5c489 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "asm", GALLIVM_DEBUG_ASM, NULL }, { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, + { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index f26fdac4663..0b4b1ca7d11 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -47,4 +47,10 @@ lp_build_init(void); extern void lp_func_delete_body(LLVMValueRef func); + +extern LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name); + + #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index ce5d0214b43..026b60ac36e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -462,10 +462,12 @@ lp_build_select(struct lp_build_context *bld, LLVMTypeRef arg_type; LLVMValueRef args[3]; - if (type.width == 64) { + if (type.floating && + type.width == 64) { intrinsic = "llvm.x86.sse41.blendvpd"; arg_type = LLVMVectorType(LLVMDoubleType(), 2); - } else if (type.width == 32) { + } else if (type.floating && + type.width == 32) { intrinsic = "llvm.x86.sse41.blendvps"; arg_type = LLVMVectorType(LLVMFloatType(), 4); } else { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 48baf7c425c..f56ddee7fd7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF) llvm::Function *func = llvm::unwrap<llvm::Function>(FF); func->deleteBody(); } + + +extern "C" +LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name) +{ + return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index 153ba5b15b1..f418e96aff4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -29,6 +29,8 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_string.h" +#include "lp_bld_const.h" #include "lp_bld_printf.h" @@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) return LLVMBuildCall(builder, func_printf, params, argcount + 1, ""); } + + +/** + * Print a float[4] vector. 
+ */ +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec) +{ + char format[1000]; + LLVMValueRef x, y, z, w; + + x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), ""); + y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), ""); + z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), ""); + w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), ""); + + util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg); + return lp_build_printf(builder, format, x, y, z, w); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h index 83bd8f1d557..b6222c62ebe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -35,5 +35,9 @@ LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec); + + #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 7b1088939b9..c18c8b47100 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -81,11 +81,15 @@ LLVMValueRef lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); - LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); - LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return lp_build_sub(bld, a_right, a_left); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0); + LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left"); + LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx"); + else + return LLVMBuildSub(bld->builder, a_right, a_left, "ddx"); } @@ -93,9 +97,13 @@ LLVMValueRef lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); - LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); - LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return lp_build_sub(bld, a_bottom, a_top); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0); + LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top"); + LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy"); + else + return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy"); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 7a64392d3c1..844d1d935b5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -47,8 +47,7 @@ /* - * Bri-linear factor. Use zero or any other number less than one to force - * tri-linear filtering. + * Bri-linear factor. Should be greater than one. */ #define BRILINEAR_FACTOR 2 @@ -201,8 +200,8 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef float_size; LLVMValueRef rho; - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdx = ddx[0]; + dsdy = ddy[0]; if (dims <= 1) { rho_x = dsdx; @@ -215,15 +214,15 @@ lp_build_rho(struct lp_build_sample_context *bld, rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdx = ddx[1]; + dtdy = ddy[1]; rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); if (dims >= 3) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdx = ddx[2]; + drdy = ddy[2]; rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); @@ -294,31 +293,30 @@ lp_build_rho(struct lp_build_sample_context *bld, * TODO: This could be done in fixed point, where applicable. */ static void -lp_build_brilinear_lod(struct lp_build_sample_context *bld, +lp_build_brilinear_lod(struct lp_build_context *bld, LLVMValueRef lod, double factor, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart) { - struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef lod_fpart; - float pre_offset = (factor - 0.5)/factor - 0.5; - float post_offset = 1 - factor; + double pre_offset = (factor - 0.5)/factor - 0.5; + double post_offset = 1 - factor; if (0) { lp_build_printf(bld->builder, "lod = %f\n", lod); } - lod = lp_build_add(float_bld, lod, - lp_build_const_vec(float_bld->type, pre_offset)); + lod = lp_build_add(bld, lod, + lp_build_const_vec(bld->type, pre_offset)); - lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart); + lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); - lod_fpart = lp_build_mul(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, factor)); + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); - lod_fpart = lp_build_add(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, post_offset)); + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); /* * It's not necessary to clamp lod_fpart since: @@ -335,6 +333,61 @@ lp_build_brilinear_lod(struct lp_build_sample_context *bld, } +/* + * Combined log2 and brilinear lod computation. + * + * It's in all identical to calling lp_build_fast_log2() and + * lp_build_brilinear_lod() above, but by combining we can compute the interger + * and fractional part independently. 
+ */ +static void +lp_build_brilinear_rho(struct lp_build_context *bld, + LLVMValueRef rho, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_ipart; + LLVMValueRef lod_fpart; + + const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); + const double post_offset = 1 - 2*factor; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, rho)); + + /* + * The pre factor will make the intersections with the exact powers of two + * happen precisely where we want then to be, which means that the integer + * part will not need any post adjustments. + */ + rho = lp_build_mul(bld, rho, + lp_build_const_vec(bld->type, pre_factor)); + + /* ipart = ifloor(log2(rho)) */ + lod_ipart = lp_build_extract_exponent(bld, rho, 0); + + /* fpart = rho / 2**ipart */ + lod_fpart = lp_build_extract_mantissa(bld, rho); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_ipart = lod_ipart; + *out_lod_fpart = lod_fpart; +} + + /** * Generate code to compute texture level of detail (lambda). * \param ddx partial derivatives of (s, t, r, q) with respect to X @@ -389,16 +442,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, rho = lp_build_rho(bld, ddx, ddy); - /* compute lod = log2(rho) */ - if ((mip_filter == PIPE_TEX_MIPFILTER_NONE || - mip_filter == PIPE_TEX_MIPFILTER_NEAREST) && - !lod_bias && + /* + * Compute lod = log2(rho) + */ + + if (!lod_bias && !bld->static_state->lod_bias_non_zero && !bld->static_state->apply_max_lod && !bld->static_state->apply_min_lod) { - *out_lod_ipart = lp_build_ilog2(float_bld, rho); - *out_lod_fpart = bld->float_bld.zero; - return; + /* + * Special case when there are no post-log2 adjustments, which + * saves instructions but keeping the integer and fractional lod + * computations separate from the start. 
+ */ + + if (mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && + !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + return; + } } if (0) { @@ -437,21 +506,22 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (BRILINEAR_FACTOR > 1.0) { - lp_build_brilinear_lod(bld, lod, BRILINEAR_FACTOR, + if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); } else { lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); } - lp_build_name(*out_lod_ipart, "lod_ipart"); lp_build_name(*out_lod_fpart, "lod_fpart"); } else { *out_lod_ipart = lp_build_iround(float_bld, lod); } + lp_build_name(*out_lod_ipart, "lod_ipart"); + return; } @@ -630,37 +700,21 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, LLVMValueRef ilevel, - LLVMValueRef *out_width_vec, - LLVMValueRef *out_height_vec, - LLVMValueRef *out_depth_vec, + LLVMValueRef *out_size, LLVMValueRef *row_stride_vec, LLVMValueRef *img_stride_vec) { const unsigned dims = bld->dims; LLVMValueRef ilevel_vec; - LLVMValueRef size_vec; - LLVMTypeRef i32t = LLVMInt32Type(); ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); /* * Compute width, height, depth at mipmap level 'ilevel' */ - size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); - *out_width_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 0, 0)); if (dims >= 2) { - - *out_height_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 1, 0)); - *row_stride_vec = lp_build_get_level_stride_vec(bld, bld->row_stride_array, ilevel); @@ -668,18 +722,90 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, *img_stride_vec = lp_build_get_level_stride_vec(bld, bld->img_stride_array, ilevel); - if (dims == 3) { - *out_depth_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 2, 0)); - } } } } +/** + * Extract and broadcast texture size. 
+ * + * @param size_type type of the texture size vector (either + * bld->int_size_type or bld->float_size_type) + * @param coord_type type of the texture size vector (either + * bld->int_coord_type or bld->coord_type) + * @param int_size vector with the integer texture size (width, height, + * depth) + */ +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth) +{ + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + + *out_width = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); + } + } +} + + +/** + * Unnormalize coords. + * + * @param int_size vector with the integer texture size (width, height, depth) + */ +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r) +{ + const unsigned dims = bld->dims; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &width, + &height, + &depth); + + /* s = s * width, t = t * height */ + *s = lp_build_mul(&bld->coord_bld, *s, width); + if (dims >= 2) { + *t = lp_build_mul(&bld->coord_bld, *t, height); + if (dims >= 3) { + *r = lp_build_mul(&bld->coord_bld, *r, depth); + } + } +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef @@ -798,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); { - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; + LLVMValueRef face_s_var; + LLVMValueRef face_t_var; + LLVMValueRef face_var; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - *face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; - - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); + face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); + face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); + face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); { /* +/- X face */ LLVMValueRef sign = lp_build_sgn(float_bld, rx); @@ -826,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rx, PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx); { - struct lp_build_flow_context *flow_ctx2; struct lp_build_if_state if_ctx2; - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - 
LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); { /* +/- Y face */ LLVMValueRef sign = lp_build_sgn(float_bld, ry); LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, + *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + *face = lp_build_cube_face(bld, ry, PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx2); { /* +/- Z face */ LLVMValueRef sign = lp_build_sgn(float_bld, rz); LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, + *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rz, PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; } lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + + *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); + *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); + *face = LLVMBuildLoad(bld->builder, face_var, "face"); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index d1a1aa143d8..ffed27cee83 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -197,10 +197,6 @@ struct lp_build_sample_context struct lp_type coord_type; struct lp_build_context coord_bld; - /** Unsigned integer coordinates */ - struct lp_type uint_coord_type; - struct lp_build_context uint_coord_bld; - /** Signed integer coordinates */ struct lp_type int_coord_type; struct lp_build_context int_coord_bld; @@ -333,14 +329,30 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, LLVMValueRef ilevel, - LLVMValueRef *out_width_vec, - LLVMValueRef *out_height_vec, - LLVMValueRef *out_depth_vec, + LLVMValueRef *out_size_vec, LLVMValueRef *row_stride_vec, LLVMValueRef *img_stride_vec); void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth); + + +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + 
LLVMValueRef *t, + LLVMValueRef *r); + + +void lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index e7410448c04..d6831a580b3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,6 +45,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" @@ -80,11 +81,10 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_i) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: @@ -92,7 +92,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord = LLVMBuildAdd(bld->builder, coord, bias, ""); coord = LLVMBuildURem(bld->builder, coord, length, ""); } @@ -113,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, assert(0); } - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); } @@ -146,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i0, LLVMValueRef *i1) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -188,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * multiplication. 
*/ - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; + *i0 = int_coord_bld->zero; + *i1 = int_coord_bld->zero; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); @@ -200,7 +199,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, } else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); } @@ -208,9 +207,9 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = lp_build_compare(bld->builder, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), + lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; @@ -225,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = lp_build_add(int_coord_bld, *offset0, LLVMBuildAnd(bld->builder, stride, mask, "")); break; @@ -239,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; + *offset0 = int_coord_bld->zero; + *offset1 = int_coord_bld->zero; break; } } @@ -253,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -270,7 +267,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; - LLVMValueRef s_ipart, t_ipart, r_ipart; + LLVMValueRef width_vec, height_vec, depth_vec; + LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; LLVMValueRef x_subcoord, y_subcoord, z_subcoord; @@ -283,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; 
+ LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -324,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -343,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_t, &y_offset, &y_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); if (dims >= 3) { LLVMValueRef z_offset; lp_build_sample_wrap_nearest_int(bld, @@ -352,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_r, &z_offset, &z_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; /* The r coord is the cube face in [0,5] */ - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } } @@ -417,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -434,9 +433,10 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; + LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL; + LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL; LLVMValueRef x_stride, y_stride, z_stride; LLVMValueRef x_offset0, x_offset1; LLVMValueRef y_offset0, y_offset1; @@ -458,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, 
+ bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -517,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -548,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (z = 0; z < 2; z++) { for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x] = lp_build_add(&bld->int_coord_bld, offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x] = lp_build_add(&bld->int_coord_bld, offset[z][1][x], y_offset1); } } @@ -566,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, &z_subcoord[0], &z_subcoord[1]); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x] = lp_build_add(&bld->int_coord_bld, offset[1][y][x], z_offset1); } } } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { /* The r coord is the cube face in [0,5] */ - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset); } } @@ -788,12 +791,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef colors_hi_var) { LLVMBuilderRef builder = bld->builder; - LLVMValueRef width0_vec; - LLVMValueRef width1_vec; - LLVMValueRef height0_vec; - LLVMValueRef height1_vec; - 
LLVMValueRef depth0_vec; - LLVMValueRef depth1_vec; + LLVMValueRef size0; + LLVMValueRef size1; LLVMValueRef row_stride0_vec; LLVMValueRef row_stride1_vec; LLVMValueRef img_stride0_vec; @@ -806,12 +805,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, - &width0_vec, &height0_vec, &depth0_vec, + &size0, &row_stride0_vec, &img_stride0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); @@ -819,7 +818,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); @@ -832,12 +831,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); LLVMTypeRef i32_type = LLVMIntType(32); - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; - flow_ctx = lp_build_flow_create(builder); - lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); @@ -846,7 +842,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lod_fpart, LLVMConstNull(i32_type), "need_lerp"); - lp_build_if(&if_ctx, flow_ctx, builder, need_lerp); + lp_build_if(&if_ctx, builder, need_lerp); { struct lp_build_context h16_bld; @@ -854,19 +850,19 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, - &width1_vec, &height1_vec, &depth1_vec, + &size1, &row_stride1_vec, &img_stride1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); @@ -877,6 +873,26 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); +#if HAVE_LLVM == 0x208 + /* This is a work-around for a bug in LLVM 2.8. + * Evidently, something goes wrong in the construction of the + * lod_fpart short[8] vector. Adding this no-effect shuffle seems + * to force the vector to be properly constructed. + * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). 
+ */ + { + LLVMValueRef shuffles[8], shuffle; + int i; + assert(h16_bld.type.length <= Elements(shuffles)); + for (i = 0; i < h16_bld.type.length; i++) + shuffles[i] = lp_build_const_int32(2 * (i & 1)); + shuffle = LLVMConstVector(shuffles, h16_bld.type.length); + lod_fpart = LLVMBuildShuffleVector(builder, + lod_fpart, lod_fpart, + shuffle, ""); + } +#endif + colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, colors0_lo, colors1_lo); colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, @@ -886,8 +902,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMBuildStore(builder, colors0_hi, colors_hi_var); } lp_build_endif(&if_ctx); - - lp_build_flow_destroy(flow_ctx); } } @@ -946,12 +960,12 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; @@ -1027,17 +1041,14 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - /* minify = lod >= 0.0 */ minify = LLVMBuildICmp(builder, LLVMIntSGE, lod_ipart, int_bld->zero, ""); - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, @@ -1056,8 +1067,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, packed_lo, packed_hi); } lp_build_endif(&if_ctx); - - lp_build_flow_destroy(flow_ctx); } /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index b8cf938acfe..53cc0c5f345 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -131,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } /* convert x,y,z coords to linear offset from start of texture, in bytes */ - lp_build_sample_offset(&bld->uint_coord_bld, + lp_build_sample_offset(&bld->int_coord_bld, bld->format_desc, x, y, z, y_stride, z_stride, &offset, &i, &j); @@ -145,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * coords which are out of bounds to become zero. Zero's guaranteed * to be inside the texture image. 
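 *
 * Per lane this amounts to:  offset = use_border ? 0 : offset
 * (lanes flagged as border then fetch texel 0, whose value is presumably
 * replaced by the border color afterwards).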
*/ - offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border); + offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); } lp_build_fetch_rgba_soa(bld->builder, @@ -202,11 +202,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef fract, flr, isOdd; - /* fract = coord - floor(coord) */ - fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); - - /* flr = ifloor(coord); */ - flr = lp_build_ifloor(coord_bld, coord); + lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); /* isOdd = flr & 1 */ isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); @@ -234,6 +230,7 @@ static void lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode, LLVMValueRef *x0_out, @@ -242,10 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; switch(wrap_mode) { @@ -255,19 +250,23 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); - coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); /* repeat wrap */ if (is_pot) { + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); } else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + LLVMValueRef mask; coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); - coord1 = LLVMBuildAdd(bld->builder, coord1, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + coord1 = LLVMBuildAnd(bld->builder, + lp_build_add(int_coord_bld, coord0, int_coord_bld->one), + mask, ""); } break; @@ -288,41 +287,39 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_state->normalized_coords) { - /* clamp to [0,1] */ - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); - /* mul by tex size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length_f); + { + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; + + if (bld->static_state->normalized_coords) { + /* mul by tex size */ + coord = lp_build_mul(coord_bld, coord, length_f); + } + /* clamp to length max */ + coord = lp_build_min(coord_bld, coord, length_f); + /* subtract 0.5 */ coord = lp_build_sub(coord_bld, coord, half); + /* clamp to [0, length - 0.5] */ + 
coord = lp_build_max(coord_bld, coord, coord_bld->zero); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + /* coord1 = min(coord1, length-1) */ + coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); + break; } - else { - LLVMValueRef min, max; - /* clamp to [0.5, length - 0.5] */ - min = half; - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - } - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - /* coord0 = max(coord0, 0) */ - coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); - /* coord1 = min(coord1, length-1) */ - coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); - break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: { - LLVMValueRef min, max; + LLVMValueRef min; if (bld->static_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_vec(coord_bld->type, -0.5F); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); + /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ coord = lp_build_sub(coord_bld, coord, half); + min = lp_build_const_vec(coord_bld->type, -1.0F); + coord = lp_build_clamp(coord_bld, coord, min, length_f); /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); @@ -368,7 +365,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: { LLVMValueRef min, max; - + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -384,15 +382,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { - LLVMValueRef min, max; - coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -400,12 +396,10 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_negate(coord_bld, half); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - + /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ + /* skip -0.5 clamp (always positive), do sub first */ coord = lp_build_sub(coord_bld, coord, half); + coord = lp_build_min(coord_bld, coord, length_f); /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); @@ -437,14 +431,13 @@ static LLVMValueRef lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode) { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct 
lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; switch(wrap_mode) { @@ -455,7 +448,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); icoord = LLVMBuildAdd(bld->builder, icoord, bias, ""); icoord = LLVMBuildURem(bld->builder, icoord, length, ""); } @@ -469,7 +462,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } /* floor */ - icoord = lp_build_ifloor(coord_bld, coord); + /* use itrunc instead since we clamp to 0 anyway */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1]. */ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, @@ -503,7 +497,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, assert(bld->static_state->normalized_coords); coord = lp_build_mul(coord_bld, coord, length_f); - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -518,7 +513,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -532,7 +528,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length] */ icoord = lp_build_min(int_coord_bld, icoord, length); @@ -554,9 +551,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -566,24 +561,45 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef colors_out[4]) { const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x, y, z; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. 
*/ - x = lp_build_sample_wrap_nearest(bld, s, width_vec, + x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s); lp_build_name(x, "tex.x.wrapped"); if (dims >= 2) { - y = lp_build_sample_wrap_nearest(bld, t, height_vec, + y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t); lp_build_name(y, "tex.y.wrapped"); if (dims == 3) { - z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, bld->static_state->pot_depth, bld->static_state->wrap_r); lp_build_name(z, "tex.z.wrapped"); @@ -617,9 +633,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -629,15 +643,36 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef colors_out[4]) { const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x0, y0, z0, x1, y1, z1; LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; int chan; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. 
*/ - lp_build_sample_wrap_linear(bld, s, width_vec, + lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s, &x0, &x1, &s_fpart); @@ -645,7 +680,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(x1, "tex.x1.wrapped"); if (dims >= 2) { - lp_build_sample_wrap_linear(bld, t, height_vec, + lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t, &y0, &y1, &t_fpart); @@ -653,7 +688,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(y1, "tex.y1.wrapped"); if (dims == 3) { - lp_build_sample_wrap_linear(bld, r, depth_vec, + lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, bld->static_state->pot_depth, bld->static_state->wrap_r, &z0, &z1, &r_fpart); @@ -796,12 +831,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef *colors_out) { LLVMBuilderRef builder = bld->builder; - LLVMValueRef width0_vec; - LLVMValueRef width1_vec; - LLVMValueRef height0_vec; - LLVMValueRef height1_vec; - LLVMValueRef depth0_vec; - LLVMValueRef depth1_vec; + LLVMValueRef size0; + LLVMValueRef size1; LLVMValueRef row_stride0_vec; LLVMValueRef row_stride1_vec; LLVMValueRef img_stride0_vec; @@ -813,12 +844,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, - &width0_vec, &height0_vec, &depth0_vec, + &size0, &row_stride0_vec, &img_stride0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, unit, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); @@ -826,7 +857,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, unit, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); @@ -838,35 +869,32 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; - flow_ctx = lp_build_flow_create(builder); - /* need_lerp = lod_fpart > 0 */ need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, lod_fpart, bld->float_bld.zero, "need_lerp"); - lp_build_if(&if_ctx, flow_ctx, builder, need_lerp); + lp_build_if(&if_ctx, builder, need_lerp); { /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, - &width1_vec, &height1_vec, &depth1_vec, + &size1, &row_stride1_vec, &img_stride1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, unit, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, colors1); } else { lp_build_sample_image_linear(bld, unit, - width1_vec, height1_vec, depth1_vec, + size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, colors1); @@ -883,8 +911,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, } } lp_build_endif(&if_ctx); - - lp_build_flow_destroy(flow_ctx); } } @@ -937,12 +963,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = 
lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; @@ -1020,17 +1046,14 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - /* minify = lod >= 0.0 */ minify = LLVMBuildICmp(builder, LLVMIntSGE, lod_ipart, int_bld->zero, ""); - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, unit, @@ -1049,8 +1072,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, texels); } lp_build_endif(&if_ctx); - - lp_build_flow_destroy(flow_ctx); } for (chan = 0; chan < 4; ++chan) { @@ -1166,7 +1187,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.float_type = lp_type_float(32); bld.int_type = lp_type_int(32); bld.coord_type = type; - bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); bld.float_size_type = lp_type_float(32); bld.float_size_type.length = dims > 1 ? 4 : 1; @@ -1179,7 +1199,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type); lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); - lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type); lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 97318b3456c..a4d3b750c3c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -36,6 +36,9 @@ #define LP_BLD_TGSI_H #include "gallivm/lp_bld.h" +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" struct tgsi_token; @@ -55,6 +58,75 @@ enum lp_build_tex_modifier { /** + * Describe a channel of a register. + * + * The value can be a: + * - immediate value (i.e. derived from a IMM register) + * - CONST[n].x/y/z/w + * - IN[n].x/y/z/w + * - undetermined (when .file == TGSI_FILE_NULL) + * + * This is one of the analysis results, and is used to described + * the output color in terms of inputs. + */ +struct lp_tgsi_channel_info +{ + unsigned file:4; /* TGSI_FILE_* */ + unsigned swizzle:3; /* PIPE_SWIZZLE_x */ + union { + uint32_t index; + float value; /* for TGSI_FILE_IMMEDIATE */ + } u; +}; + + +/** + * Describe a texture sampler interpolator. + * + * The interpolation is described in terms of regular inputs. 
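+ *
+ * For example, a direct "TEX OUT[0], IN[0], SAMP[1], 2D" would be recorded
+ * roughly as coord[0] = IN[0].x, coord[1] = IN[0].y, unit = 1 and
+ * target = TGSI_TEXTURE_2D, with the unused coord channels left
+ * undetermined (TGSI_FILE_NULL).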
+ */ +struct lp_tgsi_texture_info +{ + struct lp_tgsi_channel_info coord[4]; + unsigned target:8; /* TGSI_TEXTURE_* */ + unsigned unit:8; /* Sampler unit */ + unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ +}; + + +struct lp_tgsi_info +{ + struct tgsi_shader_info base; + + /* + * Whether any of the texture opcodes access a register file other than + * TGSI_FILE_INPUT. + * + * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little + * benefit. + */ + unsigned indirect_textures:1; + + /* + * Texture opcode description. Aimed at detecting and described direct + * texture opcodes. + */ + unsigned num_texs; + struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; + + /* + * Output description. Aimed at detecting and describing simple blit + * shaders. + */ + struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; + + /* + * Shortcut pointers into the above (for fragment shaders). + */ + const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; +}; + +/** * Sampler code generation interface. * * Although texture sampling is a requirement for TGSI translation, it is @@ -97,6 +169,11 @@ struct lp_build_sampler_aos void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info); + + +void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, struct lp_type type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index d5f963be58d..c3c082b2b95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -513,7 +513,7 @@ emit_instruction( { LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1; - LLVMValueRef dst0; + LLVMValueRef dst0 = NULL; /* * Stores and write masks are handled in a general fashion after the long diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c new file mode 100644 index 00000000000..ad514463de0 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -0,0 +1,479 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "lp_bld_debug.h" +#include "lp_bld_tgsi.h" + + +/** + * Analysis context. + * + * This is where we keep store the value of each channel of the IMM/TEMP/OUT + * register values, as we walk the shader. + */ +struct analysis_context +{ + struct lp_tgsi_info *info; + + unsigned num_imms; + float imm[32][4]; + + struct lp_tgsi_channel_info temp[32][4]; +}; + + +/** + * Describe the specified channel of the src register. + */ +static void +analyse_src(struct analysis_context *ctx, + struct lp_tgsi_channel_info *chan_info, + const struct tgsi_src_register *src, + unsigned chan) +{ + chan_info->file = TGSI_FILE_NULL; + if (!src->Indirect && !src->Absolute && !src->Negate) { + unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan); + if (src->File == TGSI_FILE_TEMPORARY) { + if (src->Index < Elements(ctx->temp)) { + *chan_info = ctx->temp[src->Index][swizzle]; + } + } else { + chan_info->file = src->File; + if (src->File == TGSI_FILE_IMMEDIATE) { + assert(src->Index < Elements(ctx->imm)); + if (src->Index < Elements(ctx->imm)) { + chan_info->u.value = ctx->imm[src->Index][swizzle]; + } + } else { + chan_info->u.index = src->Index; + chan_info->swizzle = swizzle; + } + } + } +} + + +/** + * Whether this register channel refers to a specific immediate value. + */ +static boolean +is_immediate(const struct lp_tgsi_channel_info *chan_info, float value) +{ + return chan_info->file == TGSI_FILE_IMMEDIATE && + chan_info->u.value == value; +} + + +static void +analyse_tex(struct analysis_context *ctx, + const struct tgsi_full_instruction *inst, + enum lp_build_tex_modifier modifier) +{ + struct lp_tgsi_info *info = ctx->info; + unsigned chan; + + if (info->num_texs < Elements(info->tex)) { + struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; + bool indirect = FALSE; + unsigned readmask = 0; + + tex_info->target = inst->Texture.Texture; + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + readmask = TGSI_WRITEMASK_X; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + readmask = TGSI_WRITEMASK_XY; + break; + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + readmask = TGSI_WRITEMASK_XYZ; + break; + default: + assert(0); + return; + } + + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + /* We don't track explicit derivatives, although we could */ + indirect = TRUE; + tex_info->unit = inst->Src[3].Register.Index; + } else { + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED || + modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || + modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { + readmask |= TGSI_WRITEMASK_W; + } + tex_info->unit = inst->Src[1].Register.Index; + } + + for (chan = 0; chan < 4; ++chan) { + struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan]; + if (readmask & (1 << chan)) { + analyse_src(ctx, chan_info, &inst->Src[0].Register, chan); + if (chan_info->file != TGSI_FILE_INPUT) { + indirect = TRUE; + } + } else { + memset(chan_info, 0, sizeof *chan_info); + } + } + + if (indirect) { + info->indirect_textures = TRUE; + } + + ++info->num_texs; + } else { + info->indirect_textures = TRUE; + } +} + + +/** + * Process an instruction, and update the register values accordingly. 
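+ *
+ * For example, "MOV TEMP[0].x, IN[1].yyyy" records temp[0][0] as IN[1].y,
+ * and a following "MUL OUT[0].x, TEMP[0].xxxx, IMM[0].xxxx" with
+ * IMM[0].x == 1.0 propagates that same IN[1].y into output[0][0].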
+ */ +static void +analyse_instruction(struct analysis_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct lp_tgsi_info *info = ctx->info; + struct lp_tgsi_channel_info (*regs)[4]; + unsigned max_regs; + unsigned i; + unsigned index; + unsigned chan; + + for (i = 0; i < inst->Instruction.NumDstRegs; ++i) { + const struct tgsi_dst_register *dst = &inst->Dst[i].Register; + + /* + * Get the lp_tgsi_channel_info array corresponding to the destination + * register file. + */ + + if (dst->File == TGSI_FILE_TEMPORARY) { + regs = ctx->temp; + max_regs = Elements(ctx->temp); + } else if (dst->File == TGSI_FILE_OUTPUT) { + regs = info->output; + max_regs = Elements(info->output); + } else if (dst->File == TGSI_FILE_ADDRESS || + dst->File == TGSI_FILE_PREDICATE) { + continue; + } else { + assert(0); + continue; + } + + /* + * Detect direct TEX instructions + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE); + break; + case TGSI_OPCODE_TXD: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); + break; + case TGSI_OPCODE_TXB: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); + break; + case TGSI_OPCODE_TXL: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); + break; + case TGSI_OPCODE_TXP: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED); + break; + default: + break; + } + + /* + * Keep track of assignments and writes + */ + + if (dst->Indirect) { + /* + * It could be any register index so clear all register indices. + */ + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + for (index = 0; index < max_regs; ++index) { + regs[index][chan].file = TGSI_FILE_NULL; + } + } + } + } else if (dst->Index < max_regs) { + /* + * Update this destination register value. + */ + + struct lp_tgsi_channel_info res[4]; + + memset(res, 0, sizeof res); + + if (!inst->Instruction.Predicate && + !inst->Instruction.Saturate) { + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) { + analyse_src(ctx, &res[chan], + &inst->Src[0].Register, chan); + } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + /* + * Propagate values across 1.0 and 0.0 multiplications. + */ + + struct lp_tgsi_channel_info src0; + struct lp_tgsi_channel_info src1; + + analyse_src(ctx, &src0, &inst->Src[0].Register, chan); + analyse_src(ctx, &src1, &inst->Src[1].Register, chan); + + if (is_immediate(&src0, 0.0f)) { + res[chan] = src0; + } else if (is_immediate(&src1, 0.0f)) { + res[chan] = src1; + } else if (is_immediate(&src0, 1.0f)) { + res[chan] = src1; + } else if (is_immediate(&src1, 1.0f)) { + res[chan] = src0; + } + } + } + } + } + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + regs[dst->Index][chan] = res[chan]; + } + } + } + } + + /* + * Clear all temporaries information in presence of a control flow opcode. + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_IFC: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_BRK: + case TGSI_OPCODE_BREAKC: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CALLNZ: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_SWITCH: + case TGSI_OPCODE_CASE: + case TGSI_OPCODE_DEFAULT: + case TGSI_OPCODE_ENDSWITCH: + case TGSI_OPCODE_RET: + case TGSI_OPCODE_END: + /* XXX: Are there more cases? 
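+ * (Once control flow is involved, a previously tracked channel may or may
+ * not actually have been written, so everything recorded so far is
+ * conservatively forgotten below.)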
*/ + memset(&ctx->temp, 0, sizeof ctx->temp); + memset(&info->output, 0, sizeof info->output); + default: + break; + } +} + + +static INLINE void +dump_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + unsigned index; + unsigned chan; + + tgsi_dump(tokens, 0); + + for (index = 0; index < info->num_texs; ++index) { + const struct lp_tgsi_texture_info *tex_info = &info->tex[index]; + debug_printf("TEX[%u] =", index); + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &tex_info->coord[chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf(" %s[%u].%c", + tgsi_file_names[chan_info->file], + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } else { + debug_printf(" _"); + } + } + debug_printf(", SAMP[%u], %s\n", + tex_info->unit, + tgsi_texture_names[tex_info->target]); + } + + for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) { + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &info->output[index][chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]); + if (chan_info->file == TGSI_FILE_IMMEDIATE) { + debug_printf("%f", chan_info->u.value); + } else { + const char *file_name; + switch (chan_info->file) { + case TGSI_FILE_CONSTANT: + file_name = "CONST"; + break; + case TGSI_FILE_INPUT: + file_name = "IN"; + break; + default: + file_name = "???"; + break; + } + debug_printf("%s[%u].%c", + file_name, + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } + debug_printf("\n"); + } + } + } +} + + +/** + * Detect any direct relationship between the output color + */ +void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + struct tgsi_parse_context parse; + struct analysis_context ctx; + unsigned index; + unsigned chan; + + memset(info, 0, sizeof *info); + + tgsi_scan_shader(tokens, &info->base); + + memset(&ctx, 0, sizeof ctx); + ctx.info = info; + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = + &parse.FullToken.FullInstruction; + + if (inst->Instruction.Opcode == TGSI_OPCODE_END || + inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + /* We reached the end of main function body. */ + goto finished; + } + + analyse_instruction(&ctx, inst); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const unsigned size = + parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + assert(size <= 4); + if (ctx.num_imms < Elements(ctx.imm)) { + for (chan = 0; chan < size; ++chan) { + ctx.imm[ctx.num_imms][chan] = + parse.FullToken.FullImmediate.u[chan].Float; + } + ++ctx.num_imms; + } + } + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert(0); + } + } +finished: + + tgsi_parse_free(&parse); + + + /* + * Link the output color values. 
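+ * i.e. make info->cbuf[i] point at the output[] entry that was declared
+ * with semantic COLOR[i], so callers can look up the expression for each
+ * render target directly.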
+ */ + + for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) { + const struct lp_tgsi_channel_info null_output[4]; + info->cbuf[index] = null_output; + } + + for (index = 0; index < info->base.num_outputs; ++index) { + unsigned semantic_name = info->base.output_semantic_name[index]; + unsigned semantic_index = info->base.output_semantic_index[index]; + if (semantic_name == TGSI_SEMANTIC_COLOR && + semantic_index < PIPE_MAX_COLOR_BUFS) { + info->cbuf[semantic_index] = info->output[index]; + } + } + + if (gallivm_debug & GALLIVM_DEBUG_TGSI) { + dump_info(tokens, info); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 441aebae298..3c318cc8c80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); for (i = 0; i < num_coords; i++) { - ddx[i] = emit_fetch( bld, inst, 1, i ); - ddy[i] = emit_fetch( bld, inst, 2, i ); + LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); + LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); + ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); + ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); } unit = inst->Src[3].Register.Index; } else { for (i = 0; i < num_coords; i++) { - ddx[i] = lp_build_ddx( &bld->base, coords[i] ); - ddy[i] = lp_build_ddy( &bld->base, coords[i] ); + ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); + ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); } unit = inst->Src[1].Register.Index; } for (i = num_coords; i < 3; i++) { - ddx[i] = bld->base.undef; - ddy[i] = bld->base.undef; + ddx[i] = LLVMGetUndef(bld->base.elem_type); + ddy[i] = LLVMGetUndef(bld->base.elem_type); } bld->sampler->emit_fetch_texel(bld->sampler, @@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, texel); } +static boolean +near_end_of_shader(struct lp_build_tgsi_soa_context *bld, + int pc) +{ + int i; + + for (i = 0; i < 5; i++) { + unsigned opcode; + + if (pc + i >= bld->info->num_instructions) + return TRUE; + + opcode = bld->instructions[pc + i].Instruction.Opcode; + + if (opcode == TGSI_OPCODE_END) + return TRUE; + + if (opcode == TGSI_OPCODE_TEX || + opcode == TGSI_OPCODE_TXP || + opcode == TGSI_OPCODE_TXD || + opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXL || + opcode == TGSI_OPCODE_TXF || + opcode == TGSI_OPCODE_TXQ || + opcode == TGSI_OPCODE_CAL || + opcode == TGSI_OPCODE_CALLNZ || + opcode == TGSI_OPCODE_IF || + opcode == TGSI_OPCODE_IFC || + opcode == TGSI_OPCODE_BGNLOOP || + opcode == TGSI_OPCODE_SWITCH) + return FALSE; + } + + return TRUE; +} + + /** * Kill fragment if any of the src register values are negative. 
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, static void emit_kil( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + int pc) { const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; @@ -959,8 +1001,12 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); + } } @@ -972,7 +1018,8 @@ emit_kil( */ static void emit_kilp(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + int pc) { LLVMValueRef mask; @@ -983,10 +1030,14 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); } else { - mask = bld->base.zero; + LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); + mask = zero; } lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); } static void @@ -1535,12 +1586,12 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld, inst ); + emit_kilp( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( bld, inst ); + emit_kil( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index ef4b306cb67..330838d23cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -97,7 +97,7 @@ void (*ppc_get_func(struct ppc_function *p))(void) return (void (*)(void)) NULL; else #endif - return (void (*)(void)) p->store; + return (void (*)(void)) pointer_to_func(p->store); } diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 036c1ee48a8..34bfa527db0 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -23,26 +23,13 @@ #include "cell/ppu/cell_public.h" #endif + static INLINE struct pipe_screen * -sw_screen_create(struct sw_winsys *winsys) +sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { - const char *default_driver; - const char *driver; struct pipe_screen *screen = NULL; #if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) - default_driver = "llvmpipe"; -#elif defined(GALLIUM_SOFTPIPE) - default_driver = "softpipe"; -#else - default_driver = ""; -#endif - - driver = debug_get_option("GALLIUM_DRIVER", default_driver); - -#if defined(GALLIUM_CELL) if (screen == NULL && strcmp(driver, "cell") == 0) screen = cell_create_screen(winsys); #endif @@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys) return screen; } + +static INLINE struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + const char *default_driver; + const char *driver; + +#if defined(GALLIUM_CELL) + default_driver = "cell"; +#elif defined(GALLIUM_LLVMPIPE) + default_driver = "llvmpipe"; +#elif defined(GALLIUM_SOFTPIPE) + default_driver = "softpipe"; +#else + default_driver = ""; +#endif + + driver = debug_get_option("GALLIUM_DRIVER", default_driver); + return sw_screen_create_named(winsys, driver); +} + + #endif diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h index 0b4e7404034..e4effa713e9 
100644 --- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -13,22 +13,28 @@ static INLINE struct pipe_screen * sw_screen_wrap(struct pipe_screen *screen) { struct sw_winsys *sws; - struct pipe_screen *sw_screen; + struct pipe_screen *sw_screen = NULL; + const char *driver; - sws = wrapper_sw_winsys_warp_pipe_screen(screen); + driver = debug_get_option("GALLIUM_DRIVER", "native"); + if (strcmp(driver, "native") == 0) + return screen; + + sws = wrapper_sw_winsys_wrap_pipe_screen(screen); if (!sws) goto err; - sw_screen = sw_screen_create(sws); - if (sw_screen == screen) + sw_screen = sw_screen_create_named(sws, driver); + + if (!sw_screen) goto err_winsys; return sw_screen; err_winsys: - sws->destroy(sws); + return wrapper_sw_winsys_dewrap_pipe_screen(sws); err: - return screen; + return screen; } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index f71ffb70308..77bde86684e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -90,7 +90,8 @@ static const char *processor_type_names[] = "GEOM" }; -static const char *file_names[TGSI_FILE_COUNT] = +const char * +tgsi_file_names[TGSI_FILE_COUNT] = { "NULL", "CONST", @@ -125,7 +126,8 @@ static const char *semantic_names[] = "FACE", "EDGEFLAG", "PRIM_ID", - "INSTANCEID" + "INSTANCEID", + "STENCIL" }; static const char *immediate_type_names[] = @@ -135,7 +137,8 @@ static const char *immediate_type_names[] = "INT32" }; -static const char *swizzle_names[] = +const char * +tgsi_swizzle_names[] = { "x", "y", @@ -143,7 +146,8 @@ static const char *swizzle_names[] = "w" }; -static const char *texture_names[] = +const char * +tgsi_texture_names[] = { "UNKNOWN", "1D", @@ -201,15 +205,15 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { - ENM(src->Register.File, file_names); + ENM(src->Register.File, tgsi_file_names); if (src->Register.Dimension) { if (src->Dimension.Indirect) { CHR( '[' ); - ENM( src->DimIndirect.File, file_names ); + ENM( src->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( src->DimIndirect.Index ); TXT( "]." ); - ENM( src->DimIndirect.SwizzleX, swizzle_names ); + ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (src->Dimension.Index != 0) { if (src->Dimension.Index > 0) CHR( '+' ); @@ -224,11 +228,11 @@ _dump_register_src( } if (src->Register.Indirect) { CHR( '[' ); - ENM( src->Indirect.File, file_names ); + ENM( src->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( src->Indirect.Index ); TXT( "]." ); - ENM( src->Indirect.SwizzleX, swizzle_names ); + ENM( src->Indirect.SwizzleX, tgsi_swizzle_names ); if (src->Register.Index != 0) { if (src->Register.Index > 0) CHR( '+' ); @@ -248,15 +252,15 @@ _dump_register_dst( struct dump_ctx *ctx, const struct tgsi_full_dst_register *dst ) { - ENM(dst->Register.File, file_names); + ENM(dst->Register.File, tgsi_file_names); if (dst->Register.Dimension) { if (dst->Dimension.Indirect) { CHR( '[' ); - ENM( dst->DimIndirect.File, file_names ); + ENM( dst->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->DimIndirect.Index ); TXT( "]." 
); - ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (dst->Dimension.Index != 0) { if (dst->Dimension.Index > 0) CHR( '+' ); @@ -271,11 +275,11 @@ _dump_register_dst( } if (dst->Register.Indirect) { CHR( '[' ); - ENM( dst->Indirect.File, file_names ); + ENM( dst->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->Indirect.Index ); TXT( "]." ); - ENM( dst->Indirect.SwizzleX, swizzle_names ); + ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names ); if (dst->Register.Index != 0) { if (dst->Register.Index > 0) CHR( '+' ); @@ -351,7 +355,7 @@ iter_declaration( TXT( "DCL " ); - ENM(decl->Declaration.File, file_names); + ENM(decl->Declaration.File, tgsi_file_names); /* all geometry shader inputs are two dimensional */ if (decl->Declaration.File == TGSI_FILE_INPUT && @@ -585,10 +589,10 @@ iter_instruction( inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( inst->Predicate.SwizzleX, swizzle_names ); - ENM( inst->Predicate.SwizzleY, swizzle_names ); - ENM( inst->Predicate.SwizzleZ, swizzle_names ); - ENM( inst->Predicate.SwizzleW, swizzle_names ); + ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); } TXT( ") " ); @@ -641,10 +645,10 @@ iter_instruction( src->Register.SwizzleZ != TGSI_SWIZZLE_Z || src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->Register.SwizzleX, swizzle_names ); - ENM( src->Register.SwizzleY, swizzle_names ); - ENM( src->Register.SwizzleZ, swizzle_names ); - ENM( src->Register.SwizzleW, swizzle_names ); + ENM( src->Register.SwizzleX, tgsi_swizzle_names ); + ENM( src->Register.SwizzleY, tgsi_swizzle_names ); + ENM( src->Register.SwizzleZ, tgsi_swizzle_names ); + ENM( src->Register.SwizzleW, tgsi_swizzle_names ); } if (src->Register.Absolute) @@ -655,7 +659,7 @@ iter_instruction( if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->Texture.Texture, texture_names ); + ENM( inst->Texture.Texture, tgsi_texture_names ); } switch (inst->Instruction.Opcode) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index dd78b361007..fc0429ad8d9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -35,6 +35,15 @@ extern "C" { #endif +extern const char * +tgsi_file_names[TGSI_FILE_COUNT]; + +extern const char * +tgsi_swizzle_names[]; + +extern const char * +tgsi_texture_names[]; + void tgsi_dump_str( const struct tgsi_token *tokens, diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 90198a4f604..6585da3e838 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; + info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid; info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } @@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* extra info for special outputs */ if (procType == TGSI_PROCESSOR_FRAGMENT && - fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - 
info->writes_z = TRUE; - } + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) + info->writes_z = TRUE; + if (procType == TGSI_PROCESSOR_FRAGMENT && + fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL) + info->writes_stencil = TRUE; if (procType == TGSI_PROCESSOR_VERTEX && fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { info->writes_edgeflag = TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index f8aa90cf065..104097fbc03 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; + ubyte input_centroid[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ @@ -60,6 +61,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index a75380228b1..850ef39ef21 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -68,6 +68,33 @@ struct translate_key { }; +struct translate; + + +typedef void (PIPE_CDECL *run_elts_func)(struct translate *, + const unsigned *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts16_func)(struct translate *, + const uint16_t *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts8_func)(struct translate *, + const uint8_t *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_func)(struct translate *, + unsigned start, + unsigned count, + unsigned instance_id, + void *output_buffer); + struct translate { struct translate_key key; @@ -79,42 +106,14 @@ struct translate { unsigned stride, unsigned max_index ); - void (PIPE_CDECL *run_elts)( struct translate *, - const unsigned *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run_elts16)( struct translate *, - const uint16_t *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run_elts8)( struct translate *, - const uint8_t *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run)( struct translate *, - unsigned start, - unsigned count, - unsigned instance_id, - void *output_buffer); + run_elts_func run_elts; + run_elts16_func run_elts16; + run_elts8_func run_elts8; + run_func run; }; -#if 0 -struct translate_context *translate_context_create( void ); -void translate_context_destroy( struct translate_context * ); - -struct translate *translate_lookup_or_create( struct translate_context *tctx, - const struct translate_key *key ); -#endif - - struct translate *translate_create( const struct translate_key *key ); boolean translate_is_output_format_supported(enum pipe_format format); diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
b/src/gallium/auxiliary/translate/translate_sse.c index f8bf5b46692..ef7f4be4c3e 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1495,19 +1495,19 @@ struct translate *translate_sse2_create( const struct translate_key *key ) if (!build_vertex_emit(p, &p->elt8_func, 1)) goto fail; - p->translate.run = (void*)x86_get_func(&p->linear_func); + p->translate.run = (run_func) x86_get_func(&p->linear_func); if (p->translate.run == NULL) goto fail; - p->translate.run_elts = (void*)x86_get_func(&p->elt_func); + p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func); if (p->translate.run_elts == NULL) goto fail; - p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func); + p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func); if (p->translate.run_elts16 == NULL) goto fail; - p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func); + p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func); if (p->translate.run_elts8 == NULL) goto fail; diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index 220860ebf4b..aca435d6cad 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -38,6 +38,7 @@ #endif #include "u_dl.h" +#include "u_pointer.h" struct util_dl_library * @@ -58,7 +59,7 @@ util_dl_get_proc_address(struct util_dl_library *library, const char *procname) { #if defined(PIPE_OS_UNIX) - return (util_dl_proc)dlsym((void *)library, procname); + return (util_dl_proc) pointer_to_func(dlsym((void *)library, procname)); #elif defined(PIPE_OS_WINDOWS) return (util_dl_proc)GetProcAddress((HMODULE)library, procname); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 0811280b97b..8e5d4487a67 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs +PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs +PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py index 3e8000f3687..cd63ae78919 100644 --- a/src/gallium/auxiliary/util/u_format_srgb.py +++ b/src/gallium/auxiliary/util/u_format_srgb.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -89,7 +89,7 @@ def main(): print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. 
*/' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format_srgb.h"' print diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index f0b407b8b8e..8cc22a56371 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -83,7 +83,7 @@ def write_format_table(formats): print '/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format.h"' print '#include "u_format_s3tc.h"' diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index 792d69c214c..80081e22f7c 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d } } + +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h index 650db4b95fd..1604cc3eee2 100644 --- a/src/gallium/auxiliary/util/u_format_zs.h +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned void util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void 
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); #endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 69a76814945..37294b7203f 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) #endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + + #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 03198c91da4..1df6c872677 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; + +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -98,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * 
_mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f7aa1403d08..44cadbfcdd0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src, } } +/*** PIPE_FORMAT_S8X24_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8x24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)((*src++ >> 24) & 0xff); + } + + p += dst_stride; + } +} + +/*** PIPE_FORMAT_X24S8_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +x24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} + + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. 
+ */ +static void +s8_get_tile_rgba(const unsigned char *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} /*** PIPE_FORMAT_Z32_FLOAT ***/ @@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8_USCALED: + s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X24S8_USCALED: + s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8X24_USCALED: + x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; |
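
Editor's note: the u_sse.h hunk above adds an SSE2 emulation of the SSE4.1 intrinsic _mm_mullo_epi32(): it splits each operand into even and odd dwords, forms the four 64-bit products with _mm_mul_epu32(), and merges the low dwords of those products back into one vector. The standalone sketch below reproduces that trick outside the Mesa tree so it can be compiled and checked in isolation (e.g. gcc -O2 -msse2 mullo_test.c); the file name, function name and test values are illustrative only and do not appear in the patch. Since the low 32 bits of a product are the same whether the operands are read as signed or unsigned, the negative test values below are expected to match as well, even though the in-tree comment only relies on one operand being positive.

    /* Standalone sanity check for the SSE2 emulation of _mm_mullo_epi32()
     * added to u_sse.h in this commit.  Names and values are illustrative.
     */
    #include <emmintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    static __m128i
    emul_mullo_epi32(__m128i a, __m128i b)
    {
       __m128i a_hi = _mm_srli_epi64(a, 32);      /* dwords 1 and 3 of a      */
       __m128i b_hi = _mm_srli_epi64(b, 32);      /* dwords 1 and 3 of b      */
       __m128i p02  = _mm_mul_epu32(a, b);        /* 64-bit products 0 and 2  */
       __m128i p13  = _mm_mul_epu32(a_hi, b_hi);  /* 64-bit products 1 and 3  */

       /* Keep the low dword of products 0/2, shift the low dword of
        * products 1/3 into the odd lanes, and merge the two halves.
        */
       __m128i mask = _mm_setr_epi32(~0, 0, ~0, 0);
       return _mm_or_si128(_mm_and_si128(p02, mask),
                           _mm_slli_epi64(p13, 32));
    }

    int
    main(void)
    {
       int32_t va[4] = { 3, -7, 123456, 1 << 20 };
       int32_t vb[4] = { 5, 11,   -321,       3 };
       int32_t out[4];
       int i;

       __m128i r = emul_mullo_epi32(_mm_loadu_si128((const __m128i *) va),
                                    _mm_loadu_si128((const __m128i *) vb));
       _mm_storeu_si128((__m128i *) out, r);

       for (i = 0; i < 4; i++)
          printf("%d * %d = %d (expect %d)\n",
                 va[i], vb[i], out[i], va[i] * vb[i]);
       return 0;
    }

On an SSE4.1-capable build the same inputs can be cross-checked against the real _mm_mullo_epi32() from <smmintrin.h>; the patch deliberately avoids that dependency and settles for the four-instruction SSE2 merge, which its own comment judges close enough in speed.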