diff options
author | Thomas Balling Sørensen <tball@tball-laptop.(none)> | 2010-10-26 12:49:41 +0200 |
---|---|---|
committer | Thomas Balling Sørensen <tball@tball-laptop.(none)> | 2010-10-26 12:49:41 +0200 |
commit | dbf3a15313eed930a3d8fdde12e457259c43651b (patch) | |
tree | 78bc54fa866ff1e97e61802f52dabe3f4f3380b1 /src/gallium | |
parent | 1dccc4cfaa423f15ab582d2a0253a84a0ae0b9fa (diff) | |
parent | 547e7619aac74ae13bdaa7fdf403a4ceb5212467 (diff) |
Merge branch 'master' into pipe-video
Conflicts:
src/gallium/include/pipe/p_format.h
Diffstat (limited to 'src/gallium')
200 files changed, 9064 insertions, 4866 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 05096b12a86..ed96725d782 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -173,6 +173,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ gallivm/lp_bld_tgsi_aos.c \ + gallivm/lp_bld_tgsi_info.c \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index a18f7c0b2a3..f22c8b96123 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -225,6 +225,7 @@ if env['llvm']: 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_aos.c', + 'gallivm/lp_bld_tgsi_info.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 032fcbbc70a..39d82f32892 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -335,6 +335,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; draw->pt.user.vs_constants_size[slot] = size; + draw->pt.user.planes = (float (*) [12][4]) &(draw->plane[0]); draw_vs_set_constants(draw, slot, buffer, size); break; case PIPE_SHADER_GEOMETRY: @@ -721,9 +722,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { #ifdef HAVE_LLVM if(draw->llvm) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 1f27cbf488a..ff4f753604f 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -49,7 +49,6 @@ struct draw_geometry_shader; struct draw_fragment_shader; struct tgsi_sampler; -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); @@ -120,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); /* diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 7fb86d7cb27..140e596f994 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,6 +31,9 @@ #include "draw_vs.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_flow.h" @@ -43,7 +46,6 @@ #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" -#include "util/u_cpu_detect.h" #include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" @@ -72,12 +74,12 @@ init_globals(struct draw_llvm *llvm) elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); + PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); @@ -128,12 +130,14 @@ init_globals(struct draw_llvm *llvm) /* struct draw_jit_context */ { - LLVMTypeRef elem_types[3]; + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ + elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ + elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -142,6 +146,8 @@ init_globals(struct draw_llvm *llvm) llvm->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, + llvm->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, llvm->target, context_type, DRAW_JIT_CTX_TEXTURES); @@ -267,13 +273,7 @@ draw_llvm_create(struct draw_context *draw) LLVMAddConstantPropagationPass(llvm->pass); } - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(llvm->pass); - } + LLVMAddInstructionCombiningPass(llvm->pass); LLVMAddGVNPass(llvm->pass); } else { /* We need at least this pass to prevent the backends to fail in @@ -421,7 +421,7 @@ generate_fetch(LLVMBuilderRef builder, "instance_divisor"); } - /* limit index to min(inex, vb_max_index) */ + /* limit index to min(index, vb_max_index) */ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); @@ -550,19 +550,28 @@ static void store_aos(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value) + LLVMValueRef value, + LLVMValueRef clipmask) { LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); LLVMValueRef indices[3]; + LLVMValueRef val, shift; indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = index; indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); - /* undefined vertex */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), - 0xffff, 0), id_ptr); + /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ + val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); + shift = LLVMConstInt(LLVMInt32Type(), 12, 0); + val = LLVMBuildShl(builder, val, shift, ""); + /* add clipmask:12 */ + val = LLVMBuildOr(builder, val, clipmask, ""); + + /* store vertex header */ + LLVMBuildStore(builder, val, id_ptr); + #if DEBUG_STORE lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -617,7 +626,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], int attrib, - int num_outputs) + int num_outputs, + LLVMValueRef clipmask) { LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); @@ -625,7 +635,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - + LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + debug_assert(NUM_CHANNELS == 4); io0_ptr = LLVMBuildGEP(builder, io_ptr, @@ -637,21 +648,31 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + clipmask0 = LLVMBuildExtractElement(builder, clipmask, + ind0, ""); + clipmask1 = LLVMBuildExtractElement(builder, clipmask, + ind1, ""); + clipmask2 = LLVMBuildExtractElement(builder, clipmask, + ind2, ""); + clipmask3 = LLVMBuildExtractElement(builder, clipmask, + ind3, ""); + #if DEBUG_STORE - lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", - io_ptr, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", + io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif - - store_aos(builder, io0_ptr, attr_index, aos[0]); - store_aos(builder, io1_ptr, attr_index, aos[1]); - store_aos(builder, io2_ptr, attr_index, aos[2]); - store_aos(builder, io3_ptr, attr_index, aos[3]); + /* store for each of the 4 vertices */ + store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); + store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); + store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); + store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); } static void convert_to_aos(LLVMBuilderRef builder, LLVMValueRef io, LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef clipmask, int num_outputs, int max_vertices) { @@ -680,13 +701,305 @@ convert_to_aos(LLVMBuilderRef builder, io, aos, attrib, - num_outputs); + num_outputs, + clipmask); } #if DEBUG_STORE lp_build_printf(builder, " # storing end\n"); #endif } +/* + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + */ +static void +store_clip(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + LLVMValueRef out[4]; + LLVMValueRef indices[2]; + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; + LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; + LLVMValueRef out0elem, out1elem, out2elem, out3elem; + int i; + + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + + clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); + clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); + clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); + clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + + for (i = 0; i<4; i++){ + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, + indices, 2, ""); //x0 + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, + indices, 2, ""); //x1 + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, + indices, 2, ""); //x2 + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, + indices, 2, ""); //x3 + + out0elem = LLVMBuildExtractElement(builder, out[i], + ind0, ""); //x0 + out1elem = LLVMBuildExtractElement(builder, out[i], + ind1, ""); //x1 + out2elem = LLVMBuildExtractElement(builder, out[i], + ind2, ""); //x2 + out3elem = LLVMBuildExtractElement(builder, out[i], + ind3, ""); //x3 + + LLVMBuildStore(builder, out0elem, clip0_ptr); + LLVMBuildStore(builder, out1elem, clip1_ptr); + LLVMBuildStore(builder, out2elem, clip2_ptr); + LLVMBuildStore(builder, out3elem, clip3_ptr); + + indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + } + +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +/* + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + int i; + struct lp_type f32_type = lp_type_float_vec(32); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); + + /* for 1/w convention*/ + out3 = LLVMBuildFDiv(builder, const1, out3, ""); + LLVMBuildStore(builder, out3, outputs[0][3]); + + /* Viewport Mapping */ + for (i=0; i<3; i++){ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef scale; + LLVMValueRef trans; + LLVMValueRef scale_i; + LLVMValueRef trans_i; + LLVMValueRef index; + + index = LLVMConstInt(LLVMInt32Type(), i, 0); + scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + index = LLVMConstInt(LLVMInt32Type(), i+4, 0); + trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); + trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); + + /* divide by w */ + out = LLVMBuildFMul(builder, out, out3, ""); + /* mult by scale */ + out = LLVMBuildFMul(builder, out, scale, ""); + /* add translation */ + out = LLVMBuildFAdd(builder, out, trans, ""); + + /* store transformed outputs */ + LLVMBuildStore(builder, out, outputs[0][i]); + } + +} + + +/* + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef +generate_clipmask(LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + boolean clip_xy, + boolean clip_z, + boolean clip_user, + boolean clip_halfz, + unsigned nr, + LLVMValueRef context_ptr) +{ + LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef test, temp; + LLVMValueRef zero, shift; + LLVMValueRef pos_x, pos_y, pos_z, pos_w; + LLVMValueRef plane1, planes, plane_ptr, sum; + + unsigned i; + + struct lp_type f32_type = lp_type_float_vec(32); + + mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); + zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ + + /* Assuming position stored at output[0] */ + pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + /* Cliptest, for hardwired planes */ + if (clip_xy){ + /* plane 1 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + temp = shift; + test = LLVMBuildAnd(builder, test, temp, ""); + mask = test; + + /* plane 2 */ + test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 3 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 4 */ + test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_z){ + temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); + if (clip_halfz){ + /* plane 5 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + else{ + /* plane 5 */ + test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + /* plane 6 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_user){ + LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); + LLVMValueRef indices[3]; + temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); + + /* userclip planes */ + for (i = 6; i < nr; i++) { + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); + planes = vec4f_from_scalar(builder, plane1, "plane4_x"); + sum = LLVMBuildFMul(builder, planes, pos_x, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); + planes = vec4f_from_scalar(builder, plane1, "plane4_y"); + test = LLVMBuildFMul(builder, planes, pos_y, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); + planes = vec4f_from_scalar(builder, plane1, "plane4_z"); + test = LLVMBuildFMul(builder, planes, pos_z, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); + planes = vec4f_from_scalar(builder, plane1, "plane4_w"); + test = LLVMBuildFMul(builder, planes, pos_w, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + } + return mask; +} + +/* + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean + */ +static void +clipmask_bool(LLVMBuilderRef builder, + LLVMValueRef clipmask, + LLVMValueRef ret_ptr) +{ + LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMValueRef temp; + int i; + + for (i=0; i<4; i++){ + temp = LLVMBuildExtractElement(builder, clipmask, + LLVMConstInt(LLVMInt32Type(), i, 0) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); + } + + LLVMBuildStore(builder, ret, ret_ptr); +} + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { @@ -706,7 +1019,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -716,7 +1034,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); @@ -756,6 +1074,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* function will return non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( draw_llvm_variant_key_samplers(&variant->key), @@ -770,6 +1092,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); @@ -806,10 +1129,37 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header and positions in data */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -819,8 +1169,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -870,7 +1221,12 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef fetch_max; void *code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -880,10 +1236,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", - func_type); + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -929,11 +1284,16 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); + /* function returns non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -980,10 +1340,40 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -993,8 +1383,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -1038,6 +1429,16 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) */ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + /* will have to rig this up properly later */ + key->clip_xy = llvm->draw->clip_xy; + key->clip_z = llvm->draw->clip_z; + key->clip_user = llvm->draw->clip_user; + key->bypass_viewport = llvm->draw->identity_viewport; + key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; + key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); + key->nr_planes = llvm->draw->nr_planes; + key->pad = 0; + /* All variants of this shader will have the same value for * nr_samplers. Not yet trying to compact away holes in the * sampler array. @@ -1066,9 +1467,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index d0a68ae412d..c3c30c07c64 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -41,7 +41,6 @@ #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_llvm; struct llvm_vertex_shader; @@ -52,9 +51,9 @@ struct draw_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; float min_lod; float max_lod; float lod_bias; @@ -97,7 +96,8 @@ struct draw_jit_context { const float *vs_constants; const float *gs_constants; - + float (*planes) [12][4]; + float *viewport; struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS]; }; @@ -109,18 +109,22 @@ struct draw_jit_context #define draw_jit_context_gs_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "gs_constants") -#define DRAW_JIT_CTX_TEXTURES 2 +#define draw_jit_context_planes(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "planes") -#define draw_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") +#define draw_jit_context_viewport(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 3, "viewport") +#define DRAW_JIT_CTX_TEXTURES 4 +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") #define draw_jit_header_id(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 0, "id") #define draw_jit_header_clip(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "clip") + lp_build_struct_get_ptr(_builder, _ptr, 1, "clip") #define draw_jit_header_data(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 2, "data") @@ -136,7 +140,7 @@ struct draw_jit_context lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") -typedef void +typedef int (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -147,7 +151,7 @@ typedef void unsigned instance_id); -typedef void +typedef int (*draw_jit_vert_func_elts)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -159,8 +163,16 @@ typedef void struct draw_llvm_variant_key { - unsigned nr_vertex_elements:16; - unsigned nr_samplers:16; + unsigned nr_vertex_elements:8; + unsigned nr_samplers:8; + unsigned clip_xy:1; + unsigned clip_z:1; + unsigned clip_user:1; + unsigned clip_halfz:1; + unsigned bypass_viewport:1; + unsigned need_edgeflags:1; + unsigned nr_planes:4; + unsigned pad:6; /* Variable number of vertex elements: */ @@ -290,8 +302,8 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); #endif diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index d417f825a0f..54163d7f9eb 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -169,6 +169,9 @@ struct draw_context unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; + + /* pointer to planes */ + float (*planes)[12][4]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f44bf2507c6..4078b2a07d0 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -287,6 +287,84 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) } +/** Helper code for below */ +#define PRIM_RESTART_LOOP(elements) \ + do { \ + for (i = start; i < end; i++) { \ + if (elements[i] == info->restart_index) { \ + if (cur_count > 0) { \ + /* draw elts up to prev pos */ \ + draw_pt_arrays(draw, prim, cur_start, cur_count); \ + } \ + /* begin new prim at next elt */ \ + cur_start = i + 1; \ + cur_count = 0; \ + } \ + else { \ + cur_count++; \ + } \ + } \ + if (cur_count > 0) { \ + draw_pt_arrays(draw, prim, cur_start, cur_count); \ + } \ + } while (0) + + +/** + * For drawing prims with primitive restart enabled. + * Scan for restart indexes and draw the runs of elements/vertices between + * the restarts. + */ +static void +draw_pt_arrays_restart(struct draw_context *draw, + const struct pipe_draw_info *info) +{ + const unsigned prim = info->mode; + const unsigned start = info->start; + const unsigned count = info->count; + const unsigned end = start + count; + unsigned i, cur_start, cur_count; + + assert(info->primitive_restart); + + if (draw->pt.user.elts) { + /* indexed prims (draw_elements) */ + cur_start = start; + cur_count = 0; + + switch (draw->pt.user.eltSize) { + case 1: + { + const ubyte *elt_ub = (const ubyte *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_ub); + } + break; + case 2: + { + const ushort *elt_us = (const ushort *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_us); + } + break; + case 4: + { + const uint *elt_ui = (const uint *) draw->pt.user.elts; + PRIM_RESTART_LOOP(elt_ui); + } + break; + default: + assert(0 && "bad eltSize in draw_arrays()"); + } + } + else { + /* Non-indexed prims (draw_arrays). + * Primitive restart should have been handled in the state tracker. + */ + draw_pt_arrays(draw, prim, start, count); + } +} + + + /** * Non-instanced drawing. * \sa draw_arrays_instanced @@ -395,6 +473,12 @@ draw_vbo(struct draw_context *draw, for (instance = 0; instance < info->instance_count; instance++) { draw->instance_id = instance + info->start_instance; - draw_pt_arrays(draw, info->mode, info->start, info->count); + + if (info->primitive_restart) { + draw_pt_arrays_restart(draw, info); + } + else { + draw_pt_arrays(draw, info->mode, info->start, info->count); + } } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 77291e304e1..a53a768d029 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -175,6 +175,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, draw->pt.user.vs_constants[0]; fpme->llvm->jit_context.gs_constants = draw->pt.user.gs_constants[0]; + fpme->llvm->jit_context.planes = + (float (*) [12][4]) draw->pt.user.planes[0]; + fpme->llvm->jit_context.viewport = + (float *)draw->viewport.scale; + } @@ -217,6 +222,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, struct draw_vertex_info gs_vert_info; struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; + unsigned clipped = 0; llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; @@ -230,7 +236,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, } if (fetch_info->linear) - fpme->current_variant->jit_func( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context, llvm_vert_info.verts, (const char **)draw->pt.user.vbuffer, fetch_info->start, @@ -239,7 +245,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer, draw->instance_id); else - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, llvm_vert_info.verts, (const char **)draw->pt.user.vbuffer, fetch_info->elts, @@ -266,6 +272,9 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, FREE(vert_info->verts); vert_info = &gs_vert_info; prim_info = &gs_prim_info; + + clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info ); + } /* stream output needs to be done before clipping */ @@ -273,11 +282,11 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, vert_info, prim_info ); - if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { + if (clipped) { opt |= PT_PIPELINE; } - /* Do we need to run the pipeline? + /* Do we need to run the pipeline? Now will come here if clipped */ if (opt & PT_PIPELINE) { pipeline( fpme, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e65c13e64b5..f9a12a41a1b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -983,6 +983,12 @@ enum lp_build_round_sse41_mode }; +/** + * Helper for SSE4.1's ROUNDxx instructions. + * + * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the + * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. + */ static INLINE LLVMValueRef lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef a, @@ -1053,10 +1059,58 @@ lp_build_round_sse41(struct lp_build_context *bld, } +static INLINE LLVMValueRef +lp_build_iround_nearest_sse2(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMTypeRef ret_type = lp_build_int_vec_type(type); + const char *intrinsic; + LLVMValueRef res; + + assert(type.floating); + /* using the double precision conversions is a bit more complicated */ + assert(type.width == 32); + + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse2); + + /* This is relying on MXCSR rounding mode, which should always be nearest. */ + if (type.length == 1) { + LLVMTypeRef vec_type; + LLVMValueRef undef; + LLVMValueRef arg; + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + + vec_type = LLVMVectorType(bld->elem_type, 4); + + intrinsic = "llvm.x86.sse.cvtss2si"; + + undef = LLVMGetUndef(vec_type); + + arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, ""); + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, arg); + } + else { + assert(type.width*type.length == 128); + + intrinsic = "llvm.x86.sse2.cvtps2dq"; + + res = lp_build_intrinsic_unary(bld->builder, intrinsic, + ret_type, a); + } + + return res; +} + + /** - * Return the integer part of a float (vector) value. The returned value is - * a float (vector). - * Ex: trunc(-1.5) = 1.0 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is a float (vector). + * Ex: trunc(-1.5) = -1.0 */ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, @@ -1181,9 +1235,9 @@ lp_build_fract(struct lp_build_context *bld, /** - * Return the integer part of a float (vector) value. The returned value is - * an integer (vector). - * Ex: itrunc(-1.5) = 1 + * Return the integer part of a float (vector) value (== round toward zero). + * The returned value is an integer (vector). + * Ex: itrunc(-1.5) = -1 */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, @@ -1210,32 +1264,40 @@ lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); assert(lp_check_value(type, a)); - if (util_cpu_caps.has_sse4_1 && + if (util_cpu_caps.has_sse2 && + ((type.width == 32) && (type.length == 1 || type.length == 4))) { + return lp_build_iround_nearest_sse2(bld, a); + } + else if (util_cpu_caps.has_sse4_1 && (type.length == 1 || type.width*type.length == 128)) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; LLVMValueRef half; - /* get sign bit */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - - /* sign * 0.5 */ half = lp_build_const_vec(type, 0.5); - half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); - half = LLVMBuildOr(bld->builder, sign, half, ""); - half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + + if (type.sign) { + LLVMTypeRef vec_type = bld->vec_type; + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + } res = LLVMBuildFAdd(bld->builder, a, half, ""); } @@ -1256,7 +1318,7 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); @@ -1267,27 +1329,31 @@ lp_build_ifloor(struct lp_build_context *bld, res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); } else { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef vec_type = lp_build_vec_type(type); - unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; - LLVMValueRef offset; - - /* sign = a < 0 ? ~0 : 0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); - - /* offset = -0.99999(9)f */ - offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); - offset = LLVMConstBitCast(offset, int_vec_type); - - /* offset = a < 0 ? offset : 0.0f */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - - res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); + res = a; + + if (type.sign) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = bld->vec_type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? offset : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); + + res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res"); + } } /* round to nearest (toward zero) */ @@ -1307,7 +1373,7 @@ lp_build_iceil(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMValueRef res; assert(type.floating); @@ -1318,25 +1384,28 @@ lp_build_iceil(struct lp_build_context *bld, res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { - LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef vec_type = bld->vec_type; unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - LLVMValueRef sign; LLVMValueRef offset; - /* sign = a < 0 ? 0 : ~0 */ - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); - sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); - /* offset = 0.99999(9)f */ offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); - offset = LLVMConstBitCast(offset, int_vec_type); - /* offset = a < 0 ? 0.0 : offset */ - offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + if (type.sign) { + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + + /* sign = a < 0 ? 0 : ~0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); + sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); + + /* offset = a < 0 ? 0.0 : offset */ + offset = LLVMConstBitCast(offset, int_vec_type); + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + } res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); } @@ -1348,6 +1417,46 @@ lp_build_iceil(struct lp_build_context *bld, } +/** + * Combined ifloor() & fract(). + * + * Preferred to calling the functions separately, as it will ensure that the + * stratergy (floor() vs ifloor()) that results in less redundant work is used. + */ +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart) +{ + const struct lp_type type = bld->type; + LLVMValueRef ipart; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { + /* + * floor() is easier. + */ + + ipart = lp_build_floor(bld, a); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); + } + else { + /* + * ifloor() is easier. + */ + + *out_ipart = lp_build_ifloor(bld, a); + ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + } +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) @@ -2157,6 +2266,71 @@ lp_build_exp2(struct lp_build_context *bld, /** + * Extract the exponent of a IEEE-754 floating point value. + * + * Optionally apply an integer bias. + * + * Result is an integer value with + * + * ifloor(log2(x)) + bias + */ +LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef res; + + assert(type.floating); + + assert(lp_check_value(bld->type, x)); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), ""); + res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), ""); + + return res; +} + + +/** + * Extract the mantissa of the a floating. + * + * Result is a floating point value with + * + * x / floor(log2(x)) + */ +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x) +{ + const struct lp_type type = bld->type; + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1); + LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type); + LLVMValueRef res; + + assert(lp_check_value(bld->type, x)); + + assert(type.floating); + + x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, ""); + + /* res = x / 2**ipart */ + res = LLVMBuildAnd(bld->builder, x, mantmask, ""); + res = LLVMBuildOr(bld->builder, res, one, ""); + res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + + return res; +} + + + +/** * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ * These coefficients can be generate with * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html @@ -2275,3 +2449,62 @@ lp_build_log2(struct lp_build_context *bld, lp_build_log2_approx(bld, x, NULL, NULL, &res); return res; } + + +/** + * Faster (and less accurate) log2. + * + * log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x)) + * + * Piece-wise linear approximation, with exact results when x is a + * power of two. + * + * See http://www.flipcode.com/archives/Fast_log_Function.shtml + */ +LLVMValueRef +lp_build_fast_log2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef ipart; + LLVMValueRef fpart; + + assert(lp_check_value(bld->type, x)); + + assert(bld->type.floating); + + /* ipart = floor(log2(x)) - 1 */ + ipart = lp_build_extract_exponent(bld, x, -1); + ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, ""); + + /* fpart = x / 2**ipart */ + fpart = lp_build_extract_mantissa(bld, x); + + /* ipart + fpart */ + return LLVMBuildFAdd(bld->builder, ipart, fpart, ""); +} + + +/** + * Fast implementation of iround(log2(x)). + * + * Not an approximation -- it should give accurate results all the time. + */ +LLVMValueRef +lp_build_ilog2(struct lp_build_context *bld, + LLVMValueRef x) +{ + LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2); + LLVMValueRef ipart; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, x)); + + /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */ + x = LLVMBuildFMul(bld->builder, x, sqrt2, ""); + + /* ipart = floor(log2(x) + 0.5) */ + ipart = lp_build_extract_exponent(bld, x, 0); + + return ipart; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 31efa9921ce..c78b61decf0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -171,6 +171,12 @@ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart); + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); @@ -209,9 +215,26 @@ lp_build_exp2(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_extract_exponent(struct lp_build_context *bld, + LLVMValueRef x, + int bias); + +LLVMValueRef +lp_build_extract_mantissa(struct lp_build_context *bld, + LLVMValueRef x); + +LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef a); +LLVMValueRef +lp_build_fast_log2(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ilog2(struct lp_build_context *bld, + LLVMValueRef x); + void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 8b477313d48..6967dd26225 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -63,6 +63,7 @@ #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; - unsigned n; - unsigned long long ubound; - unsigned long long mask; - double scale; - double bias; assert(src_type.floating); + assert(dst_width <= src_type.width); + src_type.sign = FALSE; mantissa = lp_mantissa(src_type); - /* We cannot carry more bits than the mantissa */ - n = MIN2(mantissa, dst_width); + if (dst_width <= mantissa) { + /* + * Apply magic coefficients that will make the desired result to appear + * in the lowest significant bits of the mantissa, with correct rounding. + * + * This only works if the destination width fits in the mantissa. + */ - /* This magic coefficients will make the desired result to appear in the - * lowest significant bits of the mantissa. - */ - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)mask/ubound; - bias = (double)((unsigned long long)1 << (mantissa - n)); + unsigned long long ubound; + unsigned long long mask; + double scale; + double bias; - res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + ubound = (1ULL << dst_width); + mask = ubound - 1; + scale = (double)mask/ubound; + bias = (double)(1ULL << (mantissa - dst_width)); - if(dst_width > n) { - int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); + } + else if (dst_width == (mantissa + 1)) { + /* + * The destination width matches exactly what can be represented in + * floating point (i.e., mantissa + 1 bits). So do a straight + * multiplication followed by casting. No further rounding is necessary. + */ + + double scale; - /* TODO: Fill in the empty lower bits for additional precision? */ - /* YES: this fixes progs/trivial/tri-z-eq.c. - * Otherwise vertex Z=1.0 values get converted to something like - * 0xfffffb00 and the test for equality with 0xffffffff fails. + scale = (double)((1ULL << dst_width) - 1); + + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + } + else { + /* + * The destination exceeds what can be represented in the floating point. + * So multiply by the largest power two we get away with, and when + * subtract the most significant bit to rescale to normalized values. + * + * The largest power of two factor we can get away is + * (1 << (src_type.width - 1)), because we need to use signed . In theory it + * should be (1 << (src_type.width - 2)), but IEEE 754 rules states + * INT_MIN should be returned in FPToSI, which is the correct result for + * values near 1.0! + * + * This means we get (src_type.width - 1) correct bits for values near 0.0, + * and (mantissa + 1) correct bits for values near 1.0. Equally or more + * important, we also get exact results for 0.0 and 1.0. */ -#if 0 - { - LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); - res = LLVMBuildOr(builder, res, msb, ""); - } -#elif 0 - while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); - shift -= n; - n *= 2; + + unsigned n = MIN2(src_type.width - 1, dst_width); + + double scale = (double)(1ULL << n); + unsigned lshift = dst_width - n; + unsigned rshift = n; + LLVMValueRef lshifted; + LLVMValueRef rshifted; + + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + + /* + * Align the most significant bit to its final place. + * + * This will cause 1.0 to overflow to 0, but the later adjustment will + * get it right. + */ + if (lshift) { + lshifted = LLVMBuildShl(builder, res, + lp_build_const_int_vec(src_type, lshift), ""); + } else { + lshifted = res; } -#endif + + /* + * Align the most significant bit to the right. + */ + rshifted = LLVMBuildAShr(builder, res, + lp_build_const_int_vec(src_type, rshift), ""); + + /* + * Subtract the MSB to the LSB, therefore re-scaling from + * (1 << dst_width) to ((1 << dst_width) - 1). + */ + + res = LLVMBuildSub(builder, lshifted, rshifted, ""); } - else - res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, assert(dst_type.floating); + /* Special-case int8->float, though most cases could be handled + * this way: + */ + if (src_width == 8) { + scale = 1.0/255.0; + res = LLVMBuildSIToFP(builder, src, vec_type, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + return res; + } + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder, } num_tmps = num_srcs; + + /* Special case 4x4f --> 1x16ub + */ + if (src_type.floating == 1 && + src_type.fixed == 0 && + src_type.sign == 1 && + src_type.norm == 0 && + src_type.width == 32 && + src_type.length == 4 && + + dst_type.floating == 0 && + dst_type.fixed == 0 && + dst_type.sign == 0 && + dst_type.norm == 1 && + dst_type.width == 8 && + dst_type.length == 16 && + + util_cpu_caps.has_sse2) + { + int i; + + for (i = 0; i < num_dsts; i++, src += 4) { + struct lp_type int16_type = dst_type; + struct lp_type int32_type = dst_type; + LLVMValueRef lo, hi; + LLVMValueRef src_int0; + LLVMValueRef src_int1; + LLVMValueRef src_int2; + LLVMValueRef src_int3; + LLVMTypeRef int16_vec_type; + LLVMTypeRef int32_vec_type; + LLVMTypeRef src_vec_type; + LLVMTypeRef dst_vec_type; + LLVMValueRef const_255f; + LLVMValueRef a, b, c, d; + + int16_type.width *= 2; + int16_type.length /= 2; + int16_type.sign = 1; + + int32_type.width *= 4; + int32_type.length /= 4; + int32_type.sign = 1; + + src_vec_type = lp_build_vec_type(src_type); + dst_vec_type = lp_build_vec_type(dst_type); + int16_vec_type = lp_build_vec_type(int16_type); + int32_vec_type = lp_build_vec_type(int32_type); + + const_255f = lp_build_const_vec(src_type, 255.0f); + + a = LLVMBuildFMul(builder, src[0], const_255f, ""); + b = LLVMBuildFMul(builder, src[1], const_255f, ""); + c = LLVMBuildFMul(builder, src[2], const_255f, ""); + d = LLVMBuildFMul(builder, src[3], const_255f, ""); + + { + struct lp_build_context bld; + + bld.builder = builder; + bld.type = src_type; + bld.vec_type = src_vec_type; + bld.int_elem_type = lp_build_elem_type(int32_type); + bld.int_vec_type = int32_vec_type; + bld.undef = lp_build_undef(src_type); + bld.zero = lp_build_zero(src_type); + bld.one = lp_build_one(src_type); + + src_int0 = lp_build_iround(&bld, a); + src_int1 = lp_build_iround(&bld, b); + src_int2 = lp_build_iround(&bld, c); + src_int3 = lp_build_iround(&bld, d); + } + /* relying on clamping behavior of sse2 intrinsics here */ + lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1); + hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3); + dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi); + } + return; + } + /* * Clamp if necessary */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c index d3a5afff8c2..93e56553d7b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c @@ -57,6 +57,8 @@ lp_disassemble(const void* func) #ifdef HAVE_UDIS86 ud_t ud_obj; uint64_t max_jmp_pc; + uint inst_no; + boolean emit_addrs = TRUE, emit_line_nos = FALSE; ud_init(&ud_obj); @@ -76,13 +78,18 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { + if (emit_addrs) { #ifdef PIPE_ARCH_X86 - debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif - + } + else if (emit_line_nos) { + debug_printf("%6d:\t", inst_no); + inst_no++; + } #if 0 debug_printf("%-16s ", ud_insn_hex(&ud_obj)); #endif @@ -115,8 +122,10 @@ lp_disassemble(const void* func) } } - if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || - ud_obj.mnemonic == UD_Iinvalid) + if (ud_obj.mnemonic == UD_Iinvalid || + (ud_insn_off(&ud_obj) >= max_jmp_pc && + (ud_obj.mnemonic == UD_Iret || + ud_obj.mnemonic == UD_Ijmp))) break; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 369c1bbf09a..eb11dcd4ef4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -36,11 +36,12 @@ #include "util/u_string.h" -#define GALLIVM_DEBUG_TGSI 0x1 -#define GALLIVM_DEBUG_IR 0x2 -#define GALLIVM_DEBUG_ASM 0x4 -#define GALLIVM_DEBUG_NO_OPT 0x8 -#define GALLIVM_DEBUG_PERF 0x10 +#define GALLIVM_DEBUG_TGSI (1 << 0) +#define GALLIVM_DEBUG_IR (1 << 1) +#define GALLIVM_DEBUG_ASM (1 << 2) +#define GALLIVM_DEBUG_NO_OPT (1 << 3) +#define GALLIVM_DEBUG_PERF (1 << 4) +#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5) #ifdef DEBUG diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 5bc9c741a88..a2cee199a01 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -38,273 +38,15 @@ #include "lp_bld_flow.h" -#define LP_BUILD_FLOW_MAX_VARIABLES 64 -#define LP_BUILD_FLOW_MAX_DEPTH 32 - -/** - * Enumeration of all possible flow constructs. - */ -enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, - LP_BUILD_FLOW_IF -}; - - -/** - * Variable declaration scope. - */ -struct lp_build_flow_scope -{ - /** Number of variables declared in this scope */ - unsigned num_variables; -}; - - -/** - * Early exit. Useful to skip to the end of a function or block when - * the execution mask becomes zero or when there is an error condition. - */ -struct lp_build_flow_skip -{ - /** Block to skip to */ - LLVMBasicBlockRef block; - - /** Number of variables declared at the beginning */ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ -}; - - -/** - * if/else/endif. - */ -struct lp_build_flow_if -{ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ - - LLVMValueRef condition; - LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; -}; - - -/** - * Union of all possible flow constructs' data - */ -union lp_build_flow_construct_data -{ - struct lp_build_flow_scope scope; - struct lp_build_flow_skip skip; - struct lp_build_flow_if ifthen; -}; - - -/** - * Element of the flow construct stack. - */ -struct lp_build_flow_construct -{ - enum lp_build_flow_construct_kind kind; - union lp_build_flow_construct_data data; -}; - - /** - * All necessary data to generate LLVM control flow constructs. + * Insert a new block, right where builder is pointing to. * - * Besides keeping track of the control flow construct themselves we also - * need to keep track of variables in order to generate SSA Phi values. - */ -struct lp_build_flow_context -{ - LLVMBuilderRef builder; - - /** - * Control flow stack. - */ - struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; - unsigned num_constructs; - - /** - * Variable stack - */ - LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; - unsigned num_variables; -}; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder) -{ - struct lp_build_flow_context *flow; - - flow = CALLOC_STRUCT(lp_build_flow_context); - if(!flow) - return NULL; - - flow->builder = builder; - - return flow; -} - - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow) -{ - assert(flow->num_constructs == 0); - assert(flow->num_variables == 0); - FREE(flow); -} - - -/** - * Begin/push a new flow control construct, such as a loop, skip block - * or variable scope. - */ -static union lp_build_flow_construct_data * -lp_build_flow_push(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); - if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) - return NULL; - - flow->constructs[flow->num_constructs].kind = kind; - return &flow->constructs[flow->num_constructs++].data; -} - - -/** - * Return the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_peek(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[flow->num_constructs - 1].data; -} - - -/** - * End/pop the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_pop(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[--flow->num_constructs].data; -} - - -/** - * Begin a variable scope. + * This is useful important not only for aesthetic reasons, but also for + * performance reasons, as frequently run blocks should be laid out next to + * each other and fall-throughs maximized. * + * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp. * - */ -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - scope->num_variables = 0; -} - - -/** - * Declare a variable. - * - * A variable is a named entity which can have different LLVMValueRef's at - * different points of the program. This is relevant for control flow because - * when there are multiple branches to a same location we need to replace - * the variable's value with a Phi function as explained in - * http://en.wikipedia.org/wiki/Static_single_assignment_form . - * - * We keep track of variables by keeping around a pointer to where they're - * current. - * - * There are a few cautions to observe: - * - * - Variable's value must not be NULL. If there is no initial value then - * LLVMGetUndef() should be used. - * - * - Variable's value must be kept up-to-date. If the variable is going to be - * modified by a function then a pointer should be passed so that its value - * is accurate. Failure to do this will cause some of the variables' - * transient values to be lost, leading to wrong results. - * - * - A program should be written from top to bottom, by always appending - * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or - * modifying existing statements will most likely lead to wrong results. - * - */ -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(*variable); - if(!*variable) - return; - - assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); - if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) - return; - - flow->variables[flow->num_variables++] = variable; - ++scope->num_variables; -} - - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(flow->num_variables >= scope->num_variables); - if(flow->num_variables < scope->num_variables) { - flow->num_variables = 0; - return; - } - - flow->num_variables -= scope->num_variables; -} - - -/** * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ @@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) } -static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) -{ - return lp_build_insert_new_block(flow->builder, ""); -} - - /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. */ void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +lp_build_flow_skip_begin(struct lp_build_skip_context *skip, + LLVMBuilderRef builder) { - struct lp_build_flow_skip *skip; - LLVMBuilderRef builder; - unsigned i; - - skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; + skip->builder = builder; /* create new basic block */ - skip->block = lp_build_flow_insert_block(flow); - - skip->num_variables = flow->num_variables; - if(!skip->num_variables) { - skip->phi = NULL; - return; - } - - /* Allocate a Phi node for each variable in this skip scope */ - skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); - if(!skip->phi) { - skip->num_variables = 0; - return; - } - - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, skip->block); - - /* create a Phi node for each variable */ - for(i = 0; i < skip->num_variables; ++i) - skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - LLVMDisposeBuilder(builder); + skip->block = lp_build_insert_new_block(skip->builder, "skip"); } @@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) * skip block if the condition is true. */ void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, LLVMValueRef cond) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; LLVMBasicBlockRef new_block; - unsigned i; - - skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - current_block = LLVMGetInsertBlock(flow->builder); - - new_block = lp_build_flow_insert_block(flow); - - /* for each variable, update the Phi node with a (variable, block) pair */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - } + new_block = lp_build_insert_new_block(skip->builder, ""); /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); - LLVMPositionBuilderAtEnd(flow->builder, new_block); + LLVMPositionBuilderAtEnd(skip->builder, new_block); } void -lp_build_flow_skip_end(struct lp_build_flow_context *flow) +lp_build_flow_skip_end(struct lp_build_skip_context *skip) { - struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - unsigned i; - - skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - current_block = LLVMGetInsertBlock(flow->builder); - - /* add (variable, block) tuples to the phi nodes */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - *flow->variables[i] = skip->phi[i]; - } - /* goto block */ - LLVMBuildBr(flow->builder, skip->block); - LLVMPositionBuilderAtEnd(flow->builder, skip->block); - - FREE(skip->phi); + LLVMBuildBr(skip->builder, skip->block); + LLVMPositionBuilderAtEnd(skip->builder, skip->block); } /** * Check if the mask predicate is zero. If so, jump to the end of the block. */ -static void +void lp_build_mask_check(struct lp_build_mask_context *mask) { - LLVMBuilderRef builder = mask->flow->builder; + LLVMBuilderRef builder = mask->skip.builder; + LLVMValueRef value; LLVMValueRef cond; + value = lp_build_mask_value(mask); + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMBuildBitCast(builder, value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); /* if cond, goto end of block */ - lp_build_flow_skip_cond_break(mask->flow, cond); + lp_build_flow_skip_cond_break(&mask->skip, cond); } @@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) */ void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); - mask->flow = flow; mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + mask->var = lp_build_alloca(builder, + lp_build_int_vec_type(type), + "execution_mask"); - lp_build_flow_scope_begin(flow); - lp_build_flow_scope_declare(flow, &mask->value); - lp_build_flow_skip_begin(flow); + LLVMBuildStore(builder, value, mask->var); - lp_build_mask_check(mask); + lp_build_flow_skip_begin(&mask->skip, builder); +} + + +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask) +{ + return LLVMBuildLoad(mask->skip.builder, mask->var, ""); } @@ -504,9 +185,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); + value = LLVMBuildAnd(mask->skip.builder, + lp_build_mask_value(mask), + value, ""); + LLVMBuildStore(mask->skip.builder, value, mask->var); } @@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { - lp_build_flow_skip_end(mask->flow); - lp_build_flow_scope_end(mask->flow); - return mask->value; + lp_build_flow_skip_end(&mask->skip); + return lp_build_mask_value(mask); } @@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder, LLVMValueRef start, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); + state->block = lp_build_insert_new_block(builder, "loop_begin"); - state->block = LLVMAppendBasicBlock(function, "loop"); + state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter"); + + LLVMBuildStore(builder, start, state->counter_var); LLVMBuildBr(builder, state->block); LLVMPositionBuilderAtEnd(builder, state->block); - state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), ""); - - LLVMAddIncoming(state->counter, &start, &block, 1); - + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); } void -lp_build_loop_end(LLVMBuilderRef builder, - LLVMValueRef end, - LLVMValueRef step, - struct lp_build_loop_state *state) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); - LLVMValueRef next; - LLVMValueRef cond; - LLVMBasicBlockRef after_block; - - if (!step) - step = LLVMConstInt(LLVMTypeOf(end), 1, 0); - - next = LLVMBuildAdd(builder, state->counter, step, ""); - - cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, ""); - - after_block = LLVMAppendBasicBlock(function, ""); - - LLVMBuildCondBr(builder, cond, after_block, state->block); - - LLVMAddIncoming(state->counter, &next, &block, 1); - - LLVMPositionBuilderAtEnd(builder, after_block); -} - -void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int llvm_cond, + LLVMIntPredicate llvm_cond, struct lp_build_loop_state *state) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(block); LLVMValueRef next; LLVMValueRef cond; LLVMBasicBlockRef after_block; @@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, next = LLVMBuildAdd(builder, state->counter, step, ""); + LLVMBuildStore(builder, next, state->counter_var); + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); - after_block = LLVMAppendBasicBlock(function, ""); + after_block = lp_build_insert_new_block(builder, "loop_end"); LLVMBuildCondBr(builder, cond, after_block, state->block); - LLVMAddIncoming(state->counter, &next, &block, 1); - LLVMPositionBuilderAtEnd(builder, after_block); + + state->counter = LLVMBuildLoad(builder, state->counter_var, ""); +} + + +void +lp_build_loop_end(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + struct lp_build_loop_state *state) +{ + lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state); } @@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, Is built with: - LLVMValueRef x = LLVMGetUndef(); // or something else + // x needs an alloca variable + x = lp_build_alloca(builder, type, "x"); - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); + lp_build_if(ctx, builder, cond); + LLVMBuildStore(LLVMBuildAdd(1, 2), x); + lp_build_else(ctx); + LLVMBuildStore(LLVMBuildAdd(2, 3). x); + lp_build_endif(ctx); - // x needs a phi node - lp_build_flow_scope_declare(flow, &x); - - lp_build_if(ctx, flow, builder, cond); - x = LLVMAdd(1, 2); - lp_build_else(ctx); - x = LLVMAdd(2, 3); - lp_build_endif(ctx); - - lp_build_flow_scope_end(flow); - - lp_build_flow_destroy(flow); */ @@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder, * Begin an if/else/endif construct. */ void -lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, +lp_build_if(struct lp_build_if_state *ifthen, LLVMBuilderRef builder, LLVMValueRef condition) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); - struct lp_build_flow_if *ifthen; - unsigned i; - - memset(ctx, 0, sizeof(*ctx)); - ctx->builder = builder; - ctx->flow = flow; - /* push/create new scope */ - ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - ifthen->num_variables = flow->num_variables; + memset(ifthen, 0, sizeof *ifthen); + ifthen->builder = builder; ifthen->condition = condition; ifthen->entry_block = block; - /* create a Phi node for each variable in this flow scope */ - ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); - if (!ifthen->phi) { - ifthen->num_variables = 0; - return; - } - /* create endif/merge basic block for the phi functions */ ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); - - /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) { - ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - /* add add the initial value of the var from the entry block */ - if (!LLVMIsUndef(*flow->variables[i])) - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], - &ifthen->entry_block, 1); - } /* create/insert true_block before merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx, * Begin else-part of a conditional */ void -lp_build_else(struct lp_build_if_state *ctx) +lp_build_else(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - unsigned i; - - ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - - /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - } + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMBuildBr(ifthen->builder, ifthen->merge_block); /* create/insert false_block before the merge block */ ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); /* successive code goes into the else block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block); } @@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx) * End a conditional. */ void -lp_build_endif(struct lp_build_if_state *ctx) +lp_build_endif(struct lp_build_if_state *ifthen) { - struct lp_build_flow_context *flow = ctx->flow; - struct lp_build_flow_if *ifthen; - LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); - unsigned i; - - ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; - assert(ifthen); - /* Insert branch to the merge block from current block */ - LLVMBuildBr(ctx->builder, ifthen->merge_block); + LLVMBuildBr(ifthen->builder, ifthen->merge_block); - if (ifthen->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - /* for each variable, update the Phi node with a (variable, block) pair */ - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - else { - /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - - FREE(ifthen->phi); - - /*** - *** Now patch in the various branch instructions. - ***/ + /* + * Now patch in the various branch instructions. + */ /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block); if (ifthen->false_block) { /* we have an else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->false_block); } else { /* no else clause */ - LLVMBuildCondBr(ctx->builder, ifthen->condition, + LLVMBuildCondBr(ifthen->builder, ifthen->condition, ifthen->true_block, ifthen->merge_block); } - /* Insert branch from end of true_block to merge_block */ - if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); - } - else { - /* No else clause. - * Note that we've already inserted the branch at the end of - * true_block. See the very first LLVMBuildBr() call in this function. - */ - } - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); + LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block); } @@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder, } res = LLVMBuildAlloca(first_builder, type, name); + LLVMBuildStore(builder, LLVMConstNull(type), res); LLVMDisposeBuilder(first_builder); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index fffb493a93b..e729ee6eaac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -41,52 +41,49 @@ struct lp_type; -struct lp_build_flow_context; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder); - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable); +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_skip_context +{ + LLVMBuilderRef builder; -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow); + /** Block to skip to */ + LLVMBasicBlockRef block; +}; void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow); +lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, + LLVMBuilderRef builder); void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, LLVMValueRef cond); void -lp_build_flow_skip_end(struct lp_build_flow_context *flow); +lp_build_flow_skip_end(struct lp_build_skip_context *ctx); struct lp_build_mask_context { - struct lp_build_flow_context *flow; + struct lp_build_skip_context skip; LLVMTypeRef reg_type; - LLVMValueRef value; + LLVMValueRef var; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value); +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask); + /** * Bitwise AND the mask with the given value, if a previous mask was set. */ @@ -94,6 +91,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value); +void +lp_build_mask_check(struct lp_build_mask_context *mask); + LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask); @@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask); struct lp_build_loop_state { LLVMBasicBlockRef block; + LLVMValueRef counter_var; LLVMValueRef counter; }; @@ -128,22 +129,28 @@ void lp_build_loop_end_cond(LLVMBuilderRef builder, LLVMValueRef end, LLVMValueRef step, - int cond, /* LLVM condition */ + LLVMIntPredicate cond, struct lp_build_loop_state *state); +/** + * if/else/endif. + */ struct lp_build_if_state { LLVMBuilderRef builder; - struct lp_build_flow_context *flow; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; }; void lp_build_if(struct lp_build_if_state *ctx, - struct lp_build_flow_context *flow, LLVMBuilderRef builder, LLVMValueRef condition); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 761f33b578d..5598ca5c489 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "asm", GALLIVM_DEBUG_ASM, NULL }, { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, + { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index f26fdac4663..0b4b1ca7d11 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -47,4 +47,10 @@ lp_build_init(void); extern void lp_func_delete_body(LLVMValueRef func); + +extern LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name); + + #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index d5c62a3f734..026b60ac36e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder, if(func == PIPE_FUNC_ALWAYS) return ones; - /* TODO: optimize the constant case */ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * There are no unsigned integer comparison instructions in SSE. + */ - /* XXX: It is not clear if we should use the ordered or unordered operators */ + if (!type.floating && !type.sign && + type.width * type.length == 128 && + util_cpu_caps.has_sse2 && + (func == PIPE_FUNC_LESS || + func == PIPE_FUNC_LEQUAL || + func == PIPE_FUNC_GREATER || + func == PIPE_FUNC_GEQUAL) && + (gallivm_debug & GALLIVM_DEBUG_PERF)) { + debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", + __FUNCTION__, type.length, type.width); + } +#endif #if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder, #endif #endif /* HAVE_LLVM < 0x0207 */ + /* XXX: It is not clear if we should use the ordered or unordered operators */ + if(type.floating) { LLVMRealPredicate op; switch(func) { @@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld, LLVMTypeRef arg_type; LLVMValueRef args[3]; - if (type.width == 64) { + if (type.floating && + type.width == 64) { intrinsic = "llvm.x86.sse41.blendvpd"; arg_type = LLVMVectorType(LLVMDoubleType(), 2); - } else if (type.width == 32) { + } else if (type.floating && + type.width == 32) { intrinsic = "llvm.x86.sse41.blendvps"; arg_type = LLVMVectorType(LLVMFloatType(), 4); } else { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 48baf7c425c..f56ddee7fd7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF) llvm::Function *func = llvm::unwrap<llvm::Function>(FF); func->deleteBody(); } + + +extern "C" +LLVMValueRef +lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal, + const char *Name) +{ + return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name)); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c index 153ba5b15b1..f418e96aff4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -29,6 +29,8 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_string.h" +#include "lp_bld_const.h" #include "lp_bld_printf.h" @@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) return LLVMBuildCall(builder, func_printf, params, argcount + 1, ""); } + + +/** + * Print a float[4] vector. + */ +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec) +{ + char format[1000]; + LLVMValueRef x, y, z, w; + + x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), ""); + y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), ""); + z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), ""); + w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), ""); + + util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg); + return lp_build_printf(builder, format, x, y, z, w); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h index 83bd8f1d557..b6222c62ebe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -35,5 +35,9 @@ LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); +LLVMValueRef +lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec); + + #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 7b1088939b9..c18c8b47100 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -81,11 +81,15 @@ LLVMValueRef lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); - LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); - LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return lp_build_sub(bld, a_right, a_left); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0); + LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left"); + LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx"); + else + return LLVMBuildSub(bld->builder, a_right, a_left, "ddx"); } @@ -93,9 +97,13 @@ LLVMValueRef lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0); - LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); - LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); - LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return lp_build_sub(bld, a_bottom, a_top); + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0); + LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0); + LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top"); + LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom"); + if (bld->type.floating) + return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy"); + else + return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy"); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index aee94c1b866..844d1d935b5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -39,12 +39,19 @@ #include "lp_bld_arit.h" #include "lp_bld_const.h" #include "lp_bld_debug.h" +#include "lp_bld_printf.h" #include "lp_bld_flow.h" #include "lp_bld_sample.h" #include "lp_bld_swizzle.h" #include "lp_bld_type.h" +/* + * Bri-linear factor. Should be greater than one. + */ +#define BRILINEAR_FACTOR 2 + + /** * Does the given texture wrap mode allow sampling the texture border color? * XXX maybe move this into gallium util code. @@ -126,17 +133,32 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->wrap_r = sampler->wrap_r; state->min_img_filter = sampler->min_img_filter; state->mag_img_filter = sampler->mag_img_filter; - if (view->last_level) { + + if (view->last_level && sampler->max_lod > 0.0f) { state->min_mip_filter = sampler->min_mip_filter; } else { state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; } - /* If min_lod == max_lod we can greatly simplify mipmap selection. - * This is a case that occurs during automatic mipmap generation. - */ - if (sampler->min_lod == sampler->max_lod) { - state->min_max_lod_equal = 1; + if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + if (sampler->lod_bias != 0.0f) { + state->lod_bias_non_zero = 1; + } + + /* If min_lod == max_lod we can greatly simplify mipmap selection. + * This is a case that occurs during automatic mipmap generation. + */ + if (sampler->min_lod == sampler->max_lod) { + state->min_max_lod_equal = 1; + } else { + if (sampler->min_lod > 0.0f) { + state->apply_min_lod = 1; + } + + if (sampler->max_lod < (float)view->last_level) { + state->apply_max_lod = 1; + } + } } state->compare_mode = sampler->compare_mode; @@ -153,6 +175,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** + * Generate code to compute coordinate gradient (rho). + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y + * + * XXX: The resulting rho is scalar, so we ignore all but the first element of + * derivatives that are passed by the shader. + */ +static LLVMValueRef +lp_build_rho(struct lp_build_sample_context *bld, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4]) +{ + struct lp_build_context *float_size_bld = &bld->float_size_bld; + struct lp_build_context *float_bld = &bld->float_bld; + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); + LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho_x, rho_y; + LLVMValueRef rho_vec; + LLVMValueRef float_size; + LLVMValueRef rho; + + dsdx = ddx[0]; + dsdy = ddy[0]; + + if (dims <= 1) { + rho_x = dsdx; + rho_y = dsdy; + } + else { + rho_x = float_size_bld->undef; + rho_y = float_size_bld->undef; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); + + dtdx = ddx[1]; + dtdy = ddy[1]; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); + + if (dims >= 3) { + drdx = ddx[2]; + drdy = ddy[2]; + + rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); + rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); + } + } + + rho_x = lp_build_abs(float_size_bld, rho_x); + rho_y = lp_build_abs(float_size_bld, rho_y); + + rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); + + float_size = lp_build_int_to_float(float_size_bld, bld->int_size); + + rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); + + if (dims <= 1) { + rho = rho_vec; + } + else { + if (dims >= 2) { + LLVMValueRef rho_s, rho_t, rho_r; + + rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); + rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, ""); + + rho = lp_build_max(float_bld, rho_s, rho_t); + + if (dims >= 3) { + rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, ""); + rho = lp_build_max(float_bld, rho, rho_r); + } + } + } + + return rho; +} + + +/* + * Bri-linear lod computation + * + * Use a piece-wise linear approximation of log2 such that: + * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc. + * - linear approximation for values in the neighborhood of 0.5, 1.5., etc, + * with the steepness specified in 'factor' + * - exact result for 0.5, 1.5, etc. + * + * + * 1.0 - /----* + * / + * / + * / + * 0.5 - * + * / + * / + * / + * 0.0 - *----/ + * + * | | + * 2^0 2^1 + * + * This is a technique also commonly used in hardware: + * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html + * + * TODO: For correctness, this should only be applied when texture is known to + * have regular mipmaps, i.e., mipmaps derived from the base level. + * + * TODO: This could be done in fixed point, where applicable. + */ +static void +lp_build_brilinear_lod(struct lp_build_context *bld, + LLVMValueRef lod, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_fpart; + double pre_offset = (factor - 0.5)/factor - 0.5; + double post_offset = 1 - factor; + + if (0) { + lp_build_printf(bld->builder, "lod = %f\n", lod); + } + + lod = lp_build_add(bld, lod, + lp_build_const_vec(bld->type, pre_offset)); + + lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * It's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_fpart = lod_fpart; + + if (0) { + lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart); + lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart); + } +} + + +/* + * Combined log2 and brilinear lod computation. + * + * It's in all identical to calling lp_build_fast_log2() and + * lp_build_brilinear_lod() above, but by combining we can compute the interger + * and fractional part independently. + */ +static void +lp_build_brilinear_rho(struct lp_build_context *bld, + LLVMValueRef rho, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_ipart; + LLVMValueRef lod_fpart; + + const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); + const double post_offset = 1 - 2*factor; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, rho)); + + /* + * The pre factor will make the intersections with the exact powers of two + * happen precisely where we want then to be, which means that the integer + * part will not need any post adjustments. + */ + rho = lp_build_mul(bld, rho, + lp_build_const_vec(bld->type, pre_factor)); + + /* ipart = ifloor(log2(rho)) */ + lod_ipart = lp_build_extract_exponent(bld, rho, 0); + + /* fpart = rho / 2**ipart */ + lod_fpart = lp_build_extract_mantissa(bld, rho); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_ipart = lod_ipart; + *out_lod_fpart = lod_fpart; +} + + +/** * Generate code to compute texture level of detail (lambda). * \param ddx partial derivatives of (s, t, r, q) with respect to X * \param ddy partial derivatives of (s, t, r, q) with respect to Y @@ -165,85 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, * XXX: The resulting lod is scalar, so ignore all but the first element of * derivatives, lod_bias, etc that are passed by the shader. */ -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) { - LLVMValueRef min_lod = - bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod; + + *out_lod_ipart = bld->int_bld.zero; + *out_lod_fpart = bld->float_bld.zero; if (bld->static_state->min_max_lod_equal) { /* User is forcing sampling from a particular mipmap level. * This is hit during mipmap generation. */ - return min_lod; + LLVMValueRef min_lod = + bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + + lod = min_lod; } else { - struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef sampler_lod_bias = bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit); - LLVMValueRef max_lod = - bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit); LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef lod; if (explicit_lod) { lod = LLVMBuildExtractElement(bld->builder, explicit_lod, index0, ""); } else { - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef dsdx, dsdy; - LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; LLVMValueRef rho; - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); - drdy = lp_build_abs(float_bld, drdy); - } - } + rho = lp_build_rho(bld, ddx, ddy); - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat + /* + * Compute lod = log2(rho) */ - rho = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); - rho = lp_build_max(float_bld, rho, max); + + if (!lod_bias && + !bld->static_state->lod_bias_non_zero && + !bld->static_state->apply_max_lod && + !bld->static_state->apply_min_lod) { + /* + * Special case when there are no post-log2 adjustments, which + * saves instructions but keeping the integer and fractional lod + * computations separate from the start. + */ + + if (mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && + !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + return; } } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + if (0) { + lod = lp_build_log2(float_bld, rho); + } + else { + lod = lp_build_fast_log2(float_bld, rho); + } /* add shader lod bias */ if (lod_bias) { @@ -254,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, } /* add sampler lod bias */ - lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + if (bld->static_state->lod_bias_non_zero) + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + if (bld->static_state->apply_max_lod) { + LLVMValueRef max_lod = + bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit); + + lod = lp_build_min(float_bld, lod, max_lod); + } + if (bld->static_state->apply_min_lod) { + LLVMValueRef min_lod = + bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); + + lod = lp_build_max(float_bld, lod, min_lod); + } + } - return lod; + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + } + else { + lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); + } + + lp_build_name(*out_lod_fpart, "lod_fpart"); + } + else { + *out_lod_ipart = lp_build_iround(float_bld, lod); } + + lp_build_name(*out_lod_ipart, "lod_ipart"); + + return; } @@ -274,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, LLVMValueRef *level_out) { - struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; @@ -287,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, bld->builder, unit); /* convert float lod to integer */ - level = lp_build_iround(float_bld, lod); + level = lod_ipart; /* clamp level to legal range of levels */ *level_out = lp_build_clamp(int_bld, level, zero, last_level); @@ -302,32 +563,61 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, + LLVMValueRef *lod_fpart_inout, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) + LLVMValueRef *level1_out) { - struct lp_build_context *float_bld = &bld->float_bld; + LLVMBuilderRef builder = bld->builder; struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef last_level; + LLVMValueRef clamp_min; + LLVMValueRef clamp_max; + + *level0_out = lod_ipart; + *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one); last_level = bld->dynamic_state->last_level(bld->dynamic_state, bld->builder, unit); - /* convert float lod to integer */ - level = lp_build_ifloor(float_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - level = lp_build_add(int_bld, level, int_bld->one); - *level1_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - - *weight_out = lp_build_fract(float_bld, lod); + /* + * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the + * minimum number of comparisons, and zeroing lod_fpart in the extreme + * ends in the process. + */ + + /* lod_ipart < 0 */ + clamp_min = LLVMBuildICmp(builder, LLVMIntSLT, + lod_ipart, int_bld->zero, + "clamp_lod_to_zero"); + + *level0_out = LLVMBuildSelect(builder, clamp_min, + int_bld->zero, *level0_out, ""); + + *level1_out = LLVMBuildSelect(builder, clamp_min, + int_bld->zero, *level1_out, ""); + + *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min, + float_bld->zero, *lod_fpart_inout, ""); + + /* lod_ipart >= last_level */ + clamp_max = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, last_level, + "clamp_lod_to_last"); + + *level0_out = LLVMBuildSelect(builder, clamp_max, + last_level, *level0_out, ""); + + *level1_out = LLVMBuildSelect(builder, clamp_max, + last_level, *level1_out, ""); + + *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max, + float_bld->zero, *lod_fpart_inout, ""); + + lp_build_name(*level0_out, "sampler%u_miplevel0", unit); + lp_build_name(*level1_out, "sampler%u_miplevel1", unit); + lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit); } @@ -338,12 +628,12 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, */ LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level) + LLVMValueRef level) { LLVMValueRef indexes[2], data_ptr; indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indexes[1] = level; - data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, ""); data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); return data_ptr; } @@ -351,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level) + int level) { LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_mipmap_level(bld, data_array, lvl); + return lp_build_get_mipmap_level(bld, lvl); } @@ -363,18 +653,22 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, * Return max(1, base_size >> level); */ static LLVMValueRef -lp_build_minify(struct lp_build_sample_context *bld, +lp_build_minify(struct lp_build_context *bld, LLVMValueRef base_size, LLVMValueRef level) { - if (level == bld->int_coord_bld.zero) { + assert(lp_check_value(bld->type, base_size)); + assert(lp_check_value(bld->type, level)); + + if (level == bld->zero) { /* if we're using mipmap level zero, no minification is needed */ return base_size; } else { LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); - size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); + assert(bld->type.sign); + size = lp_build_max(bld, size, bld->one); return size; } } @@ -405,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, */ void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, - unsigned dims, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef ilevel0, - LLVMValueRef ilevel1, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef *width0_vec, - LLVMValueRef *width1_vec, - LLVMValueRef *height0_vec, - LLVMValueRef *height1_vec, - LLVMValueRef *depth0_vec, - LLVMValueRef *depth1_vec, - LLVMValueRef *row_stride0_vec, - LLVMValueRef *row_stride1_vec, - LLVMValueRef *img_stride0_vec, - LLVMValueRef *img_stride1_vec) + LLVMValueRef ilevel, + LLVMValueRef *out_size, + LLVMValueRef *row_stride_vec, + LLVMValueRef *img_stride_vec) { - const unsigned mip_filter = bld->static_state->min_mip_filter; - LLVMValueRef ilevel0_vec, ilevel1_vec; + const unsigned dims = bld->dims; + LLVMValueRef ilevel_vec; - ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) - ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); /* - * Compute width, height, depth at mipmap level 'ilevel0' + * Compute width, height, depth at mipmap level 'ilevel' */ - *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + if (dims >= 2) { - *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); - *row_stride0_vec = lp_build_get_level_stride_vec(bld, - row_stride_array, - ilevel0); + *row_stride_vec = lp_build_get_level_stride_vec(bld, + bld->row_stride_array, + ilevel); if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - *img_stride0_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel0); - if (dims == 3) { - *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); - } + *img_stride_vec = lp_build_get_level_stride_vec(bld, + bld->img_stride_array, + ilevel); } } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at 'ilevel1' */ - *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); - if (dims >= 2) { - *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); - *row_stride1_vec = lp_build_get_level_stride_vec(bld, - row_stride_array, - ilevel1); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - *img_stride1_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel1); - if (dims == 3) { - *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); - } - } +} + + +/** + * Extract and broadcast texture size. + * + * @param size_type type of the texture size vector (either + * bld->int_size_type or bld->float_size_type) + * @param coord_type type of the texture size vector (either + * bld->int_coord_type or bld->coord_type) + * @param int_size vector with the integer texture size (width, height, + * depth) + */ +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth) +{ + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + + *out_width = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); } } } +/** + * Unnormalize coords. + * + * @param int_size vector with the integer texture size (width, height, depth) + */ +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r) +{ + const unsigned dims = bld->dims; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &width, + &height, + &depth); + + /* s = s * width, t = t * height */ + *s = lp_build_mul(&bld->coord_bld, *s, width); + if (dims >= 2) { + *t = lp_build_mul(&bld->coord_bld, *t, height); + if (dims >= 3) { + *r = lp_build_mul(&bld->coord_bld, *r, depth); + } + } +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef @@ -588,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); { - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; + LLVMValueRef face_s_var; + LLVMValueRef face_t_var; + LLVMValueRef face_var; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - *face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; + face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); + face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); + face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); - - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); { /* +/- X face */ LLVMValueRef sign = lp_build_sgn(float_bld, rx); @@ -616,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rx, PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx); { - struct lp_build_flow_context *flow_ctx2; struct lp_build_if_state if_ctx2; - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); { /* +/- Y face */ LLVMValueRef sign = lp_build_sgn(float_bld, ry); LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, + *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + *face = lp_build_cube_face(bld, ry, PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx2); { /* +/- Z face */ LLVMValueRef sign = lp_build_sgn(float_bld, rz); LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, + *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rz, PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; } lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + + *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); + *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); + *face = LLVMBuildLoad(bld->builder, face_var, "face"); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 4d2eeaa5eb4..ffed27cee83 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -83,6 +83,9 @@ struct lp_sampler_static_state unsigned compare_func:3; unsigned normalized_coords:1; unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */ + unsigned lod_bias_non_zero:1; + unsigned apply_min_lod:1; /**< min_lod > 0 ? */ + unsigned apply_max_lod:1; /**< max_lod < last_level ? */ }; @@ -176,6 +179,9 @@ struct lp_build_sample_context const struct util_format_description *format_desc; + /* See texture_dims() */ + unsigned dims; + /** regular scalar float type */ struct lp_type float_type; struct lp_build_context float_bld; @@ -191,17 +197,32 @@ struct lp_build_sample_context struct lp_type coord_type; struct lp_build_context coord_bld; - /** Unsigned integer coordinates */ - struct lp_type uint_coord_type; - struct lp_build_context uint_coord_bld; - /** Signed integer coordinates */ struct lp_type int_coord_type; struct lp_build_context int_coord_bld; + /** Unsigned integer texture size */ + struct lp_type int_size_type; + struct lp_build_context int_size_bld; + + /** Unsigned integer texture size */ + struct lp_type float_size_type; + struct lp_build_context float_size_bld; + /** Output texels type and build context */ struct lp_type texel_type; struct lp_build_context texel_bld; + + /* Common dynamic state values */ + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + LLVMValueRef row_stride_array; + LLVMValueRef img_stride_array; + LLVMValueRef data_array; + + /** Integer vector with texture width, height, depth */ + LLVMValueRef int_size; }; @@ -238,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld, } -static INLINE int +static INLINE unsigned texture_dims(enum pipe_texture_target tex) { switch (tex) { @@ -271,16 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth); + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart); void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, @@ -291,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, + LLVMValueRef *lod_fpart_inout, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out); + LLVMValueRef *level1_out); LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level); + LLVMValueRef level); LLVMValueRef lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level); + int level); void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, - unsigned dims, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef ilevel0, - LLVMValueRef ilevel1, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef *width0_vec, - LLVMValueRef *width1_vec, - LLVMValueRef *height0_vec, - LLVMValueRef *height1_vec, - LLVMValueRef *depth0_vec, - LLVMValueRef *depth1_vec, - LLVMValueRef *row_stride0_vec, - LLVMValueRef *row_stride1_vec, - LLVMValueRef *img_stride0_vec, - LLVMValueRef *img_stride1_vec); + LLVMValueRef ilevel, + LLVMValueRef *out_size_vec, + LLVMValueRef *row_stride_vec, + LLVMValueRef *img_stride_vec); + + +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth); + + +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 49a6eed615f..d6831a580b3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,6 +45,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" @@ -80,11 +81,10 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_i) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: @@ -92,7 +92,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord = LLVMBuildAdd(bld->builder, coord, bias, ""); coord = LLVMBuildURem(bld->builder, coord, length, ""); } @@ -113,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, assert(0); } - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); } @@ -146,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i0, LLVMValueRef *i1) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -188,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * multiplication. */ - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; + *i0 = int_coord_bld->zero; + *i1 = int_coord_bld->zero; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); @@ -200,7 +199,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, } else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); } @@ -208,9 +207,9 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = lp_build_compare(bld->builder, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), + lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; @@ -225,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = lp_build_add(int_coord_bld, *offset0, LLVMBuildAnd(bld->builder, stride, mask, "")); break; @@ -239,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; + *offset0 = int_coord_bld->zero; + *offset1 = int_coord_bld->zero; break; } } @@ -253,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -265,12 +262,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; - LLVMValueRef s_ipart, t_ipart, r_ipart; + LLVMValueRef width_vec, height_vec, depth_vec; + LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; LLVMValueRef x_subcoord, y_subcoord, z_subcoord; @@ -283,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -324,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -343,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_t, &y_offset, &y_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); if (dims >= 3) { LLVMValueRef z_offset; lp_build_sample_wrap_nearest_int(bld, @@ -352,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_r, &z_offset, &z_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; /* The r coord is the cube face in [0,5] */ - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } } @@ -417,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -429,14 +428,15 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; + LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL; + LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL; LLVMValueRef x_stride, y_stride, z_stride; LLVMValueRef x_offset0, x_offset1; LLVMValueRef y_offset0, y_offset1; @@ -458,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -517,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -548,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (z = 0; z < 2; z++) { for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x] = lp_build_add(&bld->int_coord_bld, offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x] = lp_build_add(&bld->int_coord_bld, offset[z][1][x], y_offset1); } } @@ -566,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, &z_subcoord[0], &z_subcoord[1]); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x] = lp_build_add(&bld->int_coord_bld, offset[1][y][x], z_offset1); } } } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { /* The r coord is the cube face in [0,5] */ - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset); } } @@ -781,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef colors_lo_var, + LLVMValueRef colors_hi_var) { + LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; + + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } + /* Store the first level's colors in the output variables */ + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - struct lp_build_context h16; - lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); - - *colors_lo = lp_build_lerp(&h16, lod_fpart, - colors0_lo, colors1_lo); - *colors_hi = lp_build_lerp(&h16, lod_fpart, - colors0_hi, colors1_hi); - } - else { - /* use first/only level's colors */ - *colors_lo = colors0_lo; - *colors_hi = colors0_hi; + LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); + LLVMTypeRef i32_type = LLVMIntType(32); + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); + lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, + lod_fpart, LLVMConstNull(i32_type), + "need_lerp"); + + lp_build_if(&if_ctx, builder, need_lerp); + { + struct lp_build_context h16_bld; + + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); + + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + else { + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); + lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); + +#if HAVE_LLVM == 0x208 + /* This is a work-around for a bug in LLVM 2.8. + * Evidently, something goes wrong in the construction of the + * lod_fpart short[8] vector. Adding this no-effect shuffle seems + * to force the vector to be properly constructed. + * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). + */ + { + LLVMValueRef shuffles[8], shuffle; + int i; + assert(h16_bld.type.length <= Elements(shuffles)); + for (i = 0; i < h16_bld.type.length; i++) + shuffles[i] = lp_build_const_int32(2 * (i & 1)); + shuffle = LLVMConstVector(shuffles, h16_bld.type.length); + lod_fpart = LLVMBuildShuffleVector(builder, + lod_fpart, lod_fpart, + shuffle, ""); + } +#endif + + colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, + colors0_lo, colors1_lo); + colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, + colors0_hi, colors1_hi); + + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + } + lp_build_endif(&if_ctx); } } @@ -871,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + const unsigned dims = bld->dims; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; - struct lp_build_context h16; - LLVMTypeRef h16_vec_type; + struct lp_build_context h16_bld; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); @@ -910,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - h16_vec_type = lp_build_vec_type(h16.type); - + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { @@ -924,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; ddy = face_ddy; } - /* * Compute the level of detail (float). */ @@ -945,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, unit, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 - * If mipfilter=linear, also compute the weight between the two - * mipmap levels: lod_fpart */ switch (mip_filter) { default: @@ -966,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + ilevel0 = i32t_zero; } break; case PIPE_TEX_MIPFILTER_NEAREST: - assert(lod); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: - { - LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMValueRef i255 = lp_build_const_int32(255); - LLVMTypeRef i16_type = LLVMIntType(16); - - assert(lod); - - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); - lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); - lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); - lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); - lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); - - /* the lod_fpart values will be fixed pt values in [0,1) */ - } + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); - } + packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo"); + packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi"); - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } else { /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow_ctx); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - packed_lo = LLVMGetUndef(h16_vec_type); - packed_hi = LLVMGetUndef(h16_vec_type); - - lp_build_flow_scope_declare(flow_ctx, &packed_lo); - lp_build_flow_scope_declare(flow_ctx, &packed_hi); - - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } lp_build_else(&if_ctx); { /* Use the magnification filter */ - lp_build_sample_mipmap(bld, mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + i32t_zero, NULL, NULL, + packed_lo, packed_hi); } lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); } - /* combine 'packed_lo', 'packed_hi' into 'packed' */ - { - struct lp_build_context h16, u8n; - - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - packed = lp_build_pack2(builder, h16.type, u8n.type, - packed_lo, packed_hi); - } + /* + * combine the values stored in 'packed_lo' and 'packed_hi' variables + * into 'packed' + */ + packed = lp_build_pack2(builder, + h16_bld.type, lp_type_unorm(8), + LLVMBuildLoad(builder, packed_lo, ""), + LLVMBuildLoad(builder, packed_hi, "")); /* * Convert to SoA and swizzle. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h index e1045bbbc21..5d9ecac4d50 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h @@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 36a77d3aff0..53cc0c5f345 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -82,7 +82,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef texel_out[4]) { const struct lp_sampler_static_state *static_state = bld->static_state; - const int dims = texture_dims(static_state->target); + const unsigned dims = bld->dims; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; LLVMValueRef i, j; @@ -131,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } /* convert x,y,z coords to linear offset from start of texture, in bytes */ - lp_build_sample_offset(&bld->uint_coord_bld, + lp_build_sample_offset(&bld->int_coord_bld, bld->format_desc, x, y, z, y_stride, z_stride, &offset, &i, &j); @@ -145,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * coords which are out of bounds to become zero. Zero's guaranteed * to be inside the texture image. */ - offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border); + offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); } lp_build_fetch_rgba_soa(bld->builder, @@ -202,11 +202,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef fract, flr, isOdd; - /* fract = coord - floor(coord) */ - fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); - - /* flr = ifloor(coord); */ - flr = lp_build_ifloor(coord_bld, coord); + lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); /* isOdd = flr & 1 */ isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); @@ -234,6 +230,7 @@ static void lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode, LLVMValueRef *x0_out, @@ -242,10 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; switch(wrap_mode) { @@ -253,23 +248,25 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* mul by size and subtract 0.5 */ coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); - coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); /* repeat wrap */ if (is_pot) { + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); } else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + LLVMValueRef mask; coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); - coord1 = LLVMBuildAdd(bld->builder, coord1, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + coord1 = LLVMBuildAnd(bld->builder, + lp_build_add(int_coord_bld, coord0, int_coord_bld->one), + mask, ""); } break; @@ -284,53 +281,47 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_state->normalized_coords) { - /* clamp to [0,1] */ - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); - /* mul by tex size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length_f); + { + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; + + if (bld->static_state->normalized_coords) { + /* mul by tex size */ + coord = lp_build_mul(coord_bld, coord, length_f); + } + /* clamp to length max */ + coord = lp_build_min(coord_bld, coord, length_f); + /* subtract 0.5 */ coord = lp_build_sub(coord_bld, coord, half); + /* clamp to [0, length - 0.5] */ + coord = lp_build_max(coord_bld, coord, coord_bld->zero); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + /* coord1 = min(coord1, length-1) */ + coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); + break; } - else { - LLVMValueRef min, max; - /* clamp to [0.5, length - 0.5] */ - min = half; - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - } - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* coord0 = floor(coord); */ - coord0 = lp_build_ifloor(coord_bld, coord); - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - /* coord0 = max(coord0, 0) */ - coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); - /* coord1 = min(coord1, length-1) */ - coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); - break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: { - LLVMValueRef min, max; + LLVMValueRef min; if (bld->static_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_vec(coord_bld->type, -0.5F); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); + /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); + min = lp_build_const_vec(coord_bld->type, -1.0F); + coord = lp_build_clamp(coord_bld, coord, min, length_f); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -343,11 +334,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - - /* convert to int coords */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); /* coord0 = max(coord0, 0) */ @@ -369,15 +357,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: { LLVMValueRef min, max; - + struct lp_build_context abs_coord_bld = bld->coord_bld; + abs_coord_bld.type.sign = FALSE; coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -392,16 +381,14 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { - LLVMValueRef min, max; - coord = lp_build_abs(coord_bld, coord); if (bld->static_state->normalized_coords) { @@ -409,15 +396,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - /* clamp to [-0.5, length + 0.5] */ - min = lp_build_negate(coord_bld, half); - max = lp_build_sub(coord_bld, length_f, min); - coord = lp_build_clamp(coord_bld, coord, min, max); - + /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ + /* skip -0.5 clamp (always positive), do sub first */ coord = lp_build_sub(coord_bld, coord, half); + coord = lp_build_min(coord_bld, coord, length_f); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -446,14 +431,13 @@ static LLVMValueRef lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef length_f, boolean is_pot, unsigned wrap_mode) { struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); - LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; switch(wrap_mode) { @@ -464,7 +448,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ - LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024); + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); icoord = LLVMBuildAdd(bld->builder, icoord, bias, ""); icoord = LLVMBuildURem(bld->builder, icoord, length, ""); } @@ -478,7 +462,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } /* floor */ - icoord = lp_build_ifloor(coord_bld, coord); + /* use itrunc instead since we clamp to 0 anyway */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1]. */ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, @@ -512,7 +497,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, assert(bld->static_state->normalized_coords); coord = lp_build_mul(coord_bld, coord, length_f); - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -527,7 +513,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length - 1] */ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); @@ -541,7 +528,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } - icoord = lp_build_ifloor(coord_bld, coord); + /* itrunc == ifloor here */ + icoord = lp_build_itrunc(coord_bld, coord); /* clamp to [0, length] */ icoord = lp_build_min(int_coord_bld, icoord, length); @@ -563,9 +551,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -574,25 +560,46 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef r, LLVMValueRef colors_out[4]) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x, y, z; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. */ - x = lp_build_sample_wrap_nearest(bld, s, width_vec, + x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s); lp_build_name(x, "tex.x.wrapped"); if (dims >= 2) { - y = lp_build_sample_wrap_nearest(bld, t, height_vec, + y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t); lp_build_name(y, "tex.y.wrapped"); if (dims == 3) { - z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, bld->static_state->pot_depth, bld->static_state->wrap_r); lp_build_name(z, "tex.z.wrapped"); @@ -626,9 +633,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -637,16 +642,37 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef r, LLVMValueRef colors_out[4]) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; + LLVMValueRef width_vec; + LLVMValueRef height_vec; + LLVMValueRef depth_vec; + LLVMValueRef flt_size; + LLVMValueRef flt_width_vec; + LLVMValueRef flt_height_vec; + LLVMValueRef flt_depth_vec; LLVMValueRef x0, y0, z0, x1, y1, z1; LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; int chan; + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + size, + &width_vec, &height_vec, &depth_vec); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, size); + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &flt_width_vec, &flt_height_vec, &flt_depth_vec); + /* * Compute integer texcoords. */ - lp_build_sample_wrap_linear(bld, s, width_vec, + lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, bld->static_state->pot_width, bld->static_state->wrap_s, &x0, &x1, &s_fpart); @@ -654,7 +680,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(x1, "tex.x1.wrapped"); if (dims >= 2) { - lp_build_sample_wrap_linear(bld, t, height_vec, + lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, bld->static_state->pot_height, bld->static_state->wrap_t, &y0, &y1, &t_fpart); @@ -662,7 +688,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(y1, "tex.y1.wrapped"); if (dims == 3) { - lp_build_sample_wrap_linear(bld, r, depth_vec, + lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, bld->static_state->pot_depth, bld->static_state->wrap_r, &z0, &z1, &r_fpart); @@ -799,69 +825,92 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, LLVMValueRef *colors_out) { + LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0[4], colors1[4]; - int chan; + unsigned chan; + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, unit, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, unit, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } + data_ptr0, s, t, r, + colors0); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ lp_build_sample_image_linear(bld, unit, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); + data_ptr0, s, t, r, + colors0); + } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, unit, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } + /* Store the first level's colors in the output variables */ + for (chan = 0; chan < 4; chan++) { + LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, + lod_fpart, + bld->float_bld.zero, + "need_lerp"); + + lp_build_if(&if_ctx, builder, need_lerp); + { + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, unit, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + colors1); + } + else { + lp_build_sample_image_linear(bld, unit, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + colors1); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = lp_build_broadcast_scalar(&bld->texel_bld, lod_fpart); + + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan]); + LLVMBuildStore(builder, colors0[chan], colors_out[chan]); + } } - } - else { - /* use first/only level's colors */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = colors0[chan]; - } + lp_build_endif(&if_ctx); } } @@ -882,30 +931,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef *colors_out) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef face_ddx[4], face_ddy[4]; + LLVMValueRef texels[4]; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); + unsigned chan; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -924,12 +963,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; @@ -944,126 +983,100 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, unit, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* - * Compute integer mipmap level(s) to fetch texels from. + * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 */ - if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + switch (mip_filter) { + default: + assert(0 && "bad mip_filter value in lp_build_sample_soa()"); + /* fall-through */ + case PIPE_TEX_MIPFILTER_NONE: /* always use mip level 0 */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { /* XXX this is a work-around for an apparent bug in LLVM 2.7. * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - } - } - else { - assert(lod); - if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); - } - else { - assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + ilevel0 = i32t_zero; } + break; + case PIPE_TEX_MIPFILTER_NEAREST: + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); + break; + case PIPE_TEX_MIPFILTER_LINEAR: + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); + break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + + for (chan = 0; chan < 4; ++chan) { + texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, ""); + lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]); } - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ lp_build_sample_mipmap(bld, unit, - min_filter, mip_filter, s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + texels); } else { /* Emit conditional to choose min image filter or mag image filter - * depending on the lod being >0 or <= 0, respectively. + * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); - lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); - - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, unit, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + s, t, r, + ilevel0, ilevel1, lod_fpart, + texels); } lp_build_else(&if_ctx); { /* Use the magnification filter */ lp_build_sample_mipmap(bld, unit, - mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - colors_out); + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + i32t_zero, NULL, NULL, + texels); } lp_build_endif(&if_ctx); + } - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + for (chan = 0; chan < 4; ++chan) { + colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); + lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]); } } @@ -1147,12 +1160,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) { + unsigned dims = texture_dims(static_state->target); struct lp_build_sample_context bld; - LLVMValueRef width, width_vec; - LLVMValueRef height, height_vec; - LLVMValueRef depth, depth_vec; - LLVMValueRef row_stride_array, img_stride_array; - LLVMValueRef data_array; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; @@ -1171,12 +1182,15 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); + bld.dims = dims; bld.float_type = lp_type_float(32); bld.int_type = lp_type_int(32); bld.coord_type = type; - bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); + bld.float_size_type = lp_type_float(32); + bld.float_size_type.length = dims > 1 ? 4 : 1; + bld.int_size_type = lp_int_type(bld.float_size_type); bld.texel_type = type; float_vec_type = lp_type_float_vec(32); @@ -1185,27 +1199,40 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type); lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); - lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type); + lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type); lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); /* Get the dynamic state */ - width = dynamic_state->width(dynamic_state, builder, unit); - height = dynamic_state->height(dynamic_state, builder, unit); - depth = dynamic_state->depth(dynamic_state, builder, unit); - row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); - img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); - data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + bld.width = dynamic_state->width(dynamic_state, builder, unit); + bld.height = dynamic_state->height(dynamic_state, builder, unit); + bld.depth = dynamic_state->depth(dynamic_state, builder, unit); + bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); + bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); + bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; t = coords[1]; r = coords[2]; - /* width, height, depth as uint vectors */ - width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); - height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); - depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + /* width, height, depth as single int vector */ + if (dims <= 1) { + bld.int_size = bld.width; + } + else { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef, + bld.width, LLVMConstInt(i32t, 0, 0), ""); + if (dims >= 2) { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, + bld.height, LLVMConstInt(i32t, 1, 0), ""); + if (dims >= 3) { + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, + bld.depth, LLVMConstInt(i32t, 2, 0), ""); + } + } + } if (0) { /* For debug: no-op texture sampling */ @@ -1217,10 +1244,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* do sampling/filtering with fixed pt arithmetic */ lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, - width, height, depth, - width_vec, height_vec, depth_vec, - row_stride_array, img_stride_array, - data_array, texel_out); + texel_out); } else { @@ -1238,10 +1262,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, - width, height, depth, - width_vec, height_vec, depth_vec, - row_stride_array, img_stride_array, - data_array, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 2e9e8386de0..4685a90e418 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -101,6 +101,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, /** + * Combined extract and broadcast (or a mere shuffle when the two types match) + */ +LLVMValueRef +lp_build_extract_broadcast(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef vector, + LLVMValueRef index) +{ + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef res; + + assert(src_type.floating == dst_type.floating); + assert(src_type.width == dst_type.width); + + assert(lp_check_value(src_type, vector)); + assert(LLVMTypeOf(index) == i32t); + + if (src_type.length == 1) { + if (dst_type.length == 1) { + /* + * Trivial scalar -> scalar. + */ + + res = vector; + } + else { + /* + * Broadcast scalar -> vector. + */ + + res = lp_build_broadcast(builder, + lp_build_vec_type(dst_type), + vector); + } + } + else { + if (dst_type.length == src_type.length) { + /* + * Special shuffle of the same size. + */ + + LLVMValueRef shuffle; + shuffle = lp_build_broadcast(builder, + LLVMVectorType(i32t, dst_type.length), + index); + res = LLVMBuildShuffleVector(builder, vector, + LLVMGetUndef(lp_build_vec_type(dst_type)), + shuffle, ""); + } + else { + LLVMValueRef scalar; + scalar = LLVMBuildExtractElement(builder, vector, index, ""); + if (dst_type.length == 1) { + /* + * Trivial extract scalar from vector. + */ + + res = scalar; + } + else { + /* + * General case of different sized vectors. + */ + + res = lp_build_broadcast(builder, + lp_build_vec_type(dst_type), + vector); + } + } + } + + return res; +} + + +/** * Swizzle one channel into all other three channels. */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index f9b6a5e7258..fdea8442aef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar); +LLVMValueRef +lp_build_extract_broadcast(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef vector, + LLVMValueRef index); + + /** * Broadcast one channel of a vector composed of arrays of XYZW structures into * all four channel. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 97318b3456c..a4d3b750c3c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -36,6 +36,9 @@ #define LP_BLD_TGSI_H #include "gallivm/lp_bld.h" +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" struct tgsi_token; @@ -55,6 +58,75 @@ enum lp_build_tex_modifier { /** + * Describe a channel of a register. + * + * The value can be a: + * - immediate value (i.e. derived from a IMM register) + * - CONST[n].x/y/z/w + * - IN[n].x/y/z/w + * - undetermined (when .file == TGSI_FILE_NULL) + * + * This is one of the analysis results, and is used to described + * the output color in terms of inputs. + */ +struct lp_tgsi_channel_info +{ + unsigned file:4; /* TGSI_FILE_* */ + unsigned swizzle:3; /* PIPE_SWIZZLE_x */ + union { + uint32_t index; + float value; /* for TGSI_FILE_IMMEDIATE */ + } u; +}; + + +/** + * Describe a texture sampler interpolator. + * + * The interpolation is described in terms of regular inputs. + */ +struct lp_tgsi_texture_info +{ + struct lp_tgsi_channel_info coord[4]; + unsigned target:8; /* TGSI_TEXTURE_* */ + unsigned unit:8; /* Sampler unit */ + unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ +}; + + +struct lp_tgsi_info +{ + struct tgsi_shader_info base; + + /* + * Whether any of the texture opcodes access a register file other than + * TGSI_FILE_INPUT. + * + * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little + * benefit. + */ + unsigned indirect_textures:1; + + /* + * Texture opcode description. Aimed at detecting and described direct + * texture opcodes. + */ + unsigned num_texs; + struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; + + /* + * Output description. Aimed at detecting and describing simple blit + * shaders. + */ + struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; + + /* + * Shortcut pointers into the above (for fragment shaders). + */ + const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; +}; + +/** * Sampler code generation interface. * * Although texture sampling is a requirement for TGSI translation, it is @@ -97,6 +169,11 @@ struct lp_build_sampler_aos void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info); + + +void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, struct lp_type type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index d5f963be58d..c3c082b2b95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -513,7 +513,7 @@ emit_instruction( { LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1; - LLVMValueRef dst0; + LLVMValueRef dst0 = NULL; /* * Stores and write masks are handled in a general fashion after the long diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c new file mode 100644 index 00000000000..ad514463de0 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -0,0 +1,479 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "lp_bld_debug.h" +#include "lp_bld_tgsi.h" + + +/** + * Analysis context. + * + * This is where we keep store the value of each channel of the IMM/TEMP/OUT + * register values, as we walk the shader. + */ +struct analysis_context +{ + struct lp_tgsi_info *info; + + unsigned num_imms; + float imm[32][4]; + + struct lp_tgsi_channel_info temp[32][4]; +}; + + +/** + * Describe the specified channel of the src register. + */ +static void +analyse_src(struct analysis_context *ctx, + struct lp_tgsi_channel_info *chan_info, + const struct tgsi_src_register *src, + unsigned chan) +{ + chan_info->file = TGSI_FILE_NULL; + if (!src->Indirect && !src->Absolute && !src->Negate) { + unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan); + if (src->File == TGSI_FILE_TEMPORARY) { + if (src->Index < Elements(ctx->temp)) { + *chan_info = ctx->temp[src->Index][swizzle]; + } + } else { + chan_info->file = src->File; + if (src->File == TGSI_FILE_IMMEDIATE) { + assert(src->Index < Elements(ctx->imm)); + if (src->Index < Elements(ctx->imm)) { + chan_info->u.value = ctx->imm[src->Index][swizzle]; + } + } else { + chan_info->u.index = src->Index; + chan_info->swizzle = swizzle; + } + } + } +} + + +/** + * Whether this register channel refers to a specific immediate value. + */ +static boolean +is_immediate(const struct lp_tgsi_channel_info *chan_info, float value) +{ + return chan_info->file == TGSI_FILE_IMMEDIATE && + chan_info->u.value == value; +} + + +static void +analyse_tex(struct analysis_context *ctx, + const struct tgsi_full_instruction *inst, + enum lp_build_tex_modifier modifier) +{ + struct lp_tgsi_info *info = ctx->info; + unsigned chan; + + if (info->num_texs < Elements(info->tex)) { + struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; + bool indirect = FALSE; + unsigned readmask = 0; + + tex_info->target = inst->Texture.Texture; + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + readmask = TGSI_WRITEMASK_X; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + readmask = TGSI_WRITEMASK_XY; + break; + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + readmask = TGSI_WRITEMASK_XYZ; + break; + default: + assert(0); + return; + } + + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + /* We don't track explicit derivatives, although we could */ + indirect = TRUE; + tex_info->unit = inst->Src[3].Register.Index; + } else { + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED || + modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || + modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { + readmask |= TGSI_WRITEMASK_W; + } + tex_info->unit = inst->Src[1].Register.Index; + } + + for (chan = 0; chan < 4; ++chan) { + struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan]; + if (readmask & (1 << chan)) { + analyse_src(ctx, chan_info, &inst->Src[0].Register, chan); + if (chan_info->file != TGSI_FILE_INPUT) { + indirect = TRUE; + } + } else { + memset(chan_info, 0, sizeof *chan_info); + } + } + + if (indirect) { + info->indirect_textures = TRUE; + } + + ++info->num_texs; + } else { + info->indirect_textures = TRUE; + } +} + + +/** + * Process an instruction, and update the register values accordingly. + */ +static void +analyse_instruction(struct analysis_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct lp_tgsi_info *info = ctx->info; + struct lp_tgsi_channel_info (*regs)[4]; + unsigned max_regs; + unsigned i; + unsigned index; + unsigned chan; + + for (i = 0; i < inst->Instruction.NumDstRegs; ++i) { + const struct tgsi_dst_register *dst = &inst->Dst[i].Register; + + /* + * Get the lp_tgsi_channel_info array corresponding to the destination + * register file. + */ + + if (dst->File == TGSI_FILE_TEMPORARY) { + regs = ctx->temp; + max_regs = Elements(ctx->temp); + } else if (dst->File == TGSI_FILE_OUTPUT) { + regs = info->output; + max_regs = Elements(info->output); + } else if (dst->File == TGSI_FILE_ADDRESS || + dst->File == TGSI_FILE_PREDICATE) { + continue; + } else { + assert(0); + continue; + } + + /* + * Detect direct TEX instructions + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE); + break; + case TGSI_OPCODE_TXD: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); + break; + case TGSI_OPCODE_TXB: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); + break; + case TGSI_OPCODE_TXL: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); + break; + case TGSI_OPCODE_TXP: + analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED); + break; + default: + break; + } + + /* + * Keep track of assignments and writes + */ + + if (dst->Indirect) { + /* + * It could be any register index so clear all register indices. + */ + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + for (index = 0; index < max_regs; ++index) { + regs[index][chan].file = TGSI_FILE_NULL; + } + } + } + } else if (dst->Index < max_regs) { + /* + * Update this destination register value. + */ + + struct lp_tgsi_channel_info res[4]; + + memset(res, 0, sizeof res); + + if (!inst->Instruction.Predicate && + !inst->Instruction.Saturate) { + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) { + analyse_src(ctx, &res[chan], + &inst->Src[0].Register, chan); + } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + /* + * Propagate values across 1.0 and 0.0 multiplications. + */ + + struct lp_tgsi_channel_info src0; + struct lp_tgsi_channel_info src1; + + analyse_src(ctx, &src0, &inst->Src[0].Register, chan); + analyse_src(ctx, &src1, &inst->Src[1].Register, chan); + + if (is_immediate(&src0, 0.0f)) { + res[chan] = src0; + } else if (is_immediate(&src1, 0.0f)) { + res[chan] = src1; + } else if (is_immediate(&src0, 1.0f)) { + res[chan] = src1; + } else if (is_immediate(&src1, 1.0f)) { + res[chan] = src0; + } + } + } + } + } + + for (chan = 0; chan < 4; ++chan) { + if (dst->WriteMask & (1 << chan)) { + regs[dst->Index][chan] = res[chan]; + } + } + } + } + + /* + * Clear all temporaries information in presence of a control flow opcode. + */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_IFC: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_BRK: + case TGSI_OPCODE_BREAKC: + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_CALLNZ: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + case TGSI_OPCODE_SWITCH: + case TGSI_OPCODE_CASE: + case TGSI_OPCODE_DEFAULT: + case TGSI_OPCODE_ENDSWITCH: + case TGSI_OPCODE_RET: + case TGSI_OPCODE_END: + /* XXX: Are there more cases? */ + memset(&ctx->temp, 0, sizeof ctx->temp); + memset(&info->output, 0, sizeof info->output); + default: + break; + } +} + + +static INLINE void +dump_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + unsigned index; + unsigned chan; + + tgsi_dump(tokens, 0); + + for (index = 0; index < info->num_texs; ++index) { + const struct lp_tgsi_texture_info *tex_info = &info->tex[index]; + debug_printf("TEX[%u] =", index); + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &tex_info->coord[chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf(" %s[%u].%c", + tgsi_file_names[chan_info->file], + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } else { + debug_printf(" _"); + } + } + debug_printf(", SAMP[%u], %s\n", + tex_info->unit, + tgsi_texture_names[tex_info->target]); + } + + for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) { + for (chan = 0; chan < 4; ++chan) { + const struct lp_tgsi_channel_info *chan_info = + &info->output[index][chan]; + if (chan_info->file != TGSI_FILE_NULL) { + debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]); + if (chan_info->file == TGSI_FILE_IMMEDIATE) { + debug_printf("%f", chan_info->u.value); + } else { + const char *file_name; + switch (chan_info->file) { + case TGSI_FILE_CONSTANT: + file_name = "CONST"; + break; + case TGSI_FILE_INPUT: + file_name = "IN"; + break; + default: + file_name = "???"; + break; + } + debug_printf("%s[%u].%c", + file_name, + chan_info->u.index, + "xyzw01"[chan_info->swizzle]); + } + debug_printf("\n"); + } + } + } +} + + +/** + * Detect any direct relationship between the output color + */ +void +lp_build_tgsi_info(const struct tgsi_token *tokens, + struct lp_tgsi_info *info) +{ + struct tgsi_parse_context parse; + struct analysis_context ctx; + unsigned index; + unsigned chan; + + memset(info, 0, sizeof *info); + + tgsi_scan_shader(tokens, &info->base); + + memset(&ctx, 0, sizeof ctx); + ctx.info = info; + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = + &parse.FullToken.FullInstruction; + + if (inst->Instruction.Opcode == TGSI_OPCODE_END || + inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + /* We reached the end of main function body. */ + goto finished; + } + + analyse_instruction(&ctx, inst); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const unsigned size = + parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + assert(size <= 4); + if (ctx.num_imms < Elements(ctx.imm)) { + for (chan = 0; chan < size; ++chan) { + ctx.imm[ctx.num_imms][chan] = + parse.FullToken.FullImmediate.u[chan].Float; + } + ++ctx.num_imms; + } + } + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert(0); + } + } +finished: + + tgsi_parse_free(&parse); + + + /* + * Link the output color values. + */ + + for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) { + const struct lp_tgsi_channel_info null_output[4]; + info->cbuf[index] = null_output; + } + + for (index = 0; index < info->base.num_outputs; ++index) { + unsigned semantic_name = info->base.output_semantic_name[index]; + unsigned semantic_index = info->base.output_semantic_index[index]; + if (semantic_name == TGSI_SEMANTIC_COLOR && + semantic_index < PIPE_MAX_COLOR_BUFS) { + info->cbuf[semantic_index] = info->output[index]; + } + } + + if (gallivm_debug & GALLIVM_DEBUG_TGSI) { + dump_info(tokens, info); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 441aebae298..3c318cc8c80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); for (i = 0; i < num_coords; i++) { - ddx[i] = emit_fetch( bld, inst, 1, i ); - ddy[i] = emit_fetch( bld, inst, 2, i ); + LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); + LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); + ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); + ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); } unit = inst->Src[3].Register.Index; } else { for (i = 0; i < num_coords; i++) { - ddx[i] = lp_build_ddx( &bld->base, coords[i] ); - ddy[i] = lp_build_ddy( &bld->base, coords[i] ); + ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); + ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); } unit = inst->Src[1].Register.Index; } for (i = num_coords; i < 3; i++) { - ddx[i] = bld->base.undef; - ddy[i] = bld->base.undef; + ddx[i] = LLVMGetUndef(bld->base.elem_type); + ddy[i] = LLVMGetUndef(bld->base.elem_type); } bld->sampler->emit_fetch_texel(bld->sampler, @@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, texel); } +static boolean +near_end_of_shader(struct lp_build_tgsi_soa_context *bld, + int pc) +{ + int i; + + for (i = 0; i < 5; i++) { + unsigned opcode; + + if (pc + i >= bld->info->num_instructions) + return TRUE; + + opcode = bld->instructions[pc + i].Instruction.Opcode; + + if (opcode == TGSI_OPCODE_END) + return TRUE; + + if (opcode == TGSI_OPCODE_TEX || + opcode == TGSI_OPCODE_TXP || + opcode == TGSI_OPCODE_TXD || + opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXL || + opcode == TGSI_OPCODE_TXF || + opcode == TGSI_OPCODE_TXQ || + opcode == TGSI_OPCODE_CAL || + opcode == TGSI_OPCODE_CALLNZ || + opcode == TGSI_OPCODE_IF || + opcode == TGSI_OPCODE_IFC || + opcode == TGSI_OPCODE_BGNLOOP || + opcode == TGSI_OPCODE_SWITCH) + return FALSE; + } + + return TRUE; +} + + /** * Kill fragment if any of the src register values are negative. @@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, static void emit_kil( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + int pc) { const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; @@ -959,8 +1001,12 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); + } } @@ -972,7 +1018,8 @@ emit_kil( */ static void emit_kilp(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + int pc) { LLVMValueRef mask; @@ -983,10 +1030,14 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); } else { - mask = bld->base.zero; + LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); + mask = zero; } lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); } static void @@ -1535,12 +1586,12 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld, inst ); + emit_kilp( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( bld, inst ); + emit_kil( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index ef4b306cb67..330838d23cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -97,7 +97,7 @@ void (*ppc_get_func(struct ppc_function *p))(void) return (void (*)(void)) NULL; else #endif - return (void (*)(void)) p->store; + return (void (*)(void)) pointer_to_func(p->store); } diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 036c1ee48a8..34bfa527db0 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -23,26 +23,13 @@ #include "cell/ppu/cell_public.h" #endif + static INLINE struct pipe_screen * -sw_screen_create(struct sw_winsys *winsys) +sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { - const char *default_driver; - const char *driver; struct pipe_screen *screen = NULL; #if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) - default_driver = "llvmpipe"; -#elif defined(GALLIUM_SOFTPIPE) - default_driver = "softpipe"; -#else - default_driver = ""; -#endif - - driver = debug_get_option("GALLIUM_DRIVER", default_driver); - -#if defined(GALLIUM_CELL) if (screen == NULL && strcmp(driver, "cell") == 0) screen = cell_create_screen(winsys); #endif @@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys) return screen; } + +static INLINE struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + const char *default_driver; + const char *driver; + +#if defined(GALLIUM_CELL) + default_driver = "cell"; +#elif defined(GALLIUM_LLVMPIPE) + default_driver = "llvmpipe"; +#elif defined(GALLIUM_SOFTPIPE) + default_driver = "softpipe"; +#else + default_driver = ""; +#endif + + driver = debug_get_option("GALLIUM_DRIVER", default_driver); + return sw_screen_create_named(winsys, driver); +} + + #endif diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h index 0b4e7404034..e4effa713e9 100644 --- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -13,22 +13,28 @@ static INLINE struct pipe_screen * sw_screen_wrap(struct pipe_screen *screen) { struct sw_winsys *sws; - struct pipe_screen *sw_screen; + struct pipe_screen *sw_screen = NULL; + const char *driver; - sws = wrapper_sw_winsys_warp_pipe_screen(screen); + driver = debug_get_option("GALLIUM_DRIVER", "native"); + if (strcmp(driver, "native") == 0) + return screen; + + sws = wrapper_sw_winsys_wrap_pipe_screen(screen); if (!sws) goto err; - sw_screen = sw_screen_create(sws); - if (sw_screen == screen) + sw_screen = sw_screen_create_named(sws, driver); + + if (!sw_screen) goto err_winsys; return sw_screen; err_winsys: - sws->destroy(sws); + return wrapper_sw_winsys_dewrap_pipe_screen(sws); err: - return screen; + return screen; } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index f71ffb70308..77bde86684e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -90,7 +90,8 @@ static const char *processor_type_names[] = "GEOM" }; -static const char *file_names[TGSI_FILE_COUNT] = +const char * +tgsi_file_names[TGSI_FILE_COUNT] = { "NULL", "CONST", @@ -125,7 +126,8 @@ static const char *semantic_names[] = "FACE", "EDGEFLAG", "PRIM_ID", - "INSTANCEID" + "INSTANCEID", + "STENCIL" }; static const char *immediate_type_names[] = @@ -135,7 +137,8 @@ static const char *immediate_type_names[] = "INT32" }; -static const char *swizzle_names[] = +const char * +tgsi_swizzle_names[] = { "x", "y", @@ -143,7 +146,8 @@ static const char *swizzle_names[] = "w" }; -static const char *texture_names[] = +const char * +tgsi_texture_names[] = { "UNKNOWN", "1D", @@ -201,15 +205,15 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { - ENM(src->Register.File, file_names); + ENM(src->Register.File, tgsi_file_names); if (src->Register.Dimension) { if (src->Dimension.Indirect) { CHR( '[' ); - ENM( src->DimIndirect.File, file_names ); + ENM( src->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( src->DimIndirect.Index ); TXT( "]." ); - ENM( src->DimIndirect.SwizzleX, swizzle_names ); + ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (src->Dimension.Index != 0) { if (src->Dimension.Index > 0) CHR( '+' ); @@ -224,11 +228,11 @@ _dump_register_src( } if (src->Register.Indirect) { CHR( '[' ); - ENM( src->Indirect.File, file_names ); + ENM( src->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( src->Indirect.Index ); TXT( "]." ); - ENM( src->Indirect.SwizzleX, swizzle_names ); + ENM( src->Indirect.SwizzleX, tgsi_swizzle_names ); if (src->Register.Index != 0) { if (src->Register.Index > 0) CHR( '+' ); @@ -248,15 +252,15 @@ _dump_register_dst( struct dump_ctx *ctx, const struct tgsi_full_dst_register *dst ) { - ENM(dst->Register.File, file_names); + ENM(dst->Register.File, tgsi_file_names); if (dst->Register.Dimension) { if (dst->Dimension.Indirect) { CHR( '[' ); - ENM( dst->DimIndirect.File, file_names ); + ENM( dst->DimIndirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->DimIndirect.Index ); TXT( "]." ); - ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names ); if (dst->Dimension.Index != 0) { if (dst->Dimension.Index > 0) CHR( '+' ); @@ -271,11 +275,11 @@ _dump_register_dst( } if (dst->Register.Indirect) { CHR( '[' ); - ENM( dst->Indirect.File, file_names ); + ENM( dst->Indirect.File, tgsi_file_names ); CHR( '[' ); SID( dst->Indirect.Index ); TXT( "]." ); - ENM( dst->Indirect.SwizzleX, swizzle_names ); + ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names ); if (dst->Register.Index != 0) { if (dst->Register.Index > 0) CHR( '+' ); @@ -351,7 +355,7 @@ iter_declaration( TXT( "DCL " ); - ENM(decl->Declaration.File, file_names); + ENM(decl->Declaration.File, tgsi_file_names); /* all geometry shader inputs are two dimensional */ if (decl->Declaration.File == TGSI_FILE_INPUT && @@ -585,10 +589,10 @@ iter_instruction( inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( inst->Predicate.SwizzleX, swizzle_names ); - ENM( inst->Predicate.SwizzleY, swizzle_names ); - ENM( inst->Predicate.SwizzleZ, swizzle_names ); - ENM( inst->Predicate.SwizzleW, swizzle_names ); + ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names ); + ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names ); } TXT( ") " ); @@ -641,10 +645,10 @@ iter_instruction( src->Register.SwizzleZ != TGSI_SWIZZLE_Z || src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->Register.SwizzleX, swizzle_names ); - ENM( src->Register.SwizzleY, swizzle_names ); - ENM( src->Register.SwizzleZ, swizzle_names ); - ENM( src->Register.SwizzleW, swizzle_names ); + ENM( src->Register.SwizzleX, tgsi_swizzle_names ); + ENM( src->Register.SwizzleY, tgsi_swizzle_names ); + ENM( src->Register.SwizzleZ, tgsi_swizzle_names ); + ENM( src->Register.SwizzleW, tgsi_swizzle_names ); } if (src->Register.Absolute) @@ -655,7 +659,7 @@ iter_instruction( if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->Texture.Texture, texture_names ); + ENM( inst->Texture.Texture, tgsi_texture_names ); } switch (inst->Instruction.Opcode) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index dd78b361007..fc0429ad8d9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -35,6 +35,15 @@ extern "C" { #endif +extern const char * +tgsi_file_names[TGSI_FILE_COUNT]; + +extern const char * +tgsi_swizzle_names[]; + +extern const char * +tgsi_texture_names[]; + void tgsi_dump_str( const struct tgsi_token *tokens, diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 90198a4f604..6585da3e838 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; + info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid; info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } @@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* extra info for special outputs */ if (procType == TGSI_PROCESSOR_FRAGMENT && - fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) + info->writes_z = TRUE; + if (procType == TGSI_PROCESSOR_FRAGMENT && + fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL) + info->writes_stencil = TRUE; if (procType == TGSI_PROCESSOR_VERTEX && fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { info->writes_edgeflag = TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index f8aa90cf065..104097fbc03 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -45,6 +45,7 @@ struct tgsi_shader_info ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; + ubyte input_centroid[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ @@ -60,6 +61,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index a75380228b1..850ef39ef21 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -68,6 +68,33 @@ struct translate_key { }; +struct translate; + + +typedef void (PIPE_CDECL *run_elts_func)(struct translate *, + const unsigned *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts16_func)(struct translate *, + const uint16_t *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts8_func)(struct translate *, + const uint8_t *elts, + unsigned count, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_func)(struct translate *, + unsigned start, + unsigned count, + unsigned instance_id, + void *output_buffer); + struct translate { struct translate_key key; @@ -79,42 +106,14 @@ struct translate { unsigned stride, unsigned max_index ); - void (PIPE_CDECL *run_elts)( struct translate *, - const unsigned *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run_elts16)( struct translate *, - const uint16_t *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run_elts8)( struct translate *, - const uint8_t *elts, - unsigned count, - unsigned instance_id, - void *output_buffer); - - void (PIPE_CDECL *run)( struct translate *, - unsigned start, - unsigned count, - unsigned instance_id, - void *output_buffer); + run_elts_func run_elts; + run_elts16_func run_elts16; + run_elts8_func run_elts8; + run_func run; }; -#if 0 -struct translate_context *translate_context_create( void ); -void translate_context_destroy( struct translate_context * ); - -struct translate *translate_lookup_or_create( struct translate_context *tctx, - const struct translate_key *key ); -#endif - - struct translate *translate_create( const struct translate_key *key ); boolean translate_is_output_format_supported(enum pipe_format format); diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index f8bf5b46692..ef7f4be4c3e 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1495,19 +1495,19 @@ struct translate *translate_sse2_create( const struct translate_key *key ) if (!build_vertex_emit(p, &p->elt8_func, 1)) goto fail; - p->translate.run = (void*)x86_get_func(&p->linear_func); + p->translate.run = (run_func) x86_get_func(&p->linear_func); if (p->translate.run == NULL) goto fail; - p->translate.run_elts = (void*)x86_get_func(&p->elt_func); + p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func); if (p->translate.run_elts == NULL) goto fail; - p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func); + p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func); if (p->translate.run_elts16 == NULL) goto fail; - p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func); + p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func); if (p->translate.run_elts8 == NULL) goto fail; diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index 220860ebf4b..aca435d6cad 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -38,6 +38,7 @@ #endif #include "u_dl.h" +#include "u_pointer.h" struct util_dl_library * @@ -58,7 +59,7 @@ util_dl_get_proc_address(struct util_dl_library *library, const char *procname) { #if defined(PIPE_OS_UNIX) - return (util_dl_proc)dlsym((void *)library, procname); + return (util_dl_proc) pointer_to_func(dlsym((void *)library, procname)); #elif defined(PIPE_OS_WINDOWS) return (util_dl_proc)GetProcAddress((HMODULE)library, procname); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 0811280b97b..8e5d4487a67 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs +PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs +PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index 792d69c214c..80081e22f7c 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d } } + +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h index 650db4b95fd..1604cc3eee2 100644 --- a/src/gallium/auxiliary/util/u_format_zs.h +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned void util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); #endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 69a76814945..37294b7203f 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) #endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + + #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index c90b0fdbc3f..5378f2d782f 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * /* Integer versions of util_pack_z and util_pack_z_stencil - useful for * constructing clear masks. */ -static INLINE uint -util_pack_uint_z(enum pipe_format format, unsigned z) +static INLINE uint32_t +util_pack_mask_z(enum pipe_format format, uint32_t z) { switch (format) { case PIPE_FORMAT_Z16_UNORM: @@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z) case PIPE_FORMAT_S8_USCALED: return 0; default: - debug_print_format("gallium: unhandled format in util_pack_z()", format); + debug_print_format("gallium: unhandled format in util_pack_mask_z()", format); assert(0); return 0; } } -static INLINE uint -util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { - unsigned packed = util_pack_uint_z(format, z); - - s &= 0xff; + uint32_t packed = util_pack_mask_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return packed | (s << 24); + packed |= (uint32_t)s << 24; + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return packed | s; + packed |= s; + break; case PIPE_FORMAT_S8_USCALED: - return packed | s; + packed |= s; + break; default: - return packed; + break; } + + return packed; } @@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) /** * Note: it's assumed that z is in [0,1] */ -static INLINE uint +static INLINE uint32_t util_pack_z(enum pipe_format format, double z) { + union fi fui; + if (z == 0.0) return 0; @@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z) case PIPE_FORMAT_Z16_UNORM: if (z == 1.0) return 0xffff; - return (uint) (z * 0xffff); + return (uint32_t) (z * 0xffff); case PIPE_FORMAT_Z32_UNORM: /* special-case to avoid overflow */ if (z == 1.0) return 0xffffffff; - return (uint) (z * 0xffffffff); + return (uint32_t) (z * 0xffffffff); case PIPE_FORMAT_Z32_FLOAT: - return (uint)z; + fui.f = (float)z; + return fui.ui; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: if (z == 1.0) return 0xffffff; - return (uint) (z * 0xffffff); + return (uint32_t) (z * 0xffffff); case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) return 0xffffff00; - return ((uint) (z * 0xffffff)) << 8; + return ((uint32_t) (z * 0xffffff)) << 8; case PIPE_FORMAT_S8_USCALED: /* this case can get it via util_pack_z_stencil() */ return 0; @@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z) * Pack Z and/or stencil values into a 32-bit value described by format. * Note: it's assumed that z is in [0,1] and s in [0,255] */ -static INLINE uint -util_pack_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) { - unsigned packed = util_pack_z(format, z); + uint32_t packed = util_pack_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - packed |= s << 24; + packed |= (uint32_t)s << 24; break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: packed |= s; diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 03198c91da4..1df6c872677 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; + +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -98,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f7aa1403d08..44cadbfcdd0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src, } } +/*** PIPE_FORMAT_S8X24_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8x24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)((*src++ >> 24) & 0xff); + } + + p += dst_stride; + } +} + +/*** PIPE_FORMAT_X24S8_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +x24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} + + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8_get_tile_rgba(const unsigned char *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} /*** PIPE_FORMAT_Z32_FLOAT ***/ @@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8_USCALED: + s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X24S8_USCALED: + s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8X24_USCALED: + x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 5342fc25dc1..e09a1304c4d 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -156,6 +156,15 @@ If there is an index buffer bound, and ``indexed`` field is true, all vertex indices will be looked up in the index buffer. ``min_index``, ``max_index``, and ``index_bias`` apply after index lookup. +When drawing indexed primitives, the primitive restart index can be +used to draw disjoint primitive strips. For example, several separate +line strips can be drawn by designating a special index value as the +restart index. The ``primitive_restart`` flag enables/disables this +feature. The ``restart_index`` field specifies the restart index value. + +When primitive restart is in use, array indexes are compared to the +restart index before adding the index_bias offset. + If a given vertex element has ``instance_divisor`` set to 0, it is said it contains per-vertex data and effective vertex attribute address needs to be recalculated for every index. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 9e02d43ab74..d99ed7c6d6b 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1415,6 +1415,12 @@ Edge flags are used to control which lines or points are actually drawn when the polygon mode converts triangles/quads/polygons into points or lines. +TGSI_SEMANTIC_STENCIL +"""""""""""""""""""""" + +For fragment shaders, this semantic label indicates than an output +is a writable stencil reference value. Only the Y component is writable. +This allows the fragment shader to change the fragments stencilref value. Properties @@ -1493,6 +1499,8 @@ well. | Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) | | | | [#depth-tex-mode]_ | | +--------------------+--------------+--------------------+--------------+ +| S | (s, s, s, s) | unknown | unknown | ++--------------------+--------------+--------------------+--------------+ .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z) diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index ff6d2aa00ab..50f66079c2a 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -641,7 +641,7 @@ galahad_set_index_buffer(struct pipe_context *_pipe, break; default: glhd_warn("index buffer %p has unrecognized index size %d", - _ib->buffer, _ib->index_size); + (void *) _ib->buffer, _ib->index_size); break; } } @@ -1013,7 +1013,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->pipe = pipe; - glhd_warn("Created context %p", glhd_pipe); + glhd_warn("Created context %p", (void *) glhd_pipe); return &glhd_pipe->base; } diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c index 288941b1066..b6cc41d908b 100644 --- a/src/gallium/drivers/galahad/glhd_screen.c +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -370,7 +370,7 @@ galahad_screen_create(struct pipe_screen *screen) glhd_screen->screen = screen; - glhd_warn("Created screen %p", glhd_screen); + glhd_warn("Created screen %p", (void *) glhd_screen); return &glhd_screen->base; } diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 55b877b4ab9..08da2286b05 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -28,8 +28,6 @@ C_SOURCES = \ lp_scene_queue.c \ lp_screen.c \ lp_setup.c \ - lp_setup_coef.c \ - lp_setup_coef_intrin.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ @@ -38,6 +36,7 @@ C_SOURCES = \ lp_state_clip.c \ lp_state_derived.c \ lp_state_fs.c \ + lp_state_setup.c \ lp_state_gs.c \ lp_state_rasterizer.c \ lp_state_sampler.c \ @@ -63,12 +62,12 @@ PROGS := lp_test_format \ # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o +include ../../Makefile.template + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a -include ../../Makefile.template - lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 650435f0f19..49950153a4f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [ ]) -# Only enable SSSE3 for lp_tile_soa_sse3.c -ssse3_env = env.Clone() -if env['gcc'] \ - and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ - and env['machine'] in ('x86', 'x86_64') : - ssse3_env.Append(CCFLAGS = ['-mssse3']) -lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') +lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') llvmpipe = env.ConvenienceLibrary( @@ -64,13 +58,12 @@ llvmpipe = env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', - 'lp_setup_coef.c', - 'lp_setup_coef_intrin.c', 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', 'lp_state_fs.c', + 'lp_state_setup.c', 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f9..e50643790c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c0..27ca8aad4d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7561899a74e..7eb76d4fb31 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,15 +53,8 @@ * ... ... ... ... ... ... ... ... ... * * - * Stencil test: - * Two-sided stencil test is supported but probably not as efficient as - * it could be. Currently, we use if/then/else constructs to do the - * operations for front vs. back-facing polygons. We could probably do - * both the front and back arithmetic then use a Select() instruction to - * choose the result depending on polyon orientation. We'd have to - * measure performance both ways and see which is better. - * * @author Jose Fonseca <[email protected]> + * @author Brian Paul <[email protected]> */ #include "pipe/p_state.h" @@ -71,6 +64,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_intr.h" @@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld, /** * Do the one or two-sided stencil test comparison. * \sa lp_build_stencil_test_single - * \param face an integer indicating front (+) or back (-) facing polygon. - * If NULL, assume front-facing. + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. */ static LLVMValueRef lp_build_stencil_test(struct lp_build_context *bld, const struct pipe_stencil_state stencil[2], LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, - LLVMValueRef face) + LLVMValueRef front_facing) { LLVMValueRef res; assert(stencil[0].enabled); - if (stencil[1].enabled && face) { - /* do two-sided test */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); - lp_build_flow_scope_declare(flow_ctx, &result); + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_test_single(bld, &stencil[1], - stencilRefs[1], stencilVals); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - - res = result; - } - else { - /* do single-side test */ - res = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); + res = lp_build_select(bld, front_facing, res, back_res); } return res; @@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld, const struct pipe_stencil_state *stencil, enum stencil_op op, LLVMValueRef stencilRef, - LLVMValueRef stencilVals, - LLVMValueRef mask) + LLVMValueRef stencilVals) { - const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + LLVMValueRef max = lp_build_const_int_vec(type, 0xff); unsigned stencil_op; assert(type.sign); @@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; default: assert(0 && "bad stencil op mode"); - res = NULL; - } - - if (stencil->writemask != stencilMax) { - /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); - /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); - } - else { - /* res = mask ? res : stencilVals */ - res = lp_build_select(bld, mask, res, stencilVals); + res = bld->undef; } return res; @@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, LLVMValueRef mask, - LLVMValueRef face) + LLVMValueRef front_facing) { - assert(stencil[0].enabled); - - if (stencil[1].enabled && face) { - /* do two-sided op */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + LLVMValueRef res; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + assert(stencil[0].enabled); - lp_build_flow_scope_declare(flow_ctx, &result); + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_op_single(bld, &stencil[1], op, - stencilRefs[1], stencilVals, mask); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + res = lp_build_select(bld, front_facing, res, back_res); + } - return result; + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); } else { - /* do single-sided op */ - return lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } + + return res; } @@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc, } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc, */ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, - unsigned *shift, unsigned *mask) + unsigned *shift, unsigned *width, unsigned *mask) { const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; @@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + *width = format_desc->channel[z_swizzle].size; + padding_right = 0; for (chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; padding_left = - total_bits - (padding_right + format_desc->channel[z_swizzle].size); + total_bits - (padding_right + *width); if (padding_left || padding_right) { unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, *mask = 0xffffffff; } - *shift = padding_left; + *shift = padding_right; return TRUE; } @@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains float value indicating front/back facing polygon + * \param facing contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef *zs_value, + boolean do_branch) { - struct lp_build_context bld; - struct lp_build_context sbld; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; struct lp_type s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef orig_mask = mask->value; + LLVMValueRef orig_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; + + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); /* Sanity checking */ { @@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if (type.floating) { + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); @@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + assert(!z_type.fixed); } } /* Setup build context for Z vals */ - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&z_bld, builder, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(type.width); - lp_build_context_init(&sbld, builder, s_type); + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - lp_build_name(zs_dst, "zsbufval"); + lp_build_name(zs_dst, "zs_dst"); /* Compute and apply the Z/stencil bitmasks and shifts. */ { - unsigned z_shift, z_mask; unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); - z_src = LLVMBuildLShr(builder, z_src, shift, ""); - } - + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); - z_src = LLVMBuildAnd(builder, z_src, mask, ""); - z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); - z_bitmask = mask; /* used below */ + z_bitmask = lp_build_const_int_vec(z_type, z_mask); } - else { + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); } - - lp_build_name(z_dst, "zsbuf.z"); } if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } - lp_build_name(stencil_vals, "stencil"); + lp_build_name(stencil_vals, "s_dst"); } } - if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntType(s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&sbld, stencil, - stencil_refs, stencil_vals, face); + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); /* apply stencil-fail operator */ { - LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); - stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, - s_fail_mask, face); + s_fail_mask, front_facing); } } if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only @@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { - LLVMValueRef zselectmask = mask->value; + LLVMValueRef zselectmask; /* mask off bits that failed Z test */ - zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } - /* if combined Z/stencil format, mask off the stencil bits */ - if (z_bitmask) { - zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); - } - /* Mix the old and new Z buffer values. - * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ - z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, - z_fail_mask, face); + z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - z_pass_mask, face); + z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ - s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - s_pass_mask, face); + s_pass_mask, front_facing); } - /* The Z bits are already in the right place but we may need to shift the - * stencil bits before ORing Z with Stencil to make the final pixel value. - */ + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); + if (do_branch) + lp_build_mask_check(mask); + +} + + +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type z_type; + struct lp_build_context z_bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, builder, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d0..a54ef3a711e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef *zs_value, + boolean do_branch); +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2a374f8c390..c9da8900d0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* - * a = a0 + x * dadx + y * dady + * a = a0 + (x * dadx + y * dady) */ if (attrib == 0 && chan == 0) { @@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef tmp; - tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); - tmp = LLVMBuildFMul(builder, bld->y, dady, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); + LLVMValueRef ax, ay, axy; + ax = LLVMBuildFMul(builder, bld->x, dadx, ""); + ay = LLVMBuildFMul(builder, bld->y, dady, ""); + axy = LLVMBuildFAdd(builder, ax, ay, ""); + a = LLVMBuildFAdd(builder, a, axy, ""); } } @@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + int quad_index, + int start, + int end) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); @@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) assert(quad_index < 4); - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) } #endif + if (attrib == 0 && chan == 2) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + attrib_name(a, attrib, chan, ""); } bld->attribs[attrib][chan] = a; @@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - attribs_update(bld, 0); } @@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + attribs_update(bld, quad_index, 1, bld->num_attribs); +} + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index); + attribs_update(bld, quad_index, 0, 1); } + diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 3054030f739..a7ebdd1bfa2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -46,7 +46,31 @@ #include "tgsi/tgsi_exec.h" -#include "lp_setup.h" +/** + * Describes how to compute the interpolation coefficients (a0, dadx, dady) + * from the vertices passed into our triangle/line/point functions by the + * draw module. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. + * + * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or + * LINEAR depending on flatshade state. + */ +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_COLOR, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + +struct lp_shader_input { + ushort interp:4; /* enum lp_interp */ + ushort usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */ + ushort src_index:8; /* where to find values in incoming vertices */ +}; struct lp_build_interp_soa_context @@ -89,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index); + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, int quad_index); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 39f2c6085ef..763432ed712 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -82,6 +82,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } } + lp_delete_setup_variants(llvmpipe); + align_free( llvmpipe ); } @@ -108,6 +110,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); make_empty_list(&llvmpipe->fs_variants_list); + make_empty_list(&llvmpipe->setup_variants_list); llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 34fa20e204a..db09c95b272 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -39,6 +39,7 @@ #include "lp_jit.h" #include "lp_setup.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" struct llvmpipe_vbuf_render; @@ -48,6 +49,7 @@ struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; struct lp_setup_context; +struct lp_setup_variant; struct lp_velems_state; struct llvmpipe_context { @@ -105,12 +107,9 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /** Fragment shader input interpolation info */ - unsigned num_inputs; - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - /** The tiling engine */ struct lp_setup_context *setup; + struct lp_setup_variant setup_variant; /** The primitive drawing context */ struct draw_context *draw; @@ -120,6 +119,9 @@ struct llvmpipe_context { struct lp_fs_variant_list_item fs_variants_list; unsigned nr_fs_variants; + + struct lp_setup_variant_list_item setup_variants_list; + unsigned nr_setup_variants; }; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index bb538b2bd83..3626ce4a86c 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -32,6 +32,7 @@ struct pipe_context; struct pipe_fence_handle; +struct pipe_resource; void llvmpipe_flush(struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 04b12dedccf..c540f9b3628 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,7 +36,6 @@ #include <llvm-c/Transforms/Scalar.h> #include "util/u_memory.h" -#include "util/u_cpu_detect.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" #include "lp_screen.h" @@ -162,9 +161,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->engine) - LLVMDisposeExecutionEngine(screen->engine); - if(screen->pass) LLVMDisposePassManager(screen->pass); } @@ -190,13 +186,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddCFGSimplificationPass(screen->pass); LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddConstantPropagationPass(screen->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(screen->pass); - } + LLVMAddInstructionCombiningPass(screen->pass); LLVMAddGVNPass(screen->pass); } else { /* We need at least this pass to prevent the backends to fail in diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 16e04fce0cd..114f21f2d16 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -144,7 +144,7 @@ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, - float facing, + uint32_t facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index d1c431475d8..2538164ffaa 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -72,4 +72,14 @@ */ #define LP_MAX_SHADER_VARIANTS 1024 +/** + * Max number of setup variants that will be kept around. + * + * These are determined by the combination of the fragment shader + * input signature and a small amount of rasterization state (eg + * flatshading). It is likely that many active fragment shaders will + * share the same setup variant. + */ +#define LP_MAX_SETUP_VARIANTS 64 + #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d7e6415e139..d358a983943 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - unsigned clear_value = arg.clear_zstencil.value; - unsigned clear_mask = arg.clear_zstencil.mask; + uint32_t clear_value = arg.clear_zstencil.value; + uint32_t clear_mask = arg.clear_zstencil.mask; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; const unsigned block_size = scene->zsbuf.blocksize; @@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint8_t *dst; unsigned i, j; - LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask); + LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", + __FUNCTION__, clear_value, clear_mask); /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in @@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; + clear_value &= clear_mask; + switch (block_size) { case 1: + assert(clear_mask == 0xff); memset(dst, (uint8_t) clear_value, height * width); break; case 2: - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; - dst += dst_stride; + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } break; case 4: @@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint32_t *row = (uint32_t *)dst; for (j = 0; j < width; j++) { uint32_t tmp = ~clear_mask & *row; - *row++ = (clear_value & clear_mask) | tmp; + *row++ = clear_value | tmp; } dst += dst_stride; } @@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned mask) { - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, mask, @@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task, } +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->state = arg.state; +} + + /** * Set top row and left column of the tile's pixels to white. For debugging. @@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_8, lp_rast_triangle_3_4, lp_rast_triangle_3_16, + lp_rast_triangle_4_16, lp_rast_shade_tile, lp_rast_shade_tile_opaque, lp_rast_begin_query, lp_rast_end_query, + lp_rast_set_state, }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c55b97a9d13..a64c152cf83 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,30 +78,28 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - float facing; /** Positive for front-facing, negative for back-facing */ - unsigned disable:1; /** Partially binned, disable this command */ - unsigned opaque:1; /** Is opaque */ - - float (*a0)[4]; - float (*dadx)[4]; - float (*dady)[4]; - - const struct lp_rast_state *state; + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned pad2; /* wasted space */ + unsigned pad3; /* wasted space */ + /* followed by a0, dadx, dady and planes[] */ }; - +/* Note: the order of these values is important as they are loaded by + * sse code in rasterization: + */ struct lp_rast_plane { - /* one-pixel sized trivial accept offsets for each plane */ - int ei; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo; - /* edge function values at minx,miny ?? */ int c; int dcdx; int dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; }; /** @@ -111,17 +109,24 @@ struct lp_rast_plane { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { - /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; - #ifdef DEBUG float v[3][2]; + float pad0; + float pad1; #endif - struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ }; +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + struct lp_rasterizer * lp_rast_create( unsigned num_threads ); @@ -149,9 +154,10 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; struct { - unsigned value; - unsigned mask; + uint32_t value; + uint32_t mask; } clear_zstencil; + const struct lp_rast_state *state; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -238,12 +244,14 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_TRIANGLE_8 0x9 #define LP_RAST_OP_TRIANGLE_3_4 0xa #define LP_RAST_OP_TRIANGLE_3_16 0xb -#define LP_RAST_OP_SHADE_TILE 0xc -#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd -#define LP_RAST_OP_BEGIN_QUERY 0xe -#define LP_RAST_OP_END_QUERY 0xf - -#define LP_RAST_OP_MAX 0x10 +#define LP_RAST_OP_TRIANGLE_4_16 0xc +#define LP_RAST_OP_SHADE_TILE 0xd +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe +#define LP_RAST_OP_BEGIN_QUERY 0xf +#define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 + +#define LP_RAST_OP_MAX 0x12 #define LP_RAST_OP_MASK 0xff void diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 9fc78645a3a..64ac616f629 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask) struct tile { int coverage; int overdraw; + const struct lp_rast_state *state; char data[TILE_SIZE][TILE_SIZE]; }; @@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "triangle_8", "triangle_3_4", "triangle_3_16", + "triangle_4_16", "shade_tile", "shade_tile_opaque", "begin_query", "end_query", + "set_state", }; static const char *cmd_name(unsigned cmd) @@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd) } static const struct lp_fragment_shader_variant * -get_variant( const struct cmd_block *block, - int k ) +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) { if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || - block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) - return block->arg[k].shade_tile->state->variant; - - if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || block->cmd[k] == LP_RAST_OP_TRIANGLE_7) - return block->arg[k].triangle.tri->inputs.state->variant; + return state->variant; return NULL; } static boolean -is_blend( const struct cmd_block *block, +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, int k ) { - const struct lp_fragment_shader_variant *variant = get_variant(block, k); + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); if (variant) return variant->key.blend.rt[0].blend_enable; @@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block, static void debug_bin( const struct cmd_bin *bin ) { + const struct lp_rast_state *state = NULL; const struct cmd_block *head = bin->head; int i, j = 0; @@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin ) while (head) { for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + debug_printf("%d: %s %s\n", j, cmd_name(head->cmd[i]), - is_blend(head, i) ? "blended" : ""); + is_blend(state, head, i) ? "blended" : ""); } head = head->next; } @@ -133,7 +140,7 @@ debug_shade_tile(int x, int y, char val) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; unsigned i,j; if (inputs->disable) @@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; unsigned i, nr_planes = 0; - boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ @@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley, } while (plane_mask) { - plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + plane[nr_planes].dcdy * tiley - plane[nr_planes].dcdx * tilex); @@ -232,15 +240,19 @@ do_debug_bin( struct tile *tile, memset(tile->data, ' ', sizeof tile->data); tile->coverage = 0; tile->overdraw = 0; + tile->state = NULL; for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { - boolean blend = is_blend(block, k); + boolean blend = is_blend(tile->state, block, k); char val = get_label(j); int count = 0; if (print_cmds) debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7370119e966..b30408f097b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -77,6 +77,7 @@ struct cmd_bin; struct lp_rasterizer_task { const struct cmd_bin *bin; + const struct lp_rast_state *state; struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ @@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_scene *scene = task->scene; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *, void lp_rast_triangle_3_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a1f309d4b01..042c315635e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, } void +lp_rast_triangle_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_3(task, arg2); +} + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { @@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff) } -/* Special case for 3 plane triangle which is contained entirely - * within a 16x16 block. - */ +#define NR_PLANES 3 + + + + + + + void lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned outmask, inmask, partmask, partial_mask; - unsigned j; - __m128i cstep4[3][4]; - - outmask = 0; /* outside one or more trivial reject planes */ - partmask = 0; /* outside one or more trivial accept planes */ - - for (j = 0; j < 3; j++) { - const int dcdx = -plane[j].dcdx * 4; - const int dcdy = plane[j].dcdy * 4; - __m128i xdcdy = _mm_set1_epi32(dcdy); - - cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); - cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); - cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); - cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); - - { - const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; - - outmask |= sign_bits4(cstep4[j], c + cox); - partmask |= sign_bits4(cstep4[j], c + cio); - } - } + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); - if (outmask == 0xffff) - return; + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); - /* Mask of sub-blocks which are inside all trivial accept planes: - */ - inmask = ~partmask & 0xffff; + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); - /* Mask of sub-blocks which are inside all trivial reject planes, - * but outside at least one trivial accept plane: - */ - partial_mask = partmask & ~outmask; + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); - assert((partial_mask & inmask) == 0); + for (i = 0; i < 4; i++) { + __m128i cx = c; - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; - unsigned mask = 0xffff; - - partial_mask &= ~(1 << i); - - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * px - + plane[j].dcdy * py) * 4; - - mask &= ~sign_bits4(cstep4[j], cx); - } + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); - } + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - inmask &= ~(1 << i); + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - block_full_4(task, tri, px, py); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); } + + + void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned j; - - /* Iterate over partials: + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + { - unsigned mask = 0xffff; + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * x - + plane[j].dcdy * y); + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); - const int dcdx = -plane[j].dcdx; - const int dcdy = plane[j].dcdy; - __m128i xdcdy = _mm_set1_epi32(dcdy); + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); - __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); - __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); - __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); - __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); - __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); - __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - __m128i result = _mm_packs_epi16(cstep01, cstep23); + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - /* Extract the sign bits - */ - mask &= ~_mm_movemask_epi8(result); - } + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); } } - +#undef NR_PLANES #endif @@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define TAG(x) x##_3 #define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_4 #define NR_PLANES 4 +#define TRI_16 lp_rast_triangle_4_16 #include "lp_rast_tri_tmp.h" #define TAG(x) x##_5 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 9830a43ba55..4825d651c04 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 4 - 1; build_masks(c[j] + cox, cio - cox, @@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); const int x = task->x, y = task->y; struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; @@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[j] = tri->plane[i]; + plane[j] = tri_plane[i]; plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; @@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; const int cox = plane[j].eo * 16; - const int cio = plane[j].ei * 16 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 16 - 1; build_masks(c[j] + cox, cio - cox, @@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } } +#if defined(PIPE_ARCH_SSE) && defined(TRI_16) +/* XXX: special case this when intersection is not required. + * - tile completely within bbox, + * - bbox completely within tile. + */ +void +TRI_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + unsigned outmask, partial_mask; + unsigned j; + __m128i cstep4[NR_PLANES][4]; + + int x = (mask & 0xff); + int y = (mask >> 8); + + outmask = 0; /* outside one or more trivial reject planes */ + + x += task->x; + y += task->y; + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); + + { + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int cox = plane[j].eo * 4; + + outmask |= sign_bits4(cstep4[j], c + cox); + } + } + + if (outmask == 0xffff) + return; + + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = 0xffff & ~outmask; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + unsigned mask = 0xffff; + + partial_mask &= ~(1 << i); + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c - 1 + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); + } +} +#endif + +#if defined(PIPE_ARCH_SSE) && defined(TRI_4) +void +TRI_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} +#endif + + + #undef TAG +#undef TRI_4 +#undef TRI_16 #undef NR_PLANES diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 8b504f23a33..a4fdf7cff36 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene ) for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = bin->tail = NULL; + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index dbef7692e42..622c522f11a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -41,6 +41,7 @@ #include "lp_debug.h" struct lp_scene_queue; +struct lp_rast_state; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -94,6 +95,7 @@ struct data_block { struct cmd_bin { ushort x; ushort y; + const struct lp_rast_state *last_state; /* most recent state set in bin */ struct cmd_block *head; struct cmd_block *tail; }; @@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene, assert(x < scene->tiles_x); assert(y < scene->tiles_y); - assert(cmd <= LP_RAST_OP_END_QUERY); + assert(cmd < LP_RAST_OP_MAX); if (tail == NULL || tail->count == CMD_BLOCK_MAX) { tail = lp_scene_new_cmd_block( scene, bin ); @@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene, } +static INLINE boolean +lp_scene_bin_cmd_with_state( struct lp_scene *scene, + unsigned x, unsigned y, + const struct lp_rast_state *state, + unsigned cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + + if (state != bin->last_state) { + bin->last_state = state; + if (!lp_scene_bin_command(scene, x, y, + LP_RAST_OP_SET_STATE, + lp_rast_arg_state(state))) + return FALSE; + } + + if (!lp_scene_bin_command( scene, x, y, cmd, arg )) + return FALSE; + + return TRUE; +} + + /* Add a command to all active bins. */ static INLINE boolean diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 96633d93654..ad0ea75b3a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -158,6 +158,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; case PIPE_CAP_DEPTH_CLAMP: diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 5ff11a33632..6118434d3d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,7 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, enum setup_state, +static boolean set_scene_state( struct lp_setup_context *, enum setup_state, const char *reason); static boolean try_update_scene_state( struct lp_setup_context *setup ); @@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) -static void +static boolean begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = setup->scene; @@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup ) /* Always create a fence: */ scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + if (!scene->fence) + return FALSE; /* Initialize the bin flags and x/y coords: */ @@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup ) } ok = try_update_scene_state(setup); - assert(ok); + if (!ok) + return FALSE; if (setup->fb.zsbuf && ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && @@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_COLOR, setup->clear.color ); - assert(ok); + if (!ok) + return FALSE; } } @@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { if (!need_zsload) scene->has_depthstencil_clear = TRUE; + ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_ZSTENCIL, lp_rast_arg_clearzs( setup->clear.zsvalue, setup->clear.zsmask)); - assert(ok); + if (!ok) + return FALSE; } } @@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(setup->active_query) ); - assert(ok); + if (!ok) + return FALSE; } - setup->clear.flags = 0; setup->clear.zsmask = 0; setup->clear.zsvalue = 0; LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); + return TRUE; } @@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup ) * * TODO: fast path for fullscreen clears and no triangles. */ -static void +static boolean execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - begin_binning( setup ); + return begin_binning( setup ); } const char *states[] = { @@ -262,7 +269,7 @@ const char *states[] = { }; -static void +static boolean set_scene_state( struct lp_setup_context *setup, enum setup_state new_state, const char *reason) @@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup, unsigned old_state = setup->state; if (old_state == new_state) - return; + return TRUE; if (LP_DEBUG & DEBUG_SCENE) { debug_printf("%s old %s new %s%s%s\n", @@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup, break; case SETUP_ACTIVE: - begin_binning( setup ); + if (!begin_binning( setup )) + goto fail; break; case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) - execute_clears( setup ); + if (!execute_clears( setup )) + goto fail; lp_setup_rasterize_scene( setup ); assert(setup->scene == NULL); @@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup, default: assert(0 && "invalid setup state mode"); + goto fail; } setup->state = new_state; + return TRUE; + +fail: + if (setup->scene) { + lp_scene_end_rasterization(setup->scene); + setup->scene = NULL; + } + + setup->state = SETUP_FLUSHED; + lp_setup_reset( setup ); + return FALSE; } @@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup, } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); - zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format, + + zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, zmask, smask); + + zsvalue &= zsmask; } if (setup->state == SETUP_ACTIVE) { @@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_COLOR) { memcpy(setup->clear.color.clear_color, &color_arg, - sizeof color_arg); + sizeof setup->clear.color.clear_color); } } @@ -502,14 +526,12 @@ lp_setup_set_point_state( struct lp_setup_context *setup, } void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *input, - unsigned nr ) +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant) { - LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); - - memcpy( setup->fs.input, input, nr * sizeof input[0] ); - setup->fs.nr_inputs = nr; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->setup.variant = variant; } void @@ -875,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup ) return TRUE; } -void +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene ) { @@ -897,22 +919,47 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->psize = lp->psize_slot; assert(lp->dirty == 0); + + assert(lp->setup_variant.key.size == + setup->setup.variant->key.size); + + assert(memcmp(&lp->setup_variant.key, + &setup->setup.variant->key, + setup->setup.variant->key.size) == 0); } - if (update_scene) - set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + if (update_scene) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } /* Only call into update_scene_state() if we already have a * scene: */ if (update_scene && setup->scene) { assert(setup->state == SETUP_ACTIVE); - if (!try_update_scene_state(setup)) { - lp_setup_flush_and_restart(setup); - if (!try_update_scene_state(setup)) - assert(0); - } + + if (try_update_scene_state(setup)) + return TRUE; + + /* Update failed, try to restart the scene. + * + * Cannot call lp_setup_flush_and_restart() directly here + * because of potential recursion. + */ + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__)) + return FALSE; + + if (!setup->scene) + return FALSE; + + return try_update_scene_state(setup); } + + return TRUE; } @@ -1016,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - assert(0); return; } } @@ -1065,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) } -void +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup) { if (0) debug_printf("%s\n", __FUNCTION__); assert(setup->state == SETUP_ACTIVE); - set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__); - lp_setup_update_state(setup, TRUE); + + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!lp_setup_update_state(setup, TRUE)) + return FALSE; + + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 25dab78f64f..ebb18f81344 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -33,28 +33,6 @@ struct draw_context; struct vertex_info; -enum lp_interp { - LP_INTERP_CONSTANT, - LP_INTERP_LINEAR, - LP_INTERP_PERSPECTIVE, - LP_INTERP_POSITION, - LP_INTERP_FACING -}; - - -/** - * Describes how to compute the interpolation coefficients (a0, dadx, dady) - * from the vertices passed into our triangle/line/point functions by the - * draw module. - * - * Vertices are treated as an array of float[4] values, indexed by - * src_index. - */ -struct lp_shader_input { - enum lp_interp interp; /* how to interpolate values */ - unsigned src_index; /* where to find values in incoming vertices */ - unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */ -}; struct pipe_resource; struct pipe_query; @@ -66,7 +44,7 @@ struct lp_fragment_shader_variant; struct lp_jit_context; struct llvmpipe_query; struct pipe_fence_handle; - +struct lp_setup_variant; struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, @@ -111,9 +89,8 @@ lp_setup_set_point_state( struct lp_setup_context *setup, uint sprite_coord_origin); void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *interp, - unsigned nr ); +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant ); void lp_setup_set_fs_variant( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c deleted file mode 100644 index 8dc2688ddb6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ /dev/null @@ -1,279 +0,0 @@ -/************************************************************************** - * - * Copyright 2010, VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" -#include "lp_state_fs.h" - -#if !defined(PIPE_ARCH_SSE) - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - const float value, - unsigned i ) -{ - inputs->a0[slot][i] = value; - inputs->dadx[slot][i] = 0.0f; - inputs->dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - float a0 = info->v0[vert_attr][i]; - float a1 = info->v1[vert_attr][i]; - float a2 = info->v2[vert_attr][i]; - - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); - float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a0 = info->v0[vert_attr][i] * info->v0[0][3]; - float a1 = info->v1[vert_attr][i] * info->v1[0][3]; - float a2 = info->v2[vert_attr][i] * info->v2[0][3]; - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; - float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned usage_mask) -{ - /*X*/ - if (usage_mask & TGSI_WRITEMASK_X) { - inputs->a0[slot][0] = 0.0; - inputs->dadx[slot][0] = 1.0; - inputs->dady[slot][0] = 0.0; - } - - /*Y*/ - if (usage_mask & TGSI_WRITEMASK_Y) { - inputs->a0[slot][1] = 0.0; - inputs->dadx[slot][1] = 0.0; - inputs->dady[slot][1] = 1.0; - } - - /*Z*/ - if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(inputs, info, slot, 0, 2); - } - - /*W*/ - if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(inputs, info, slot, 0, 3); - } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - boolean frontface, - unsigned usage_mask) -{ - /* convert TRUE to 1.0 and FALSE to -1.0 */ - if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); - - if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; - unsigned slot; - unsigned i; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v0[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v2[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - linear_coef(inputs, &info, slot+1, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(inputs, &info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0, so all need to ensure that the usage mask is covers all - * usages. - */ - fragcoord_usage_mask |= usage_mask; - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask); - break; - - default: - assert(0); - } - } - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask); -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h deleted file mode 100644 index 87a3255ccc6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * The setup code is concerned with point/line/triangle setup and - * putting commands/data into the bins. - */ - - -#ifndef LP_SETUP_COEF_H -#define LP_SETUP_COEF_H - - -struct lp_tri_info { - - float x0_center; - float y0_center; - - /* turn these into an aligned float[4] */ - float dy01_ooa; - float dy20_ooa; - float dx01_ooa; - float dx20_ooa; - - const float (*v0)[4]; - const float (*v1)[4]; - const float (*v2)[4]; - - boolean frontfacing; /* remove eventually */ -}; - -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing); - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c deleted file mode 100644 index 3742fd672b2..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ /dev/null @@ -1,228 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" - -#if defined(PIPE_ARCH_SSE) -#include <emmintrin.h> - - -static void constant_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - const float *attr) -{ - *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot ) -{ - /* XXX: just pass frontface directly to the shader, don't bother - * treating it as an input. - */ - __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, - 0, 0, 0); - - *(__m128 *)inputs->a0[slot] = a0; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -static void calc_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - __m128 a0, - __m128 a1, - __m128 a2) -{ - __m128 da01 = _mm_sub_ps(a0, a1); - __m128 da20 = _mm_sub_ps(a2, a0); - - __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); - __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa)); - __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); - - __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); - __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); - __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); - - __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); - __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); - __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); - __m128 attr_0 = _mm_sub_ps(a0, attr_v0); - - *(__m128 *)inputs->a0[slot] = attr_0; - *(__m128 *)inputs->dadx[slot] = dadx; - *(__m128 *)inputs->dady[slot] = dady; -} - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - calc_coef4(inputs, info, slot, a0, a1, a2); -} - - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); - __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); - __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); - - calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); -} - - - - - -/** - * Compute the inputs-> dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned slot; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* The internal position input is in slot zero: - */ - linear_coef(inputs, &info, 0, 0); - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]); - } - else { - constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]); - } - break; - - case LP_INTERP_LINEAR: - linear_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_PERSPECTIVE: - perspective_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0. - */ - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, &info, slot+1); - break; - - default: - assert(0); - } - } -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 8506ed2dc9e..dc2533bedc5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -39,6 +39,7 @@ #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" +#include "lp_bld_interp.h" /* for struct lp_shader_input */ #include "draw/draw_vbuf.h" #include "util/u_rect.h" @@ -49,6 +50,8 @@ #define LP_SETUP_NEW_SCISSOR 0x08 +struct lp_setup_variant; + /** Max number of scenes */ #define MAX_SCENES 2 @@ -118,9 +121,6 @@ struct lp_setup_context } state; struct { - struct lp_shader_input input[PIPE_MAX_ATTRIBS]; - unsigned nr_inputs; - const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS]; @@ -139,6 +139,10 @@ struct lp_setup_context } blend_color; + struct { + const struct lp_setup_variant *variant; + } setup; + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct lp_setup_context *, @@ -160,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup ); void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct lp_setup_context *setup, +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene); void lp_setup_destroy( struct lp_setup_context *setup ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); void lp_setup_print_triangle(struct lp_setup_context *setup, @@ -181,7 +185,7 @@ lp_setup_print_vertex(struct lp_setup_context *setup, struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, + unsigned num_inputs, unsigned nr_planes, unsigned *tri_size); @@ -191,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, const struct u_rect *bbox, int nr_planes ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 156bd633754..827413bb33e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -35,6 +35,7 @@ #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 @@ -46,6 +47,10 @@ struct lp_line_info { const float (*v1)[4]; const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -53,14 +58,14 @@ struct lp_line_info { * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, + struct lp_line_info *info, unsigned slot, const float value, unsigned i ) { - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -69,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup, * for a triangle. */ static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -82,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - + info->a0[slot][i] = (a1 - (dadx * (info->v1[0][0] - setup->pixel_offset) + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -100,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -115,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (info->v1[0][0] - setup->pixel_offset) + - dady * (info->v1[0][1] - setup->pixel_offset))); + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, info, slot, 0, 2); + linear_coef(setup, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, info, slot, 0, 3); + linear_coef(setup, info, slot, 0, 3); } } @@ -159,43 +161,43 @@ setup_fragcoord_coef( struct lp_setup_context *setup, * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; unsigned i; - switch (setup->fs.input[slot].interp) { + switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { + if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, info, slot+1, vert_attr, i); + linear_coef(setup, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, info, slot+1, vert_attr, i); + perspective_coef(setup, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -211,7 +213,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -221,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, info, 0, + setup_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -241,14 +243,15 @@ print_line(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; uint i; debug_printf("llvmpipe line\n"); - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v1[%d]: %f %f %f %f\n", i, v1[i][0], v1[i][1], v1[i][2], v1[i][3]); } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v2[%d]: %f %f %f %f\n", i, v2[i][0], v2[i][1], v2[i][2], v2[i][3]); } @@ -275,7 +278,9 @@ try_setup_line( struct lp_setup_context *setup, const float (*v2)[4]) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; @@ -475,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup, else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; - draw_end = (xintersect < 1.0 && xintersect > 0.0); + draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? @@ -513,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup, x_offset_end = y_offset_end * dxdy; } } - + /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; @@ -567,7 +572,7 @@ try_setup_line( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); line = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!line) @@ -581,33 +586,35 @@ try_setup_line( struct lp_setup_context *setup, #endif /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, &info); + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + setup_line_coefficients(setup, &info); - line->inputs.facing = 1.0F; - line->inputs.state = setup->fs.stored; + line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { - struct lp_rast_plane *plane = &line->plane[i]; /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. @@ -623,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } @@ -677,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].dcdx = -1; - line->plane[4].dcdy = 0; - line->plane[4].c = 1-bbox.x0; - line->plane[4].ei = 0; - line->plane[4].eo = 1; - - line->plane[5].dcdx = 1; - line->plane[5].dcdy = 0; - line->plane[5].c = bbox.x1+1; - line->plane[5].ei = -1; - line->plane[5].eo = 0; - - line->plane[6].dcdx = 0; - line->plane[6].dcdy = 1; - line->plane[6].c = 1-bbox.y0; - line->plane[6].ei = 0; - line->plane[6].eo = 1; - - line->plane[7].dcdx = 0; - line->plane[7].dcdy = -1; - line->plane[7].c = bbox.y1+1; - line->plane[7].ei = -1; - line->plane[7].eo = 0; + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-bbox.x0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = bbox.x1+1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-bbox.y0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = bbox.y1+1; + plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); @@ -712,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup, { if (!try_setup_line( setup, v0, v1 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_line( setup, v0, v1 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 3b217f95447..146f1bd07ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -35,6 +35,7 @@ #include "lp_perf.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@ -45,6 +46,10 @@ struct point_info { int dx01, dx12; const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -53,14 +58,37 @@ struct point_info { */ static void constant_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, + struct point_info *info, unsigned slot, const float value, unsigned i) { - point->inputs.a0[slot][i] = value; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +static void +point_persp_coeff(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i) +{ + /* + * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A + * better stratergy would be to take the primitive in consideration when + * generating the fragment shader key, and therefore avoid the per-fragment + * perspective divide. + */ + + float w0 = info->v0[0][3]; + + assert(i < 4); + + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -69,17 +97,19 @@ constant_coef(struct lp_setup_context *setup, * \param slot the vertex attribute slot to setup * \param i the attribute channel in [0,3] * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x - * \param perspective_proj will the TEX instruction do a divide by Q? + * \param perspective does the shader expects pre-multiplied w, i.e., + * LP_INTERP_PERSPECTIVE is specified in the shader key */ static void texcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, const struct point_info *info, unsigned slot, unsigned i, unsigned sprite_coord_origin, - boolean perspective_proj) + boolean perspective) { + float w0 = info->v0[0][3]; + assert(i < 4); if (i == 0) { @@ -88,21 +118,14 @@ texcoord_coef(struct lp_setup_context *setup, float x0 = info->v0[0][0] - setup->pixel_offset; float y0 = info->v0[0][1] - setup->pixel_offset; - point->inputs.dadx[slot][0] = dadx; - point->inputs.dady[slot][0] = dady; - point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); - if (!perspective_proj) { - /* Divide coefficients by vertex.w here. - * - * It would be clearer to always multiply by w0 above and - * then divide it out for perspective projection here, but - * doing it this way involves less algebra. - */ - float w0 = info->v0[0][3]; - point->inputs.dadx[slot][0] *= w0; - point->inputs.dady[slot][0] *= w0; - point->inputs.a0[slot][0] *= w0; + if (perspective) { + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; } } else if (i == 1) { @@ -115,26 +138,25 @@ texcoord_coef(struct lp_setup_context *setup, dady = -dady; } - point->inputs.dadx[slot][1] = dadx; - point->inputs.dady[slot][1] = dady; - point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); - if (!perspective_proj) { - float w0 = info->v0[0][3]; - point->inputs.dadx[slot][1] *= w0; - point->inputs.dady[slot][1] *= w0; - point->inputs.a0[slot][1] *= w0; + if (perspective) { + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; } } else if (i == 2) { - point->inputs.a0[slot][2] = 0.0f; - point->inputs.dadx[slot][2] = 0.0f; - point->inputs.dady[slot][2] = 0.0f; + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; } else { - point->inputs.a0[slot][3] = 1.0f; - point->inputs.dadx[slot][3] = 0.0f; - point->inputs.dady[slot][3] = 0.0f; + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; } } @@ -147,33 +169,32 @@ texcoord_coef(struct lp_setup_context *setup, */ static void setup_point_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, + struct point_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - point->inputs.a0[slot][0] = 0.0; - point->inputs.dadx[slot][0] = 1.0; - point->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - point->inputs.a0[slot][1] = 0.0; - point->inputs.dadx[slot][1] = 0.0; - point->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - constant_coef(setup, point, slot, info->v0[0][2], 2); + constant_coef(setup, info, slot, info->v0[0][2], 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - constant_coef(setup, point, slot, info->v0[0][3], 3); + constant_coef(setup, info, slot, info->v0[0][3], 3); } } @@ -183,21 +204,27 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup, */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; + enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } - switch (setup->fs.input[slot].interp) { + switch (interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from @@ -210,39 +237,45 @@ setup_point_coefficients( struct lp_setup_context *setup, case LP_INTERP_LINEAR: /* Sprite tex coords may use linear interpolation someday */ /* fall-through */ - case LP_INTERP_PERSPECTIVE: /* check if the sprite coord flag is set for this attribute. * If so, set it up so it up so x and y vary from 0 to 1. */ - if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { - const int index = shader->info.input_semantic_index[slot]; + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; /* Note that sprite_coord enable is a bitfield of * PIPE_MAX_SHADER_OUTPUTS bits. */ - if (index < PIPE_MAX_SHADER_OUTPUTS && - (setup->sprite_coord_enable & (1 << index))) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - texcoord_coef(setup, point, info, slot + 1, i, + if (semantic_index < PIPE_MAX_SHADER_OUTPUTS && + (setup->sprite_coord_enable & (1 << semantic_index))) { + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + texcoord_coef(setup, info, slot + 1, i, setup->sprite_coord_origin, - (usage_mask & TGSI_WRITEMASK_W)); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; + perspective); + } + } + break; } } - /* FALLTHROUGH */ + /* fall-through */ case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + if (usage_mask & (1 << i)) { + if (perspective) { + point_persp_coeff(setup, info, slot+1, i); + } + else { + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); + } + } } break; case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -253,7 +286,7 @@ setup_point_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_point_fragcoord_coef(setup, point, info, 0, + setup_point_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -270,6 +303,7 @@ try_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { /* x/y positions in fixed point */ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; const int sizeAttr = setup->psize; const float size = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] @@ -321,7 +355,7 @@ try_setup_point( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); point = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &bytes); if (!point) @@ -337,40 +371,40 @@ try_setup_point( struct lp_setup_context *setup, info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ - setup_point_coefficients(setup, point, &info); + setup_point_coefficients(setup, &info); - point->inputs.facing = 1.0F; - point->inputs.state = setup->fs.stored; + point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { - point->plane[0].dcdx = -1; - point->plane[0].dcdy = 0; - point->plane[0].c = 1-bbox.x0; - point->plane[0].ei = 0; - point->plane[0].eo = 1; - - point->plane[1].dcdx = 1; - point->plane[1].dcdy = 0; - point->plane[1].c = bbox.x1+1; - point->plane[1].ei = -1; - point->plane[1].eo = 0; - - point->plane[2].dcdx = 0; - point->plane[2].dcdy = 1; - point->plane[2].c = 1-bbox.y0; - point->plane[2].ei = 0; - point->plane[2].eo = 1; - - point->plane[3].dcdx = 0; - point->plane[3].dcdy = -1; - point->plane[3].c = bbox.y1+1; - point->plane[3].ei = -1; - point->plane[3].eo = 0; + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); @@ -383,10 +417,11 @@ lp_setup_point(struct lp_setup_context *setup, { if (!try_setup_point( setup, v0 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_point( setup, v0 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 9016bb8e249..4ab0b72a574 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -32,15 +32,18 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" -#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 - +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> +#endif static INLINE int subpixel_snap(float a) @@ -65,7 +68,7 @@ fixed_to_float(int a) * immediately after it. * The memory is allocated from the per-scene pool, not per-tile. * \param tri_size returns number of bytes allocated - * \param nr_inputs number of fragment shader inputs + * \param num_inputs number of fragment shader inputs * \return pointer to triangle space */ struct lp_rast_triangle * @@ -75,22 +78,23 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; - unsigned tri_bytes, bytes; - char *inputs; - tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); - bytes = tri_bytes + (3 * input_array_sz); + *tri_size = (sizeof(struct lp_rast_triangle) + + 3 * input_array_sz + + plane_sz); - tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); + if (tri == NULL) + return NULL; - if (tri) { - inputs = ((char *)tri) + tri_bytes; - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + tri->inputs.stride = input_array_sz; - *tri_size = bytes; + { + char *a = (char *)tri; + char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } return tri; @@ -101,25 +105,26 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; int i, j; debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", name, v[0][0], v[0][1], v[0][2], v[0][3]); - for (i = 0; i < setup->fs.nr_inputs; i++) { - const float *in = v[setup->fs.input[i].src_index]; + for (i = 0; i < key->num_inputs; i++) { + const float *in = v[key->inputs[i].src_index]; debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", i, - name, setup->fs.input[i].src_index, - (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", - (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", - (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", - (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + name, key->inputs[i].src_index, + (key->inputs[i].usage_mask & 0x1) ? "x" : " ", + (key->inputs[i].usage_mask & 0x2) ? "y" : " ", + (key->inputs[i].usage_mask & 0x4) ? "z" : " ", + (key->inputs[i].usage_mask & 0x8) ? "w" : " "); for (j = 0; j < 4; j++) - if (setup->fs.input[i].usage_mask & (1<<j)) + if (key->inputs[i].usage_mask & (1<<j)) debug_printf("%.5f ", in[j]); debug_printf("\n"); @@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); } else { LP_COUNT(nr_shade_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -225,13 +232,13 @@ do_triangle_ccw(struct lp_setup_context *setup, boolean frontfacing ) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; - int x[3]; - int y[3]; - int area; + struct lp_rast_plane *plane; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@ -248,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup, x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = 0; y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - + y[3] = 0; + /* Bounding rectangle (in pixels) */ { @@ -289,13 +298,13 @@ do_triangle_ccw(struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); tri = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!tri) return FALSE; -#ifdef DEBUG +#if 0 tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; @@ -304,92 +313,172 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; - - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; - - area = (tri->plane[0].dcdy * tri->plane[2].dcdx - - tri->plane[2].dcdy * tri->plane[0].dcdx); - LP_COUNT(nr_tris); - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return TRUE; - } - /* Setup parameter interpolants: */ - lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); - - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + setup->setup.variant->jit_function( v0, + v1, + v2, + frontfacing, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs) ); + + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; - - for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; + if (0) + lp_dump_setup_coef(&setup->setup.variant->key, + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); + + plane = GET_PLANES(tri); + +#if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); - /* half-edge constants, will be interated over the whole render - * target. + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); + + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); + } +#else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } +#endif - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } @@ -412,29 +501,25 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].dcdx = -1; - tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-bbox.x0; - tri->plane[3].ei = 0; - tri->plane[3].eo = 1; - - tri->plane[4].dcdx = 1; - tri->plane[4].dcdy = 0; - tri->plane[4].c = bbox.x1+1; - tri->plane[4].ei = -1; - tri->plane[4].eo = 0; - - tri->plane[5].dcdx = 0; - tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-bbox.y0; - tri->plane[5].ei = 0; - tri->plane[5].eo = 1; - - tri->plane[6].dcdx = 0; - tri->plane[6].dcdy = -1; - tri->plane[6].c = bbox.y1+1; - tri->plane[6].ei = -1; - tri->plane[6].eo = 0; + plane[3].dcdx = -1; + plane[3].dcdy = 0; + plane[3].c = 1-bbox.x0; + plane[3].eo = 1; + + plane[4].dcdx = 1; + plane[4].dcdy = 0; + plane[4].c = bbox.x1+1; + plane[4].eo = 0; + + plane[5].dcdx = 0; + plane[5].dcdy = 1; + plane[5].c = 1-bbox.y0; + plane[5].eo = 1; + + plane[6].dcdx = 0; + plane[6].dcdy = -1; + plane[6].c = bbox.y1+1; + plane[6].eo = 0; } return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); @@ -487,51 +572,58 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | (bbox->y1 - (bbox->y0 & ~3))); - if (nr_planes == 3) { - if (sz < 4 && dx < 64) - { - /* Triangle is contained in a single 4x4 stamp: - */ - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_4, - lp_rast_arg_triangle(tri, mask) ); - } - - if (sz < 16 && dx < 64) - { - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - /* Triangle is contained in a single 16x16 block: - */ - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_16, - lp_rast_arg_triangle(tri, mask) ); - } - } - - /* Determine which tile(s) intersect the triangle's bounding box */ if (dx < TILE_SIZE) { int ix0 = bbox->x0 / TILE_SIZE; int iy0 = bbox->y0 / TILE_SIZE; + int px = bbox->x0 & 63 & ~3; + int py = bbox->y0 & 63 & ~3; + int mask = px | (py << 8); assert(iy0 == bbox->y1 / TILE_SIZE && ix0 == bbox->x1 / TILE_SIZE); + if (nr_planes == 3) { + if (sz < 4) + { + /* Triangle is contained in a single 4x4 stamp: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16) + { + /* Triangle is contained in a single 16x16 block: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); + } + } + else if (nr_planes == 4 && sz < 16) + { + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); + } + + /* Triangle is contained in a single tile: */ - return lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } else { + struct lp_rast_plane *plane = GET_PLANES(tri); int c[MAX_PLANES]; int ei[MAX_PLANES]; int eo[MAX_PLANES]; @@ -545,14 +637,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { - c[i] = (tri->plane[i].c + - tri->plane[i].dcdy * iy0 * TILE_SIZE - - tri->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = tri->plane[i].ei << TILE_ORDER; - eo[i] = tri->plane[i].eo << TILE_ORDER; - xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); - ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + c[i] = (plane[i].c + + plane[i].dcdy * iy0 * TILE_SIZE - + plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = (plane[i].dcdy - + plane[i].dcdx - + plane[i].eo) << TILE_ORDER; + + eo[i] = plane[i].eo << TILE_ORDER; + xstep[i] = -(plane[i].dcdx << TILE_ORDER); + ystep[i] = plane[i].dcdy << TILE_ORDER; } @@ -594,9 +689,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - if (!lp_scene_bin_command( scene, x, y, - lp_rast_tri_tab[count], - lp_rast_arg_triangle(tri, partial) )) + + if (!lp_scene_bin_cmd_with_state( scene, x, y, + setup->fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) goto fail; LP_COUNT(nr_partially_covered_64); @@ -635,40 +732,62 @@ fail: /** - * Draw triangle if it's CW, cull otherwise. + * Try to draw the triangle, restart the scene on failure. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void retry_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) { - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + if (!do_triangle_ccw( setup, v0, v1, v2, front )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) - assert(0); + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + return; } } +static INLINE float +calc_area(const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + return dx01 * dy20 - dx20 * dy01; +} + /** - * Draw triangle if it's CCW, cull otherwise. + * Draw triangle if it's CW, cull otherwise. */ -static void triangle_ccw( struct lp_setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - { - lp_setup_flush_and_restart(setup); - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - assert(0); - } + float area = calc_area(v0, v1, v2); + + if (area < 0.0f) + retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface); } +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface); +} /** * Draw triangle whether it's CW or CCW. @@ -678,18 +797,12 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - const float det = ex * fy - ey * fx; - if (det < 0.0f) - triangle_ccw( setup, v0, v1, v2 ); - else if (det > 0.0f) - triangle_cw( setup, v0, v1, v2 ); + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + else if (area < 0.0f) + retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 6308561f242..9c1f0fe7939 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: @@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 86313e1c484..7893e9cdc0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -97,6 +97,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *, void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_update_setup(struct llvmpipe_context *lp); + void llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index bb059d04599..0f5f7369e04 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,12 +50,13 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - struct lp_shader_input *inputs = llvmpipe->inputs; unsigned vs_index; uint i; /* - * Match FS inputs against VS outputs, emitting the necessary attributes. + * Match FS inputs against VS outputs, emitting the necessary + * attributes. Could cache these structs and look them up with a + * combination of fragment shader, vertex shader ids. */ vinfo->num_attribs = 0; @@ -66,72 +67,18 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); - for (i = 0; i < lpfs->info.num_inputs; i++) { + for (i = 0; i < lpfs->info.base.num_inputs; i++) { /* * Search for each input in current vs output: */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); - if (vs_index < 0) { - /* - * This can happen with sprite coordinates - the vertex - * shader doesn't need to provide an output as we generate - * them internally. However, lets keep pretending that there - * is something there to not confuse other code. - */ - vs_index = 0; - } - - /* This can be pre-computed, except for flatshade: - */ - inputs[i].usage_mask = lpfs->info.input_usage_mask[i]; - - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } - - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_FACE: - inputs[i].interp = LP_INTERP_FACING; - break; - case TGSI_SEMANTIC_POSITION: - /* Position was already emitted above - */ - inputs[i].interp = LP_INTERP_POSITION; - inputs[i].src_index = 0; - continue; - case TGSI_SEMANTIC_COLOR: - /* Colors are linearly inputs[i].interpolated in the fragment shader - * even when flatshading is active. This just tells the - * setup module to use coefficients with ddx==0 and - * ddy==0. - */ - if (llvmpipe->rasterizer->flatshade) - inputs[i].interp = LP_INTERP_CONSTANT; - break; - - default: - break; - } + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); /* * Emit the requested fs attribute for all but position. */ - - inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } @@ -145,15 +92,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } - llvmpipe->num_inputs = lpfs->info.num_inputs; - draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - - lp_setup_set_fs_inputs(llvmpipe->setup, - inputs, - lpfs->info.num_inputs); } @@ -190,6 +130,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_QUERY)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_RASTERIZER)) + llvmpipe_update_setup( llvmpipe ); + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f0a15e11b9b..9fbedac165f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -99,74 +99,12 @@ #include <llvm-c/Analysis.h> +#include <llvm-c/BitWriter.h> static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter) -{ - const struct util_format_description *format_desc; - struct lp_type dst_type; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - /* - * Depths are expected to be between 0 and 1, even if they are stored in - * floats. Setting these bits here will ensure that the lp_build_conv() call - * below won't try to unnecessarily clamp the incoming values. - */ - if(src_type.floating) { - src_type.sign = FALSE; - src_type.norm = TRUE; - } - else { - assert(!src_type.sign); - assert(src_type.norm); - } - - /* Pick the depth type. */ - dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); - - /* FIXME: Cope with a depth test type with a different bit width. */ - assert(dst_type.width == src_type.width); - assert(dst_type.length == src_type.length); - - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); - - dst_ptr = LLVMBuildBitCast(builder, - dst_ptr, - LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - dst_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. @@ -255,14 +213,13 @@ generate_quad_mask(LLVMBuilderRef builder, * \param partial_mask if 1, do mask_input testing */ static void -generate_fs(struct llvmpipe_context *lp, - struct lp_fragment_shader *shader, +generate_fs(struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, struct lp_type type, LLVMValueRef context_ptr, unsigned i, - const struct lp_build_interp_soa_context *interp, + struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef (*color)[4], @@ -272,18 +229,52 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - LLVMValueRef z = interp->pos[2]; + LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -294,20 +285,14 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - flow = lp_build_flow_create(builder); - memset(outputs, 0, sizeof outputs); - lp_build_flow_scope_begin(flow); - /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); } } - lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ if (partial_mask) { @@ -319,74 +304,126 @@ generate_fs(struct llvmpipe_context *lp, } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + lp_build_mask_begin(&mask, builder, type, *pmask); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + lp_build_interp_soa_update_inputs(interp, i); + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler, &shader->info); + outputs, sampler, &shader->info.base); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); - } - - color[cbuf][chan] = out; - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } - } + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } - lp_build_mask_end(&mask); + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); + } + } + } + } - *pmask = mask.value; + if (counter) + lp_build_occlusion_count(builder, type, + lp_build_mask_value(&mask), counter); + *pmask = lp_build_mask_end(&mask); } @@ -407,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMValueRef const_ptr; @@ -421,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); - flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ - lp_build_mask_begin(&mask_ctx, flow, type, mask); + lp_build_mask_begin(&mask_ctx, builder, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -457,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend, } lp_build_mask_end(&mask_ctx); - lp_build_flow_destroy(flow); } @@ -468,13 +503,13 @@ generate_blend(const struct pipe_blend_state *blend, * 2x2 pixels. */ static void -generate_fragment(struct llvmpipe_context *lp, +generate_fragment(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; @@ -502,11 +537,24 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; + const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; + /* Adjust color input interpolation according to flatshade state: + */ + memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.base.num_inputs; i++) { + if (inputs[i].interp == LP_INTERP_COLOR) { + if (key->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; + } + } + /* TODO: actually pick these based on the fs and color buffer * characteristics. */ @@ -542,12 +590,12 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[3] = LLVMInt32Type(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ @@ -558,7 +606,6 @@ generate_fragment(struct llvmpipe_context *lp, variant->function[partial_mask] = function; - /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ @@ -606,8 +653,8 @@ generate_fragment(struct llvmpipe_context *lp, * already included in the shader key. */ lp_build_interp_soa_init(&interp, - lp->num_inputs, - lp->inputs, + shader->info.base.num_inputs, + inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); @@ -616,17 +663,18 @@ generate_fragment(struct llvmpipe_context *lp, sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ + zs_format_desc = util_format_description(key->zsbuf_format); + for(i = 0; i < num_fs; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + i*fs_type.length*zs_format_desc->block.bits/8, + 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); - - generate_fs(lp, shader, key, + generate_fs(shader, key, builder, fs_type, context_ptr, @@ -660,9 +708,18 @@ generate_fragment(struct llvmpipe_context *lp, * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; + + for (i = 0; i < num_fs; i++) { + fs_color_vals[i] = + LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); + } + lp_build_conv(builder, fs_type, blend_type, - fs_out_color[cbuf][chan], num_fs, + fs_color_vals, + num_fs, &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } @@ -685,14 +742,23 @@ generate_fragment(struct llvmpipe_context *lp, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.base.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 @@ -717,12 +783,17 @@ generate_fragment(struct llvmpipe_context *lp, /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); - if (gallivm_debug & GALLIVM_DEBUG_IR) { + if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); } + /* Dump byte code to a file */ + if (0) { + LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + } + /* * Translate the LLVM IR into machine code. */ @@ -731,7 +802,7 @@ generate_fragment(struct llvmpipe_context *lp, variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { + if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { lp_disassemble(f); } lp_func_delete_body(function); @@ -746,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("fs variant %p:\n", (void *) key); + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); } @@ -770,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); } + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + if (key->blend.logicop_enable) { debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); } @@ -782,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if (key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } + for (i = 0; i < key->nr_samplers; ++i) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod); } } @@ -823,7 +903,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) } static struct lp_fragment_shader_variant * -generate_variant(struct llvmpipe_context *lp, +generate_variant(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { @@ -861,7 +941,7 @@ generate_variant(struct llvmpipe_context *lp, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !shader->info.uses_kill + !shader->info.base.uses_kill ? TRUE : FALSE; @@ -869,11 +949,11 @@ generate_variant(struct llvmpipe_context *lp, lp_debug_fs_variant(variant); } - generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + generate_fragment(screen, shader, variant, RAST_EDGE_TEST); if (variant->opaque) { /* Specialized shader, which doesn't need to read the color buffer. */ - generate_fragment(lp, shader, variant, RAST_WHOLE); + generate_fragment(screen, shader, variant, RAST_WHOLE); } else { variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } @@ -889,6 +969,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; int nr_samplers; + int i; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@ -898,7 +979,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, make_empty_list(&shader->variants); /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &shader->info); + lp_build_tgsi_info(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); @@ -910,18 +991,58 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, return NULL; } - nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); + for (i = 0; i < shader->info.base.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; + + switch (shader->info.base.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + + switch (shader->info.base.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + /* Colors may be either linearly or constant interpolated in + * the fragment shader, but that information isn't available + * here. Mark color inputs and fix them up later. + */ + shader->inputs[i].interp = LP_INTERP_COLOR; + break; + case TGSI_SEMANTIC_FACE: + shader->inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; + } + + shader->inputs[i].src_index = i+1; + } + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); - for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { - unsigned usage_mask = shader->info.input_usage_mask[attrib]; + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; debug_printf(" IN[%u].%s%s%s%s\n", attrib, usage_mask & TGSI_WRITEMASK_X ? "x" : "", @@ -1150,10 +1271,10 @@ make_variant_key(struct llvmpipe_context *lp, /* This value will be the same for all the variants of a given shader: */ - key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; for(i = 0; i < key->nr_samplers; ++i) { - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); @@ -1168,6 +1289,7 @@ make_variant_key(struct llvmpipe_context *lp, void llvmpipe_update_fs(struct llvmpipe_context *lp) { + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant = NULL; @@ -1208,7 +1330,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } t0 = os_time_get(); - variant = generate_variant(lp, shader, &key); + variant = generate_variant(screen, shader, &key); t1 = os_time_get(); dt = t1 - t0; @@ -1228,6 +1350,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) + + + + void llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 4999b8dca1a..7d58c4936c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,8 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ +#include "lp_bld_interp.h" /* for struct lp_shader_input */ struct tgsi_token; @@ -96,7 +98,7 @@ struct lp_fragment_shader { struct pipe_shader_state base; - struct tgsi_shader_info info; + struct lp_tgsi_info info; struct lp_fs_variant_list_item variants; @@ -107,6 +109,9 @@ struct lp_fragment_shader unsigned no; unsigned variants_created; unsigned variants_cached; + + /** Fragment shader input interpolation info */ + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 17a4a0ed02d..1dd866195d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, struct pipe_sampler_view **views) { unsigned i; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; assert(num <= PIPE_MAX_VERTEX_SAMPLERS); if (!num) diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c new file mode 100644 index 00000000000..2c8b8b9a928 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -0,0 +1,759 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_time.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */ + +#include "lp_perf.h" +#include "lp_debug.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + + +/* currently organized to interpolate full float[4] attributes even + * when some elements are unused. Later, can pack vertex data more + * closely. + */ + + +struct lp_setup_args +{ + /* Function arguments: + */ + LLVMValueRef v0; + LLVMValueRef v1; + LLVMValueRef v2; + LLVMValueRef facing; /* boolean */ + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + + /* Derived: + */ + LLVMValueRef x0_center; + LLVMValueRef y0_center; + LLVMValueRef dy20_ooa; + LLVMValueRef dy01_ooa; + LLVMValueRef dx20_ooa; + LLVMValueRef dx01_ooa; +}; + +static LLVMTypeRef type4f(void) +{ + return LLVMVectorType(LLVMFloatType(), 4); +} + + +/* Equivalent of _mm_setr_ps(a,b,c,d) + */ +static LLVMValueRef vec4f(LLVMBuilderRef bld, + LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, + const char *name) +{ + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + LLVMValueRef res = LLVMGetUndef(type4f()); + + res = LLVMBuildInsertElement(bld, res, a, i0, ""); + res = LLVMBuildInsertElement(bld, res, b, i1, ""); + res = LLVMBuildInsertElement(bld, res, c, i2, ""); + res = LLVMBuildInsertElement(bld, res, d, i3, name); + + return res; +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(type4f()); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +static void +store_coef(LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); + + LLVMBuildStore(builder, + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + + LLVMBuildStore(builder, + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + + LLVMBuildStore(builder, + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); +} + + + +static void +emit_constant_coef4( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert, + unsigned attr) +{ + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0); + LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr"); + LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr"); + + store_coef(builder, args, slot, vert_attr, zerovec, zerovec); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void +emit_facing_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot ) +{ + LLVMValueRef a0_0 = args->facing; + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + + store_coef(builder, args, slot, a0, zerovec, zerovec); +} + + +static LLVMValueRef +vert_attrib(LLVMBuilderRef b, + LLVMValueRef vert, + int attr, + int elem, + const char *name) +{ + LLVMValueRef idx[2]; + idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); + idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); +} + + + +static void +emit_coef4( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMValueRef dy20_ooa = args->dy20_ooa; + LLVMValueRef dy01_ooa = args->dy01_ooa; + LLVMValueRef dx20_ooa = args->dx20_ooa; + LLVMValueRef dx01_ooa = args->dx01_ooa; + LLVMValueRef x0_center = args->x0_center; + LLVMValueRef y0_center = args->y0_center; + + /* XXX: using fsub, fmul on vector types -- does this work?? + */ + LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); + LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); + + /* Calculate dadx (vec4f) + */ + LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); + LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); + LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); + + /* Calculate dady (vec4f) + */ + LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); + LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); + LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); + + /* Calculate a0 - the attribute value at the origin + */ + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + + store_coef(b, args, slot, attr_0, dadx, dady); +} + + +static void +emit_linear_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + emit_coef4(b, args, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +emit_perspective_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); + LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); + LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); + + LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a"); + LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a"); + LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a"); + + emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); +} + + +static void +emit_position_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + int slot, int attrib ) +{ + emit_linear_coef(builder, args, slot, attrib); +} + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +static void +emit_tri_coef( LLVMBuilderRef builder, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args ) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + emit_position_coef(builder, args, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr); + } + else { + emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr); + } + break; + + case LP_INTERP_LINEAR: + emit_linear_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + emit_perspective_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + emit_facing_coef(builder, args, slot+1); + break; + + default: + assert(0); + } + } +} + + +/* XXX: This is generic code, share with fs/vs codegen: + */ +static lp_jit_setup_triangle +finalize_function(struct llvmpipe_screen *screen, + LLVMBuilderRef builder, + LLVMValueRef function) +{ + void *f; + + /* Verify the LLVM IR. If invalid, dump and abort */ +#ifdef DEBUG + if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + if (1) + lp_debug_dump_value(function); + abort(); + } +#endif + + /* Apply optimizations to LLVM IR */ + LLVMRunFunctionPassManager(screen->pass, function); + + if (gallivm_debug & GALLIVM_DEBUG_IR) + { + /* Print the LLVM IR to stderr */ + lp_debug_dump_value(function); + debug_printf("\n"); + } + + /* + * Translate the LLVM IR into machine code. + */ + f = LLVMGetPointerToGlobal(screen->engine, function); + + if (gallivm_debug & GALLIVM_DEBUG_ASM) + { + lp_disassemble(f); + } + + lp_func_delete_body(function); + + return f; +} + +/* XXX: Generic code: + */ +static void +lp_emit_emms(LLVMBuilderRef builder) +{ +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif +} + + +/* XXX: generic code: + */ +static void +set_noalias(LLVMBuilderRef builder, + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) +{ + int i; + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), + LLVMNoAliasAttribute); +} + +static void +init_args(LLVMBuilderRef b, + struct lp_setup_args *args, + const struct lp_setup_variant *variant) +{ + LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); + LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + + LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); + LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + + LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); + LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + + LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), + variant->key.pixel_center_half ? 0.5 : 0); + + LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); + LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); + + LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); + LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); + LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); + LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + + LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); + LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); + LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); + LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + + LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); + LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); + LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); + LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + + args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); + args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + + args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); + args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + + args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); + args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); +} + +/** + * Generate the runtime callable function for the coefficient calculation. + * + */ +static struct lp_setup_variant * +generate_setup_variant(struct llvmpipe_screen *screen, + struct lp_setup_variant_key *key) +{ + struct lp_setup_variant *variant = NULL; + struct lp_setup_args args; + char func_name[256]; + LLVMTypeRef vec4f_type; + LLVMTypeRef func_type; + LLVMTypeRef arg_types[7]; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + int64_t t0, t1; + + if (0) + goto fail; + + variant = CALLOC_STRUCT(lp_setup_variant); + if (variant == NULL) + goto fail; + + if (LP_DEBUG & DEBUG_COUNTERS) { + t0 = os_time_get(); + } + + memcpy(&variant->key, key, key->size); + variant->list_item_global.base = variant; + + util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u", + 0, + variant->no); + + /* Currently always deal with full 4-wide vertex attributes from + * the vertices. + */ + + vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + + arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ + arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ + arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ + arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ + arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ + arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(screen->module, func_name, func_type); + if (!variant->function) + goto fail; + + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + args.v0 = LLVMGetParam(variant->function, 0); + args.v1 = LLVMGetParam(variant->function, 1); + args.v2 = LLVMGetParam(variant->function, 2); + args.facing = LLVMGetParam(variant->function, 3); + args.a0 = LLVMGetParam(variant->function, 4); + args.dadx = LLVMGetParam(variant->function, 5); + args.dady = LLVMGetParam(variant->function, 6); + + lp_build_name(args.v0, "in_v0"); + lp_build_name(args.v1, "in_v1"); + lp_build_name(args.v2, "in_v2"); + lp_build_name(args.facing, "in_facing"); + lp_build_name(args.a0, "out_a0"); + lp_build_name(args.dadx, "out_dadx"); + lp_build_name(args.dady, "out_dady"); + + /* + * Function body + */ + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + init_args(builder, &args, variant); + emit_tri_coef(builder, &variant->key, &args); + + lp_emit_emms(builder); + LLVMBuildRetVoid(builder); + LLVMDisposeBuilder(builder); + + variant->jit_function = finalize_function(screen, builder, + variant->function); + if (!variant->jit_function) + goto fail; + + /* + * Update timing information: + */ + if (LP_DEBUG & DEBUG_COUNTERS) { + t1 = os_time_get(); + LP_COUNT_ADD(llvm_compile_time, t1 - t0); + LP_COUNT_ADD(nr_llvm_compiles, 1); + } + + return variant; + +fail: + if (variant) { + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + FREE(variant); + } + + return NULL; +} + + + +static void +lp_make_setup_variant_key(struct llvmpipe_context *lp, + struct lp_setup_variant_key *key) +{ + struct lp_fragment_shader *fs = lp->fs; + unsigned i; + + assert(sizeof key->inputs[0] == sizeof(ushort)); + + key->num_inputs = fs->info.base.num_inputs; + key->flatshade_first = lp->rasterizer->flatshade_first; + key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; + key->size = Offset(struct lp_setup_variant_key, + inputs[key->num_inputs]); + key->pad = 0; + + memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); + for (i = 0; i < key->num_inputs; i++) { + if (key->inputs[i].interp == LP_INTERP_COLOR) { + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_LINEAR; + } + } + +} + + +static void +remove_setup_variant(struct llvmpipe_context *lp, + struct lp_setup_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del setup_variant #%u total %u\n", + variant->no, lp->nr_setup_variants); + } + + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_global); + lp->nr_setup_variants--; + FREE(variant); +} + + + +/* When the number of setup variants exceeds a threshold, cull a + * fraction (currently a quarter) of them. + */ +static void +cull_setup_variants(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + int i; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { + struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list); + remove_setup_variant(lp, item->base); + } +} + + +/** + * Update fragment/vertex shader linkage state. This is called just + * prior to drawing something when some fragment-related state has + * changed. + */ +void +llvmpipe_update_setup(struct llvmpipe_context *lp) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + struct lp_setup_variant_key *key = &lp->setup_variant.key; + struct lp_setup_variant *variant = NULL; + struct lp_setup_variant_list_item *li; + + lp_make_setup_variant_key(lp, key); + + foreach(li, &lp->setup_variants_list) { + if(li->base->key.size == key->size && + memcmp(&li->base->key, key, key->size) == 0) { + variant = li->base; + break; + } + } + + if (variant) { + move_to_head(&lp->setup_variants_list, &variant->list_item_global); + } + else { + if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { + cull_setup_variants(lp); + } + + variant = generate_setup_variant(screen, key); + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } + + lp_setup_set_setup_variant(lp->setup, + variant); +} + +void +lp_delete_setup_variants(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_list_item *li; + li = first_elem(&lp->setup_variants_list); + while(!at_end(&lp->setup_variants_list, li)) { + struct lp_setup_variant_list_item *next = next_elem(li); + remove_setup_variant(lp, li->base); + li = next; + } +} + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) +{ + int i, slot; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = sa0 [0][i]; + float dadx = sdadx[0][i]; + float dady = sdady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned usage_mask = key->inputs[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h new file mode 100644 index 00000000000..b0c81baa75f --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -0,0 +1,80 @@ +#ifndef LP_STATE_SETUP_H +#define LP_STATE_SETUP_H + +#include "lp_bld_interp.h" + + +struct llvmpipe_context; +struct lp_setup_variant; + +struct lp_setup_variant_list_item +{ + struct lp_setup_variant *base; + struct lp_setup_variant_list_item *next, *prev; +}; + + +struct lp_setup_variant_key { + unsigned num_inputs:8; + unsigned flatshade_first:1; + unsigned pixel_center_half:1; + unsigned pad:7; + unsigned size:16; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; +}; + + +typedef void (*lp_jit_setup_triangle)( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4] ); + + + + +/* At this stage, for a given variant key, we create a + * draw_vertex_info struct telling the draw module how to format the + * vertices, and an llvm-generated function which calculates the + * attribute interpolants (a0, dadx, dady) from three of those + * vertices. + */ +struct lp_setup_variant { + struct lp_setup_variant_key key; + + struct lp_setup_variant_list_item list_item_global; + + /* XXX: this is a pointer to the LLVM IR. Once jit_function is + * generated, we never need to use the IR again - need to find a + * way to release this data without destroying the generated + * assembly. + */ + LLVMValueRef function; + + /* The actual generated setup function: + */ + lp_jit_setup_triangle jit_function; + + unsigned no; +}; + +void lp_setup_tri_fallback( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ); + +void lp_delete_setup_variants(struct llvmpipe_context *lp); + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 57b0ee57767..816518e5081 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); @@ -100,9 +97,10 @@ printv(char* string, v4sf value) f[0], f[1], f[2], f[3]); } -static void +static boolean compare(v4sf x, v4sf y) { + boolean success = TRUE; float *xp = (float *) &x; float *yp = (float *) &y; if (xp[0] != yp[0] || @@ -110,7 +108,9 @@ compare(v4sf x, v4sf y) xp[2] != yp[2] || xp[3] != yp[3]) { printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + success = FALSE; } + return success; } @@ -171,9 +171,12 @@ test_round(unsigned verbose, FILE *fp) LLVMDumpModule(module); for (i = 0; i < 3; i++) { + /* NOTE: There are several acceptable rules for x.5 rounding: ceiling, + * nearest even, etc. So we avoid testing such corner cases here. + */ v4sf xvals[3] = { {-10.0, -1, 0, 12.0}, - {-1.5, -0.25, 1.25, 2.5}, + {-1.49, -0.25, 1.25, 2.51}, {-0.99, -0.01, 0.01, 0.99} }; v4sf x = xvals[i]; @@ -191,7 +194,7 @@ test_round(unsigned verbose, FILE *fp) y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); @@ -200,7 +203,7 @@ test_round(unsigned verbose, FILE *fp) y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); @@ -209,7 +212,7 @@ test_round(unsigned verbose, FILE *fp) y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); @@ -218,7 +221,7 @@ test_round(unsigned verbose, FILE *fp) y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); - compare(ref, y); + success = success && compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); @@ -247,11 +250,7 @@ test_round(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - boolean success = TRUE; - - test_round(verbose, fp); - - return success; + return test_round(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 7ab357f162e..79939b1a393 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 2ba39052aba..e49f9c62fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print -def generate_ssse3(): +def generate_sse2(): print ''' #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" -static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, - const uint8_t *src, unsigned src_stride, - unsigned x0, unsigned y0) +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) { + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; unsigned x, y; - __m128i *pdst = (__m128i*) dst; - const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); - unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; - unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; - - const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i line0 = *(__m128i*)ysrc0; - const uint8_t *ysrc = ysrc0 + src_stride; - ysrc0 += tile_stridey; - - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - __m128i r, g, b, a, line1; - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_shuffle_epi8(line0, shuffle00); - g = _mm_shuffle_epi8(line0, shuffle01); - b = _mm_shuffle_epi8(line0, shuffle02); - a = _mm_shuffle_epi8(line0, shuffle03); - - line0 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); - - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc -= tile_stridex; - r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); - - if (x + 1 < TILE_SIZE) { - line0 = *(__m128i*)ysrc; - ysrc += src_stride; - } - - PIPE_READ_WRITE_BARRIER(); - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); - - *pdst++ = r; - *pdst++ = g; - *pdst++ = b; - *pdst++ = a; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); } - } + src += 4 * src_stride; + } } static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, - uint8_t *dst, unsigned dst_stride, +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0) { unsigned int x, y; - const __m128i *psrc = (__m128i*) src; - const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); - uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); - __m128i c0 = *psrc++; - __m128i c1; - - const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i *tile = (__m128i*) pdst; - pdst += dst_stride * TILE_VECTOR_HEIGHT; - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - uint8_t *linep = (uint8_t*) (tile++); - __m128i line0, line1, line2, line3; - - c1 = *psrc++; /* r */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_shuffle_epi8(c0, shuffle00); - line1 = _mm_shuffle_epi8(c0, shuffle01); - line2 = _mm_shuffle_epi8(c0, shuffle02); - line3 = _mm_shuffle_epi8(c0, shuffle03); - - c0 = *psrc++; /* g */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); - - c1 = *psrc++; /* b */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); - - if (psrc != end) - c0 = *psrc++; /* a */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); - - *(__m128i*) (linep) = line0; - *(__m128i*) (((char*)linep) + dst_stride) = line1; - *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; - *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; } + + dst += 4 * dst_stride; } } -#endif /* PIPE_ARCH_SSSE3 */ +#endif /* PIPE_ARCH_SSE */ ''' @@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -577,7 +546,7 @@ def main(): print '};' print - generate_ssse3() + generate_sse2() channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index ebb21a6e5a3..a9426df686f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) int ret; ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202, - &screen->channel); + 512*1024, &screen->channel); if (ret) return ret; screen->device = dev; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index ac69c7848e9..bf6a577188b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -26,6 +26,9 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define nouveau_bo_tile_layout(nvbo) \ + ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) + /* Constant buffer assignment */ #define NV50_CB_PMISC 0 #define NV50_CB_PVP 1 diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3f3166261b1..f70c138fe1a 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -92,7 +92,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) return 1; } - if (!bo->tile_flags) { + if (!nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f973cf24b98..0cc2f4a837f 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, WAIT_RING (chan, 14); - if (!src_bo->tile_flags) { + if (!nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); @@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sz); /* copying only 1 zslice per call */ } - if (!dst_bo->tile_flags) { + if (!nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); @@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (src_bo->tile_flags) { + if (nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } - if (dst_bo->tile_flags) { + if (nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); OUT_RING (chan, (dy << 16) | (dx * cpp)); @@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50, MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - if (bo->tile_flags) { + if (nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); OUT_RING (chan, dst_format); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 145a7985da3..f78fe34790c 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,6 +29,7 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, + { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, { "draw", DBG_DRAW, "Log draw calls" }, { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" }, { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" }, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index d9d4a9304df..c91532eb7b3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -378,7 +378,8 @@ static void r300_translate_fragment_shader( /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_FP); + DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &shader->code; compiler.state = shader->compare_state; @@ -395,7 +396,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7f41ff0e2ec..d445df408dd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_STREAM_OUTPUT: + case PIPE_CAP_PRIMITIVE_RESTART: return 0; /* Texturing. */ @@ -153,8 +156,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; default: - fprintf(stderr, "r300: Implementation error: Bad param %d\n", - param); + debug_printf("r300: Warning: Unknown CAP %d in get_param.\n", + param); return 0; } } @@ -264,8 +267,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0f; default: - fprintf(stderr, "r300: Implementation error: Bad paramf %d\n", - param); + debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n", + param); return 0.0f; } } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index dc2bc7e8279..8b7f1fab61b 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -102,6 +102,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_NO_CBZB (1 << 21) /* Statistics. */ #define DBG_STATS (1 << 24) +#define DBG_P_STAT (1 << 25) /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 904736ef06d..730ef7a3ee2 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -620,14 +620,19 @@ static uint32_t r300_get_border_color(enum pipe_format format, } break; - default: - /* I think the fat formats (16, 32) are specified - * as the 8-bit ones. I am not sure how compressed formats - * work here. */ + case 8: r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + break; + + case 16: + r = ((float_to_ubyte(border_swizzled[2]) & 0xff) << 0) | + ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | + ((float_to_ubyte(border_swizzled[0]) & 0xff) << 16) | + ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + break; } return r; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index e2b9af9d018..65696555ac3 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -202,7 +202,8 @@ void r300_translate_vertex_shader(struct r300_context *r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_VP); + DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &vs->code; compiler.UserData = vs; compiler.Base.is_r500 = r300->screen->caps.is_r500; @@ -214,7 +215,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256; compiler.Base.remove_unused_constants = TRUE; - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index ede0bb2ec45..a484f38e9f1 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -19,6 +19,8 @@ C_SOURCES = \ r600_texture.c \ r700_asm.c \ evergreen_state.c \ - eg_asm.c + eg_asm.c \ + r600_translate.c \ + r600_state_common.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index b6d9b7d8d22..bf0ad8571ba 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -18,6 +18,7 @@ r600 = env.ConvenienceLibrary( source = [ 'r600_asm.c', 'r600_buffer.c', + 'r600_blit.c', 'r600_helper.c', 'r600_pipe.c', 'r600_query.c', diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 52b7189e9e5..c30f09c394b 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -36,8 +36,13 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 81094904485..be81c28b43f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "evergreend.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -276,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) } } +static inline uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -301,6 +310,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -338,6 +353,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -382,6 +400,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_COLOR_16; + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -419,6 +443,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16_16_FLOAT; case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: return V_028C70_COLOR_16_16; /* 64-bit buffers. */ @@ -499,32 +524,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 16: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_16_FLOAT; + result = FMT_16_FLOAT; break; case 2: - result = V_030008_FMT_16_16_FLOAT; + result = FMT_16_16_FLOAT; break; case 3: - result = V_030008_FMT_16_16_16_FLOAT; + result = FMT_16_16_16_FLOAT; break; case 4: - result = V_030008_FMT_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_32_FLOAT; + result = FMT_32_FLOAT; break; case 2: - result = V_030008_FMT_32_32_FLOAT; + result = FMT_32_32_FLOAT; break; case 3: - result = V_030008_FMT_32_32_32_FLOAT; + result = FMT_32_32_32_FLOAT; break; case 4: - result = V_030008_FMT_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; break; } break; @@ -540,48 +565,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 8: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_8; + result = FMT_8; break; case 2: - result = V_030008_FMT_8_8; + result = FMT_8_8; break; case 3: // result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ // break; case 4: - result = V_030008_FMT_8_8_8_8; + result = FMT_8_8_8_8; break; } break; case 16: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_16; + result = FMT_16; break; case 2: - result = V_030008_FMT_16_16; + result = FMT_16_16; break; case 3: // result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ // break; case 4: - result = V_030008_FMT_16_16_16_16; + result = FMT_16_16_16_16; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_32; + result = FMT_32; break; case 2: - result = V_030008_FMT_32_32; + result = FMT_32_32; break; case 3: - result = V_030008_FMT_32_32_32; + result = FMT_32_32_32; break; case 4: - result = V_030008_FMT_32_32_32_32; + result = FMT_32_32_32_32; break; } break; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 147d4f372e6..72223485067 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -39,6 +39,7 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> +#include <util/u_framebuffer.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "evergreend.h" @@ -136,20 +137,6 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, return rstate; } -static void evergreen_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void *evergreen_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { @@ -241,6 +228,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; if (rs == NULL) { return NULL; @@ -250,6 +238,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; @@ -257,7 +247,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_RASTERIZER; if (state->flatshade_first) prov_vtx = 0; - tmp = 0x00000001; + tmp = S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { tmp |= S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(2) | @@ -299,39 +289,10 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); return rstate; } -static void evergreen_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - rctx->rasterizer = rs; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void evergreen_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->rasterizer == rs) { - rctx->rasterizer = NULL; - } - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - static void *evergreen_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { @@ -372,28 +333,6 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, return rstate; } -static void *evergreen_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void evergreen_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -424,15 +363,15 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, + format = r600_translate_texformat(state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(texture->format); + desc = util_format_description(state->format); if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); + R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture*)texture; rbuffer = &tmp->resource; @@ -446,7 +385,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_pixels[0], 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, @@ -464,7 +403,6 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->last_level) | @@ -481,32 +419,42 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { - /* TODO */ - assert(1); -} - -static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); } } } -static void evergreen_bind_state(struct pipe_context *ctx, void *state) +static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + int i; + + for (i = 0; i < count; i++) { + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); + } + } + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); + } + } + rctx->ps_samplers.n_views = count; } static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) @@ -514,6 +462,10 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); + rctx->ps_samplers.n_samplers = count; + for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } @@ -524,37 +476,11 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - /* TODO implement */ for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } } -static void evergreen_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); -} - static void evergreen_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { @@ -590,19 +516,6 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - evergreen_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; -// rctx->vs_rebuild = TRUE; - } -} - static void evergreen_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { @@ -626,18 +539,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, @@ -661,15 +562,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -733,6 +625,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; @@ -740,14 +633,15 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state const struct util_format_description *desc; struct r600_bo *bo[3]; + surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -794,32 +688,49 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level; - unsigned pitch, slice, format; + unsigned pitch, slice, format, stencil_format; if (state->zsbuf == NULL) return; + level = state->zsbuf->level; + + surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; - rtex->array_mode = 2; + rtex->array_mode[level] = 2; rtex->tile_type = 1; rtex->depth = 1; rbuffer = &rtex->resource; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); + stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); -// r600_pipe_state_add_reg(rstate, R_028014_DB_HTILE_DATA_BASE, state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); + + if (stencil_format) { + uint32_t stencil_offset; + + stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + } + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, - S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format), + S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), @@ -841,14 +752,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + rctx->pframebuffer = &rctx->framebuffer; /* build states */ @@ -881,6 +787,24 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, 0x00000000, target_mask, NULL); @@ -896,40 +820,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void evergreen_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void evergreen_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct pipe_resource *buffer) { @@ -965,84 +855,31 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, } } -static void *evergreen_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void evergreen_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void evergreen_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void evergreen_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; - rctx->context.create_fs_state = evergreen_create_shader_state; + rctx->context.create_fs_state = r600_create_shader_state; rctx->context.create_rasterizer_state = evergreen_create_rs_state; rctx->context.create_sampler_state = evergreen_create_sampler_state; rctx->context.create_sampler_view = evergreen_create_sampler_view; - rctx->context.create_vertex_elements_state = evergreen_create_vertex_elements; - rctx->context.create_vs_state = evergreen_create_shader_state; - rctx->context.bind_blend_state = evergreen_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state; + rctx->context.create_vertex_elements_state = r600_create_vertex_elements; + rctx->context.create_vs_state = r600_create_shader_state; + rctx->context.bind_blend_state = r600_bind_blend_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; - rctx->context.bind_fs_state = evergreen_bind_ps_shader; - rctx->context.bind_rasterizer_state = evergreen_bind_rs_state; - rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements; + rctx->context.bind_fs_state = r600_bind_ps_shader; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; + rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler; - rctx->context.bind_vs_state = evergreen_bind_vs_shader; - rctx->context.delete_blend_state = evergreen_delete_state; - rctx->context.delete_depth_stencil_alpha_state = evergreen_delete_state; - rctx->context.delete_fs_state = evergreen_delete_ps_shader; - rctx->context.delete_rasterizer_state = evergreen_delete_rs_state; - rctx->context.delete_sampler_state = evergreen_delete_state; - rctx->context.delete_vertex_elements_state = evergreen_delete_vertex_element; - rctx->context.delete_vs_state = evergreen_delete_vs_shader; + rctx->context.bind_vs_state = r600_bind_vs_shader; + rctx->context.delete_blend_state = r600_delete_state; + rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_ps_shader; + rctx->context.delete_rasterizer_state = r600_delete_rs_state; + rctx->context.delete_sampler_state = r600_delete_state; + rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; + rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; rctx->context.set_constant_buffer = evergreen_set_constant_buffer; @@ -1052,11 +889,11 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_sample_mask = evergreen_set_sample_mask; rctx->context.set_scissor_state = evergreen_set_scissor_state; rctx->context.set_stencil_ref = evergreen_set_stencil_ref; - rctx->context.set_vertex_buffers = evergreen_set_vertex_buffers; - rctx->context.set_index_buffer = evergreen_set_index_buffer; + rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; - rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; + rctx->context.sampler_view_destroy = r600_sampler_view_destroy; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1339,6 +1176,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_draw rdraw; struct r600_pipe_state vgt; struct r600_drawl draw; + boolean translate = FALSE; + + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); @@ -1415,11 +1258,11 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); word2 = format | S_030008_STRIDE(vertex_buffer->stride); - word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format); + word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->hw_format[i]); r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); @@ -1500,6 +1343,9 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } evergreen_context_draw(&rctx->ctx, &rdraw); + if (translate) + r600_end_vertex_translate(rctx); + pipe_resource_reference(&draw.index_buffer, NULL); } @@ -1508,23 +1354,39 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); @@ -1532,17 +1394,23 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, S_02880C_Z_EXPORT_ENABLE(1), S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_EXPORT_ENABLE(1), + S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); } exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -1554,19 +1422,49 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 2; } - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); spi_input_z |= 1; } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, @@ -1581,11 +1479,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0286E0_SPI_BARYC_CNTL, - S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1), - 0xFFFFFFFF, NULL); if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 9971dded782..8e96f9355e6 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -686,6 +686,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -984,9 +987,6 @@ #define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) #define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) #define C_030010_ENDIAN_SWAP 0xFFFFCFFF -#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_030010_REQUEST_SIZE 0xFFFF3FFF #define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16) #define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7) #define C_030010_DST_SEL_X 0xFFF8FFFF @@ -1050,45 +1050,6 @@ #define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_030008_DATA_FORMAT 0xFC0FFFFF -#define V_030008_FMT_INVALID 0x00000000 -#define V_030008_FMT_8 0x00000001 -#define V_030008_FMT_4_4 0x00000002 -#define V_030008_FMT_3_3_2 0x00000003 -#define V_030008_FMT_16 0x00000005 -#define V_030008_FMT_16_FLOAT 0x00000006 -#define V_030008_FMT_8_8 0x00000007 -#define V_030008_FMT_5_6_5 0x00000008 -#define V_030008_FMT_6_5_5 0x00000009 -#define V_030008_FMT_1_5_5_5 0x0000000A -#define V_030008_FMT_4_4_4_4 0x0000000B -#define V_030008_FMT_5_5_5_1 0x0000000C -#define V_030008_FMT_32 0x0000000D -#define V_030008_FMT_32_FLOAT 0x0000000E -#define V_030008_FMT_16_16 0x0000000F -#define V_030008_FMT_16_16_FLOAT 0x00000010 -#define V_030008_FMT_8_24 0x00000011 -#define V_030008_FMT_8_24_FLOAT 0x00000012 -#define V_030008_FMT_24_8 0x00000013 -#define V_030008_FMT_24_8_FLOAT 0x00000014 -#define V_030008_FMT_10_11_11 0x00000015 -#define V_030008_FMT_10_11_11_FLOAT 0x00000016 -#define V_030008_FMT_11_11_10 0x00000017 -#define V_030008_FMT_11_11_10_FLOAT 0x00000018 -#define V_030008_FMT_2_10_10_10 0x00000019 -#define V_030008_FMT_8_8_8_8 0x0000001A -#define V_030008_FMT_10_10_10_2 0x0000001B -#define V_030008_FMT_X24_8_32_FLOAT 0x0000001C -#define V_030008_FMT_32_32 0x0000001D -#define V_030008_FMT_32_32_FLOAT 0x0000001E -#define V_030008_FMT_16_16_16_16 0x0000001F -#define V_030008_FMT_16_16_16_16_FLOAT 0x00000020 -#define V_030008_FMT_32_32_32_32 0x00000022 -#define V_030008_FMT_32_32_32_32_FLOAT 0x00000023 -#define V_030008_FMT_8_8_8 0x0000002c -#define V_030008_FMT_16_16_16 0x0000002d -#define V_030008_FMT_16_16_16_FLOAT 0x0000002e -#define V_030008_FMT_32_32_32 0x0000002f -#define V_030008_FMT_32_32_32_FLOAT 0x00000030 #define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 630177d6add..62d983269f5 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -99,15 +99,22 @@ enum chip_class { EVERGREEN, }; +struct r600_tiling_info { + unsigned num_channels; + unsigned num_banks; + unsigned group_bytes; +}; + enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, @@ -233,6 +240,10 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; + unsigned fence; + struct list_head fenced_bo; + unsigned *cfence; + struct r600_bo *fence_bo; }; struct r600_draw { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index d13da0ef638..c22628423bd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -701,9 +701,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); @@ -871,3 +876,39 @@ int r600_bc_build(struct r600_bc *bc) } return 0; } + +void r600_bc_clear(struct r600_bc *bc) +{ + struct r600_bc_cf *cf, *next_cf; + + free(bc->bytecode); + bc->bytecode = NULL; + + LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { + struct r600_bc_alu *alu, *next_alu; + struct r600_bc_tex *tex, *next_tex; + struct r600_bc_tex *vtx, *next_vtx; + + LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { + free(alu); + } + + LIST_INITHEAD(&cf->alu); + + LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { + free(tex); + } + + LIST_INITHEAD(&cf->tex); + + LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { + free(vtx); + } + + LIST_INITHEAD(&cf->vtx); + + free(cf); + } + + LIST_INITHEAD(&cf->list); +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index bebc7c15b00..25cda16837d 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -132,6 +132,11 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned kcache1_mode; + unsigned kcache0_addr; + unsigned kcache1_addr; + unsigned kcache0_bank; + unsigned kcache1_bank; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; @@ -185,6 +190,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 4bf44a171af..50d47060c1a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -22,12 +22,22 @@ */ #include <util/u_surface.h> #include <util/u_blitter.h> +#include <util/u_format.h> #include "r600_pipe.h" -static void r600_blitter_save_states(struct pipe_context *ctx) +enum r600_blitter_op /* bitmask */ +{ + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 +}; + +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_suspend(&rctx->ctx); + util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { @@ -47,25 +57,34 @@ static void r600_blitter_save_states(struct pipe_context *ctx) rctx->vertex_elements = NULL; - /* TODO queries */ + if (op & (R600_CLEAR_SURFACE | R600_COPY)) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_COPY) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->ps_samplers.n_samplers, + (void**)rctx->ps_samplers.samplers); + + util_blitter_save_fragment_sampler_views( + rctx->blitter, rctx->ps_samplers.n_views, + (struct pipe_sampler_view**)rctx->ps_samplers.views); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_resume(&rctx->ctx); } int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state fb = *rctx->pframebuffer; struct pipe_surface *zsurf, *cbsurf; int level = 0; float depth = 1.0f; - r600_context_queries_suspend(&rctx->ctx); - for (int i = 0; i < fb.nr_cbufs; i++) { - fb.cbufs[i] = NULL; - pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]); - } - fb.zsbuf = NULL; - pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf); - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, PIPE_BIND_DEPTH_STENCIL); @@ -73,22 +92,17 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te (struct pipe_resource*)texture->flushed_depth_texture, 0, level, 0, PIPE_BIND_RENDER_TARGET); - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, &fb); - if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || - rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) depth = 0.0f; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); + r600_blitter_end(ctx); pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); - for (int i = 0; i < fb.nr_cbufs; i++) { - pipe_surface_reference(&fb.cbufs[i], NULL); - } - pipe_surface_reference(&fb.zsbuf, NULL); - r600_context_queries_resume(&rctx->ctx); + return 0; } @@ -99,12 +113,11 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } static void r600_clear_render_target(struct pipe_context *ctx, @@ -114,13 +127,11 @@ static void r600_clear_render_target(struct pipe_context *ctx, unsigned width, unsigned height) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } static void r600_clear_depth_stencil(struct pipe_context *ctx, @@ -132,16 +143,34 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, unsigned width, unsigned height) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } + +/* Copy a block of pixels from one surface to another using HW. */ +static void r600_hw_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned width, unsigned height) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height, + TRUE); + r600_blitter_end(ctx); +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, @@ -151,8 +180,16 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + boolean is_depth; + /* there is something wrong with depth resource copies at the moment so avoid them for now */ + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + else + r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 2bfa4e22fec..455aa2e81f6 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -227,7 +227,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct r600_resource *rbuffer; struct r600_bo *bo = NULL; - bo = r600_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, NULL); if (bo == NULL) { return NULL; } diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h new file mode 100644 index 00000000000..0c91a212384 --- /dev/null +++ b/src/gallium/drivers/r600/r600_formats.h @@ -0,0 +1,56 @@ +#ifndef R600_FORMATS_H +#define R600_FORMATS_H + +/* list of formats from R700 ISA document - apply across GPUs in different registers */ +#define FMT_INVALID 0x00000000 +#define FMT_8 0x00000001 +#define FMT_4_4 0x00000002 +#define FMT_3_3_2 0x00000003 +#define FMT_16 0x00000005 +#define FMT_16_FLOAT 0x00000006 +#define FMT_8_8 0x00000007 +#define FMT_5_6_5 0x00000008 +#define FMT_6_5_5 0x00000009 +#define FMT_1_5_5_5 0x0000000A +#define FMT_4_4_4_4 0x0000000B +#define FMT_5_5_5_1 0x0000000C +#define FMT_32 0x0000000D +#define FMT_32_FLOAT 0x0000000E +#define FMT_16_16 0x0000000F +#define FMT_16_16_FLOAT 0x00000010 +#define FMT_8_24 0x00000011 +#define FMT_8_24_FLOAT 0x00000012 +#define FMT_24_8 0x00000013 +#define FMT_24_8_FLOAT 0x00000014 +#define FMT_10_11_11 0x00000015 +#define FMT_10_11_11_FLOAT 0x00000016 +#define FMT_11_11_10 0x00000017 +#define FMT_11_11_10_FLOAT 0x00000018 +#define FMT_2_10_10_10 0x00000019 +#define FMT_8_8_8_8 0x0000001A +#define FMT_10_10_10_2 0x0000001B +#define FMT_X24_8_32_FLOAT 0x0000001C +#define FMT_32_32 0x0000001D +#define FMT_32_32_FLOAT 0x0000001E +#define FMT_16_16_16_16 0x0000001F +#define FMT_16_16_16_16_FLOAT 0x00000020 +#define FMT_32_32_32_32 0x00000022 +#define FMT_32_32_32_32_FLOAT 0x00000023 +#define FMT_1 0x00000025 +#define FMT_GB_GR 0x00000027 +#define FMT_BG_RG 0x00000028 +#define FMT_32_AS_8 0x00000029 +#define FMT_32_AS_8_8 0x0000002a +#define FMT_5_9_9_9_SHAREDEXP 0x0000002b +#define FMT_8_8_8 0x0000002c +#define FMT_16_16_16 0x0000002d +#define FMT_16_16_16_FLOAT 0x0000002e +#define FMT_32_32_32 0x0000002f +#define FMT_32_32_32_FLOAT 0x00000030 +#define FMT_BC1 0x00000031 +#define FMT_BC2 0x00000032 +#define FMT_BC3 0x00000033 +#define FMT_BC4 0x00000034 +#define FMT_BC5 0x00000035 + +#endif diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 0cf9c1c401c..4f9b39a7fdc 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -233,12 +233,6 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 -/* same up to here */ -/* -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 -*/ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017 @@ -336,9 +330,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 0589652f705..bea7ef5df84 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -85,6 +85,11 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); + if (rctx->tran.translate_cache) + translate_cache_destroy(rctx->tran.translate_cache); + + FREE(rctx->ps_resource); + FREE(rctx->vs_resource); FREE(rctx); } @@ -171,6 +176,24 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->tran.translate_cache = translate_cache_create(); + if (rctx->tran.translate_cache == NULL) { + FREE(rctx); + return NULL; + } + + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_resource) { + FREE(rctx); + return NULL; + } + + rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->ps_resource) { + FREE(rctx); + return NULL; + } + class = r600_get_family_class(rctx->radeon); if (class == R600 || class == R700) rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); @@ -242,6 +265,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; /* Unsupported features (boolean caps). */ @@ -257,7 +281,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ - return 0; + return 16; case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; @@ -428,5 +452,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); + rscreen->tiling_info = r600_get_tiling_info(radeon); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c46029a5617..1c691f6b764 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,6 +30,7 @@ #include <pipe/p_screen.h> #include <pipe/p_context.h> #include <util/u_math.h> +#include "translate/translate_cache.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" @@ -58,6 +59,7 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; struct radeon *radeon; + struct r600_tiling_info *tiling_info; }; struct r600_pipe_sampler_view { @@ -81,8 +83,10 @@ struct r600_pipe_blend { struct r600_vertex_element { unsigned count; - unsigned refcount; - struct pipe_vertex_element elements[32]; + struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; + enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; + unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + boolean incompatible_layout; }; struct r600_pipe_shader { @@ -92,6 +96,29 @@ struct r600_pipe_shader { struct r600_vertex_element vertex_elements; }; +/* needed for blitter save */ +#define NUM_TEX_UNITS 16 + +struct r600_textures_info { + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + unsigned n_views; + void *samplers[NUM_TEX_UNITS]; + unsigned n_samplers; +}; + +struct r600_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + +#define R600_CONSTANT_ARRAY_SIZE 256 +#define R600_RESOURCE_ARRAY_SIZE 160 + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -112,12 +139,8 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned vs_nconst; - unsigned ps_nconst; - struct r600_pipe_state vs_const[256]; - struct r600_pipe_state ps_const[256]; - struct r600_pipe_state vs_resource[160]; - struct r600_pipe_state ps_resource[160]; + struct r600_pipe_state *vs_resource; + struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; @@ -130,6 +153,11 @@ struct r600_pipe_context { struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; unsigned any_user_vbs; + struct r600_textures_info ps_samplers; + + unsigned vb_max_index; + struct r600_translate_context tran; + }; struct r600_drawl { @@ -180,6 +208,7 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id); @@ -187,10 +216,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void r600_init_config(struct r600_pipe_context *rctx); -void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -201,6 +226,38 @@ uint32_t r600_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); +/* r600_translate.c */ +void r600_begin_vertex_translate(struct r600_pipe_context *rctx); +void r600_end_vertex_translate(struct r600_pipe_context *rctx); +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count); + +/* r600_state_common.c */ +void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib); +void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers); +void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements); +void r600_delete_vertex_element(struct pipe_context *ctx, void *state); +void r600_bind_blend_state(struct pipe_context *ctx, void *state); +void r600_bind_rs_state(struct pipe_context *ctx, void *state); +void r600_delete_rs_state(struct pipe_context *ctx, void *state); +void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state); +void r600_bind_state(struct pipe_context *ctx, void *state); +void r600_delete_state(struct pipe_context *ctx, void *state); +void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); + +void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state); +void r600_bind_ps_shader(struct pipe_context *ctx, void *state); +void r600_bind_vs_shader(struct pipe_context *ctx, void *state); +void r600_delete_ps_shader(struct pipe_context *ctx, void *state); +void r600_delete_vs_shader(struct pipe_context *ctx, void *state); /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index f6377ea802b..d152285815c 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -25,6 +25,9 @@ #include "util/u_transfer.h" +/* flag to indicate a resource is to be used as a transfer so should not be tiled */ +#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV + /* Texture transfer. */ struct r600_transfer { /* Base class. */ @@ -49,16 +52,14 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_override; - unsigned long bpt; - unsigned long size; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_override; + unsigned size; unsigned tiled; - unsigned array_mode; unsigned tile_type; unsigned depth; unsigned dirty; @@ -126,4 +127,9 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; +}; + #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 016e75bd52b..4106587398b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -107,24 +107,28 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; + face_index = i; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); @@ -132,17 +136,22 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, S_02880C_Z_EXPORT_ENABLE(1), S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); } exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -157,13 +166,22 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); spi_input_z |= 1; } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, @@ -252,7 +270,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader } rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; } r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { @@ -320,6 +338,18 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } +void +r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_bo_reference(rctx->radeon, &shader->bo, NULL); + + r600_bc_clear(&shader->shader.bc); + + /* FIXME: is there more stuff to free? */ +} + /* * tgsi -> r600 shader */ @@ -339,6 +369,11 @@ struct r600_shader_ctx { u32 *literals; u32 nliterals; u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; }; struct r600_shader_tgsi_instruction { @@ -371,11 +406,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Dimension || - i->Src[j].Register.Absolute) { - R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, - i->Src[j].Register.Dimension, - i->Src[j].Register.Absolute); + if (i->Src[j].Register.Dimension) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); return -EINVAL; } } @@ -388,10 +421,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -402,13 +458,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -434,6 +493,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Declaration.Interpolate; + ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -457,7 +517,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -484,6 +549,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -529,6 +641,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; } + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + @@ -625,7 +740,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; output[i].swizzle_x = 2; - output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].swizzle_y = 7; + output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[i].array_base = 61; + output[i].swizzle_x = 7; + output[i].swizzle_y = 1; + output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); @@ -731,6 +853,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } @@ -793,6 +916,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].rel = r600_src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -803,6 +927,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s return r; } r600_src[i].sel = treg; + r600_src[i].rel =0; j--; } } @@ -1843,7 +1968,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = src_gpr; - alu.src[0].chan = i; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1863,8 +1988,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.sampler_id = tex.resource_id; + tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.resource_id = tex.sampler_id; + if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) + tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -2172,7 +2299,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; @@ -2495,7 +2622,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } /* r6/7 only for now */ -static int tgsi_arl(struct r600_shader_ctx *ctx) +static int tgsi_eg_arl(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.last = 1; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + return 0; +} +static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2840,7 +3000,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, @@ -2921,7 +3081,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ @@ -3004,7 +3164,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, @@ -3079,7 +3239,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 6e2620f2012..f8bc5951395 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -31,6 +31,8 @@ struct r600_shader_io { unsigned done; int sid; unsigned interpolate; + boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -39,6 +41,7 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b55c3450059..b3e0d493217 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -37,7 +37,7 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include <util/u_upload_mgr.h> -#include <util/u_index_modify.h> +#include <util/u_framebuffer.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -98,7 +98,7 @@ static void r600_draw_common(struct r600_drawl *draw) vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); word2 = format | S_038008_STRIDE(vertex_buffer->stride); @@ -181,40 +181,21 @@ static void r600_draw_common(struct r600_drawl *draw) r600_context_draw(&rctx->ctx, &rdraw); } -void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) -{ - switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); - *index_size = 2; - *start = 0; - break; - - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - - case 4: - break; - } -} - void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_drawl draw; + boolean translate = FALSE; + + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); rctx->any_user_vbs = FALSE; } - memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -245,6 +226,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_draw_common(&draw); + if (translate) + r600_end_vertex_translate(rctx); + pipe_resource_reference(&draw.index_buffer, NULL); } @@ -340,20 +324,6 @@ static void *r600_create_blend_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void *r600_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { @@ -446,6 +416,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; if (rs == NULL) { return NULL; @@ -455,6 +426,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; /* offset */ rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; @@ -462,7 +434,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_RASTERIZER; if (state->flatshade_first) prov_vtx = 0; - tmp = 0x00000001; + tmp = S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { tmp |= S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(2) | @@ -496,7 +468,10 @@ static void *r600_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)(state->point_size * 8.0); r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); + + tmp = (unsigned)(state->line_width * 8.0); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); @@ -505,37 +480,9 @@ static void *r600_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - return rstate; -} - -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - rctx->rasterizer = rs; + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void r600_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->rasterizer == rs) { - rctx->rasterizer = NULL; - } - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); + return rstate; } static void *r600_create_sampler_state(struct pipe_context *ctx, @@ -574,28 +521,6 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return rstate; } -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -626,15 +551,15 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, + format = r600_translate_texformat(state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(texture->format); + desc = util_format_description(state->format); if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); + R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture*)texture; rbuffer = &tmp->resource; @@ -648,7 +573,11 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; + } /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, @@ -683,32 +612,43 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { - /* TODO */ - assert(1); -} - -static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); } } } -static void r600_bind_state(struct pipe_context *ctx, void *state) +static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + int i; - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); + for (i = 0; i < count; i++) { + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); + + } + } + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); + } + } + rctx->ps_samplers.n_views = count; } static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) @@ -716,6 +656,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); + rctx->ps_samplers.n_samplers = count; + for (int i = 0; i < count; i++) { r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } @@ -726,37 +669,11 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - /* TODO implement */ for (int i = 0; i < count; i++) { r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } } -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); -} - static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { @@ -792,19 +709,6 @@ static void r600_set_clip_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; -// rctx->vs_rebuild = TRUE; - } -} - static void r600_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { @@ -828,18 +732,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx, tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, @@ -863,17 +755,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); - } free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -937,6 +818,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; @@ -944,14 +826,15 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta const struct util_format_description *desc; struct r600_bo *bo[3]; + surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -961,6 +844,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta swap = r600_translate_colorswap(rtex->resource.base.b.format); color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | + S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) @@ -996,22 +880,25 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level; unsigned pitch, slice, format; if (state->zsbuf == NULL) return; + level = state->zsbuf->level; + + surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; - rtex->array_mode = 2; + rtex->array_mode[level] = 2; rtex->tile_type = 1; rtex->depth = 1; rbuffer = &rtex->resource; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, @@ -1021,10 +908,10 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), + S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, - (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); + (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL); } static void r600_set_framebuffer_state(struct pipe_context *ctx, @@ -1039,14 +926,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + rctx->pframebuffer = &rctx->framebuffer; /* build states */ @@ -1070,6 +952,18 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, @@ -1081,6 +975,14 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + if (rctx->family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + } r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, shader_control, 0xFFFFFFFF, NULL); @@ -1110,40 +1012,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct pipe_resource *buffer) { @@ -1179,59 +1047,6 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint } } -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -1480,14 +1295,14 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); } else { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c new file mode 100644 index 00000000000..210420e823b --- /dev/null +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -0,0 +1,276 @@ +/* + * Copyright 2010 Red Hat Inc. + * 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie <[email protected]> + * Jerome Glisse <[email protected]> + */ +#include <util/u_memory.h> +#include <util/u_format.h> +#include <pipebuffer/pb_buffer.h> +#include "r600_pipe.h" + +/* common state between evergreen and r600 */ +void r600_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; + + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +void r600_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); +} + +void r600_delete_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); +} + +void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; + + pipe_resource_reference(&state->texture, NULL); + FREE(resource); +} + +void r600_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (state == NULL) + return; + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +void r600_delete_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); +} + +void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + rctx->vertex_elements = v; + if (v) { +// rctx->vs_rebuild = TRUE; + } +} + +void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + FREE(state); + + if (rctx->vertex_elements == state) + rctx->vertex_elements = NULL; +} + + +void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + +void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_vertex_buffer *vbo; + unsigned max_index = (unsigned)-1; + + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + + for (int i = 0; i < count; i++) { + vbo = (struct pipe_vertex_buffer*)&buffers[i]; + + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + + if (vbo->max_index == ~0) { + if (!vbo->stride) + vbo->max_index = 1; + else + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } + max_index = MIN2(vbo->max_index, max_index); + } + rctx->nvertex_buffer = count; + rctx->vb_max_index = max_index; +} + + +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + +void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + int i; + enum pipe_format *format; + + assert(count < 32); + if (!v) + return NULL; + + v->count = count; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + + for (i = 0; i < count; i++) { + v->hw_format[i] = v->elements[i].src_format; + format = &v->hw_format[i]; + + switch (*format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + v->incompatible_layout = + v->incompatible_layout || + v->elements[i].src_format != v->hw_format[i] || + v->elements[i].src_offset % 4 != 0; + + v->hw_format_size[i] = + align(util_format_get_blocksize(v->hw_format[i]), 4); + } + + return v; +} + +void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +void r600_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + + r600_pipe_shader_destroy(ctx, shader); + free(shader); +} + +void r600_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + + r600_pipe_shader_destroy(ctx, shader); + free(shader); +} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 81f285027e6..1c1978f8abb 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "r600d.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -303,6 +304,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: return V_0280A0_SWAP_STD; /* 32-bit buffers. */ @@ -342,16 +347,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_0280A0_COLOR_16_16_16_16; + // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_0280A0_COLOR_16_16_16_16_FLOAT; + // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + // return FMT_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); @@ -387,8 +395,12 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_COLOR_8_8; + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -426,6 +438,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16_16_FLOAT; case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_COLOR_16_16; @@ -510,32 +523,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 16: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_16_FLOAT; + result = FMT_16_FLOAT; break; case 2: - result = V_038008_FMT_16_16_FLOAT; + result = FMT_16_16_FLOAT; break; case 3: - result = V_038008_FMT_16_16_16_FLOAT; + result = FMT_16_16_16_FLOAT; break; case 4: - result = V_038008_FMT_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_32_FLOAT; + result = FMT_32_FLOAT; break; case 2: - result = V_038008_FMT_32_32_FLOAT; + result = FMT_32_32_FLOAT; break; case 3: - result = V_038008_FMT_32_32_32_FLOAT; + result = FMT_32_32_32_FLOAT; break; case 4: - result = V_038008_FMT_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; break; } break; @@ -551,48 +564,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 8: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_8; + result = FMT_8; break; case 2: - result = V_038008_FMT_8_8; + result = FMT_8_8; break; case 3: - // result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ + // result = FMT_8_8_8; /* fails piglit draw-vertices test */ // break; case 4: - result = V_038008_FMT_8_8_8_8; + result = FMT_8_8_8_8; break; } break; case 16: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_16; + result = FMT_16; break; case 2: - result = V_038008_FMT_16_16; + result = FMT_16_16; break; case 3: - // result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ + // result = FMT_16_16_16; /* fails piglit draw-vertices test */ // break; case 4: - result = V_038008_FMT_16_16_16_16; + result = FMT_16_16_16_16; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_32; + result = FMT_32; break; case 2: - result = V_038008_FMT_32_32; + result = FMT_32_32; break; case 3: - result = V_038008_FMT_32_32_32; + result = FMT_32_32_32; break; case 4: - result = V_038008_FMT_32_32_32_32; + result = FMT_32_32_32_32; break; } break; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index c46bfa66ba1..4ebd5b754b3 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -35,6 +35,7 @@ #include "r600_resource.h" #include "r600_state_inlines.h" #include "r600d.h" +#include "r600_formats.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -53,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t transfer->box.width, transfer->box.height); } -static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + +/* Copy from a detiled texture to a tiled one. */ +static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subsrc; + + subsrc.face = 0; + subsrc.level = 0; + ctx->resource_copy_region(ctx, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + rtransfer->linear_texture, subsrc, + 0, 0, 0, + transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); +} + +static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) { - unsigned long offset = rtex->offset[level]; + unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: @@ -72,22 +92,158 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc) +static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, + enum pipe_format format, + unsigned array_mode) +{ + struct r600_screen* rscreen = (struct r600_screen *)screen; + unsigned pixsize = util_format_get_blocksize(format); + int p_align; + + switch(array_mode) { + case V_038000_ARRAY_1D_TILED_THIN1: + p_align = MAX2(8, + ((rscreen->tiling_info->group_bytes / 8 / pixsize))); + break; + case V_038000_ARRAY_2D_TILED_THIN1: + p_align = MAX2(rscreen->tiling_info->num_banks, + (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * + rscreen->tiling_info->num_banks)); + break; + case 0: + default: + p_align = 64; + break; + } + return p_align; +} + +static unsigned r600_get_height_alignment(struct pipe_screen *screen, + unsigned array_mode) +{ + struct r600_screen* rscreen = (struct r600_screen *)screen; + int h_align; + + switch (array_mode) { + case V_038000_ARRAY_2D_TILED_THIN1: + h_align = rscreen->tiling_info->num_channels * 8; + break; + case V_038000_ARRAY_1D_TILED_THIN1: + h_align = 8; + break; + default: + h_align = 1; + break; + } + return h_align; +} + +static unsigned mip_minify(unsigned size, unsigned level) +{ + unsigned val; + val = u_minify(size, level); + if (level > 0) + val = util_next_power_of_two(val); + return val; +} + +static unsigned r600_texture_get_stride(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned width, stride, tile_width; + + if (rtex->pitch_override) + return rtex->pitch_override; + + width = mip_minify(ptex->width0, level); + if (util_format_is_plain(ptex->format)) { + tile_width = r600_get_pixel_alignment(screen, ptex->format, + rtex->array_mode[level]); + width = align(width, tile_width); + } + stride = util_format_get_stride(ptex->format, width); + if (chipc == EVERGREEN) + stride = align(stride, 512); + return stride; +} + +static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, pitch, size, layer_size, i, offset; + unsigned height, tile_height; - rtex->bpt = util_format_get_blocksize(ptex->format); - for (i = 0, offset = 0; i <= ptex->last_level; i++) { - w = u_minify(ptex->width0, i); - h = u_minify(ptex->height0, i); - h = util_next_power_of_two(h); - pitch = util_format_get_stride(ptex->format, align(w, 64)); - if (chipc == EVERGREEN) - pitch = align(pitch, 512); + height = mip_minify(ptex->height0, level); + if (util_format_is_plain(ptex->format)) { + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + } + return util_format_get_nblocksy(ptex->format, height); +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned pitch_to_width(enum pipe_format format, + unsigned pitch_in_bytes) +{ + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r600_texture_set_array_mode(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level, unsigned array_mode) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + + switch (array_mode) { + case V_0280A0_ARRAY_LINEAR_GENERAL: + case V_0280A0_ARRAY_LINEAR_ALIGNED: + case V_0280A0_ARRAY_1D_TILED_THIN1: + default: + rtex->array_mode[level] = array_mode; + break; + case V_0280A0_ARRAY_2D_TILED_THIN1: + { + unsigned w, h, tile_height, tile_width; + + tile_height = r600_get_height_alignment(screen, array_mode); + tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + + w = mip_minify(ptex->width0, level); + h = mip_minify(ptex->height0, level); + if (w < tile_width || h < tile_height) + rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; else - pitch = align(pitch, 256); - layer_size = pitch * h; + rtex->array_mode[level] = array_mode; + } + break; + } +} + +static void r600_setup_miptree(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned array_mode) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned pitch, size, layer_size, i, offset; + unsigned nblocksy; + + for (i = 0, offset = 0; i <= ptex->last_level; i++) { + r600_texture_set_array_mode(screen, rtex, i, array_mode); + + pitch = r600_texture_get_stride(screen, rtex, i); + nblocksy = r600_texture_get_nblocksy(screen, rtex, i); + + layer_size = pitch * nblocksy; + if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; @@ -98,41 +254,69 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_cla size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = pitch; - rtex->width[i] = w; - rtex->height[i] = h; + rtex->pitch_in_bytes[i] = pitch; + rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); offset += size; } rtex->size = offset; } -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct r600_resource_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + unsigned array_mode, + unsigned pitch_in_bytes_override, + unsigned max_buffer_size, + struct r600_bo *bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; struct radeon *radeon = (struct radeon *)screen->winsys; rtex = CALLOC_STRUCT(r600_resource_texture); - if (!rtex) { + if (rtex == NULL) return NULL; - } + resource = &rtex->resource; - resource->base.b = *templ; + resource->base.b = *base; resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex, r600_get_family_class(radeon)); - - /* FIXME alignment 4096 enought ? too much ? */ + resource->bo = bo; resource->domain = r600_domain_from_usage(resource->base.b.bind); + rtex->pitch_override = pitch_in_bytes_override; + + if (array_mode) + rtex->tiled = 1; + r600_setup_miptree(screen, rtex, array_mode); + resource->size = rtex->size; - resource->bo = r600_bo(radeon, rtex->size, 4096, 0); - if (resource->bo == NULL) { - FREE(rtex); - return NULL; + + if (!resource->bo) { + resource->bo = r600_bo(radeon, rtex->size, 4096, 0); + if (!resource->bo) { + FREE(rtex); + return NULL; + } + } + return rtex; +} + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + unsigned array_mode = 0; + + if (debug_get_bool_option("R600_FORCE_TILING", FALSE)) { + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + !(templ->bind & PIPE_BIND_SCANOUT)) { + array_mode = V_038000_ARRAY_2D_TILED_THIN1; + } } - return &resource->base.b; + + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + 0, 0, NULL); + } static void r600_texture_destroy(struct pipe_screen *screen, @@ -157,24 +341,27 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, unsigned zslice, unsigned flags) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned long offset; + struct r600_surface *surface = CALLOC_STRUCT(r600_surface); + unsigned offset, tile_height; if (surface == NULL) return NULL; offset = r600_texture_get_offset(rtex, level, zslice, face); - pipe_reference_init(&surface->reference, 1); - pipe_resource_reference(&surface->texture, texture); - surface->format = texture->format; - surface->width = u_minify(texture->width0, level); - surface->height = u_minify(texture->height0, level); - surface->offset = offset; - surface->usage = flags; - surface->zslice = zslice; - surface->texture = texture; - surface->face = face; - surface->level = level; - return surface; + pipe_reference_init(&surface->base.reference, 1); + pipe_resource_reference(&surface->base.texture, texture); + surface->base.format = texture->format; + surface->base.width = mip_minify(texture->width0, level); + surface->base.height = mip_minify(texture->height0, level); + surface->base.offset = offset; + surface->base.usage = flags; + surface->base.zslice = zslice; + surface->base.texture = texture; + surface->base.face = face; + surface->base.level = level; + + tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + surface->aligned_height = align(surface->base.height, tile_height); + return &surface->base; } static void r600_tex_surface_destroy(struct pipe_surface *surface) @@ -183,46 +370,29 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) FREE(surface); } + struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource_texture *rtex; - struct r600_resource *resource; struct r600_bo *bo = NULL; + unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_resource_texture); - if (rtex == NULL) - return NULL; - - bo = r600_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, &array_mode); if (bo == NULL) { - FREE(rtex); return NULL; } - resource = &rtex->resource; - resource->base.b = *templ; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; - resource->bo = bo; - rtex->depth = 0; - rtex->pitch_override = whandle->stride; - rtex->bpt = util_format_get_blocksize(templ->format); - rtex->pitch[0] = whandle->stride; - rtex->width[0] = templ->width0; - rtex->height[0] = templ->height0; - rtex->offset[0] = 0; - rtex->size = align(rtex->pitch[0] * templ->height0, 64); - - return &resource->base.b; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + whandle->stride, + 0, + bo); } static unsigned int r600_texture_is_referenced(struct pipe_context *context, @@ -248,14 +418,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = texture->width0; resource.height0 = texture->height0; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; - resource.flags = 0; + resource.flags = R600_RESOURCE_FLAG_TRANSFER; - resource.bind |= PIPE_BIND_RENDER_TARGET; + resource.bind |= PIPE_BIND_DEPTH_STENCIL; rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource); if (rtex->flushed_depth_texture == NULL) { @@ -286,8 +456,6 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); if (rtex->depth) { r = r600_texture_depth_flush(ctx, texture); if (r < 0) { @@ -301,12 +469,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = box->width; resource.height0 = box->height; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; - resource.flags = 0; + resource.flags = R600_RESOURCE_FLAG_TRANSFER; /* For texture reading, the temporary (detiled) texture is used as * a render target when blitting from a tiled texture. */ if (usage & PIPE_TRANSFER_READ) { @@ -325,6 +493,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + + trans->transfer.stride = + ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ @@ -332,7 +503,10 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, /* Always referenced in the blit. */ ctx->flush(ctx, 0, NULL); } + return &trans->transfer; } + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); return &trans->transfer; } @@ -343,12 +517,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->linear_texture) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { + r600_copy_into_tiled_texture(ctx, rtransfer); + } pipe_resource_reference(&rtransfer->linear_texture, NULL); } if (rtex->flushed_depth_texture) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - // TODO - } pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); } pipe_resource_reference(&transfer->resource, NULL); @@ -362,7 +536,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct r600_bo *bo; enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - unsigned long offset = 0; + unsigned offset = 0; char *map; if (rtransfer->linear_texture) { @@ -500,15 +674,23 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; + case PIPE_FORMAT_X24S8_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_0280A0_COLOR_8_24; + result = FMT_8_24; goto out_word4; + case PIPE_FORMAT_S8X24_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - result = V_0280A0_COLOR_24_8; + result = FMT_24_8; + goto out_word4; + case PIPE_FORMAT_S8_USCALED: + result = V_0280A0_COLOR_8; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; default: goto out_unknown; @@ -562,7 +744,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { - result = V_0280A0_COLOR_5_6_5; + result = FMT_5_6_5; goto out_word4; } goto out_unknown; @@ -571,14 +753,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 5 && desc->channel[2].size == 5 && desc->channel[3].size == 1) { - result = V_0280A0_COLOR_1_5_5_5; + result = FMT_1_5_5_5; goto out_word4; } if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = V_0280A0_COLOR_10_10_10_2; + result = FMT_10_10_10_2; goto out_word4; } goto out_unknown; @@ -609,36 +791,36 @@ uint32_t r600_translate_texformat(enum pipe_format format, case 4: switch (desc->nr_channels) { case 2: - result = V_0280A0_COLOR_4_4; + result = FMT_4_4; goto out_word4; case 4: - result = V_0280A0_COLOR_4_4_4_4; + result = FMT_4_4_4_4; goto out_word4; } goto out_unknown; case 8: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_8; + result = FMT_8; goto out_word4; case 2: - result = V_0280A0_COLOR_8_8; + result = FMT_8_8; goto out_word4; case 4: - result = V_0280A0_COLOR_8_8_8_8; + result = FMT_8_8_8_8; goto out_word4; } goto out_unknown; case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16; + result = FMT_16_16; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16; + result = FMT_16_16_16_16; goto out_word4; } } @@ -649,26 +831,26 @@ uint32_t r600_translate_texformat(enum pipe_format format, case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16_FLOAT; + result = FMT_16_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16_FLOAT; + result = FMT_16_16_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; goto out_word4; } goto out_unknown; case 32: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_32_FLOAT; + result = FMT_32_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_32_32_FLOAT; + result = FMT_32_32_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; goto out_word4; } } diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c new file mode 100644 index 00000000000..9a07cf2073f --- /dev/null +++ b/src/gallium/drivers/r600/r600_translate.c @@ -0,0 +1,211 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie <[email protected]> + */ +#include "translate/translate_cache.h" +#include "translate/translate.h" +#include <pipebuffer/pb_buffer.h> +#include <util/u_index_modify.h> +#include "r600_pipe.h" + +void r600_begin_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r600_vertex_element *ve = rctx->vertex_elements; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->elements[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->elements[i].vertex_buffer_index; + te->input_format = ve->elements[i].src_format; + te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(rctx->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = rctx->vb_max_index + 1; + + out_buffer = pipe_buffer_create(&rctx->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + rctx->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + rctx->tran.saved_velems = rctx->vertex_elements; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } + } + + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +void r600_end_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, + NULL); +} + +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0) { + util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); + *start = 0; + } + break; + + case 4: + break; + } +} diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index a96d2ce26c1..a3cb5b86004 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -667,6 +667,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -901,6 +904,10 @@ #define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) #define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) #define C_038000_TILE_MODE 0xFFFFFF87 +#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004 #define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) #define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) #define C_038000_TILE_TYPE 0xFFFFFF7F @@ -1025,45 +1032,7 @@ #define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_038008_DATA_FORMAT 0xFC0FFFFF -#define V_038008_FMT_INVALID 0x00000000 -#define V_038008_FMT_8 0x00000001 -#define V_038008_FMT_4_4 0x00000002 -#define V_038008_FMT_3_3_2 0x00000003 -#define V_038008_FMT_16 0x00000005 -#define V_038008_FMT_16_FLOAT 0x00000006 -#define V_038008_FMT_8_8 0x00000007 -#define V_038008_FMT_5_6_5 0x00000008 -#define V_038008_FMT_6_5_5 0x00000009 -#define V_038008_FMT_1_5_5_5 0x0000000A -#define V_038008_FMT_4_4_4_4 0x0000000B -#define V_038008_FMT_5_5_5_1 0x0000000C -#define V_038008_FMT_32 0x0000000D -#define V_038008_FMT_32_FLOAT 0x0000000E -#define V_038008_FMT_16_16 0x0000000F -#define V_038008_FMT_16_16_FLOAT 0x00000010 -#define V_038008_FMT_8_24 0x00000011 -#define V_038008_FMT_8_24_FLOAT 0x00000012 -#define V_038008_FMT_24_8 0x00000013 -#define V_038008_FMT_24_8_FLOAT 0x00000014 -#define V_038008_FMT_10_11_11 0x00000015 -#define V_038008_FMT_10_11_11_FLOAT 0x00000016 -#define V_038008_FMT_11_11_10 0x00000017 -#define V_038008_FMT_11_11_10_FLOAT 0x00000018 -#define V_038008_FMT_2_10_10_10 0x00000019 -#define V_038008_FMT_8_8_8_8 0x0000001A -#define V_038008_FMT_10_10_10_2 0x0000001B -#define V_038008_FMT_X24_8_32_FLOAT 0x0000001C -#define V_038008_FMT_32_32 0x0000001D -#define V_038008_FMT_32_32_FLOAT 0x0000001E -#define V_038008_FMT_16_16_16_16 0x0000001F -#define V_038008_FMT_16_16_16_16_FLOAT 0x00000020 -#define V_038008_FMT_32_32_32_32 0x00000022 -#define V_038008_FMT_32_32_32_32_FLOAT 0x00000023 -#define V_038008_FMT_8_8_8 0x0000002c -#define V_038008_FMT_16_16_16 0x0000002d -#define V_038008_FMT_16_16_16_FLOAT 0x0000002e -#define V_038008_FMT_32_32_32 0x0000002f -#define V_038008_FMT_32_32_32_FLOAT 0x00000030 + #define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 67e2c8f8bc4..346e1b402ba 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { + + for (j = 0; j < 4; j++) quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; - } + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + + for (j = 0; j < 4; j++) + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index daa158df7c4..5b18cd035e3 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } + for (j = 0; j < 4; j++) + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + for (j = 0; j < 4; j++) + quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index a3236bd1169..e745aa80619 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -85,6 +85,7 @@ struct quad_header_output /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; + uint8_t stencil[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index e9b92626176..c8f5f89568a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -47,6 +47,8 @@ struct depth_data { unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ ubyte stencilVals[QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[QUAD_SIZE]; struct softpipe_cached_tile *tile; }; @@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + } + break; + default: + assert(0); + } +} /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. @@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: @@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data, break; case PIPE_FUNC_LESS: for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data, { unsigned j; ubyte newstencil[QUAD_SIZE]; + ubyte refs[QUAD_SIZE]; for (j = 0; j < QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data, case PIPE_STENCIL_OP_REPLACE: for (j = 0; j < QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; @@ -688,8 +728,10 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; boolean interp_depth = !fs->info.writes_z; + boolean shader_stencil_ref = fs->info.writes_stencil; struct depth_data data; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); @@ -716,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 43b8e88e334..2cfd02a22c6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + !sp->fs->info.writes_z && + !sp->fs->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 25a0a622179..edc2a6dacf2 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -113,8 +113,12 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_STREAM_OUTPUT: return 1; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; default: return 0; } @@ -206,13 +210,6 @@ softpipe_is_format_supported( struct pipe_screen *screen, if (format_desc->block.width != 1 || format_desc->block.height != 1) return FALSE; - - /* - * TODO: Unfortunately we cannot render into anything more than 32 bits - * because we encode color clear values into a 32bit word. - */ - if (format_desc->block.bits > 32) - return FALSE; } if (bind & PIPE_BIND_DEPTH_STENCIL) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 088e48f81fe..2eac4c7a82b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -44,6 +44,9 @@ #include "sp_tex_tile_cache.h" +/** Set to one to help debug texture sampling */ +#define DEBUG_TEX 0 + /* * Return fractional part of 'f'. Used for computing interpolation weights. @@ -774,6 +777,18 @@ pot_level_size(unsigned base_pot, unsigned level) } +static void +print_sample(const char *function, float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", + function, + rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], + rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], + rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], + rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); +} + + /* Some image-filter fastpaths: */ static INLINE void @@ -832,6 +847,10 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, tx[2][c], tx[3][c]); } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -872,6 +891,10 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -921,6 +944,10 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -957,6 +984,10 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -997,6 +1028,10 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1045,6 +1080,10 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1357,6 +1396,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, } } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1402,13 +1445,9 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } -#if 0 - printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", - rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], - rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], - rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], - rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); -#endif + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1510,6 +1549,10 @@ mip_filter_linear_2d_linear_repeat_POT( } } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 031c7c1ea5c..4151a47c323 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -86,7 +86,7 @@ struct softpipe_tile_cache struct softpipe_cached_tile *entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ + uint clear_val; /**< for z+stencil */ boolean depth_stencil; /**< Is the surface a depth/stencil format? */ struct softpipe_cached_tile *tile; /**< scratch tile for clears */ diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 50205995911..3d6b5b5c81d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -122,6 +122,27 @@ typedef unsigned char boolean; # endif #endif +/* + * Define the C99 restrict keyword. + * + * See also: + * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html + */ +#ifndef restrict +# if (__STDC_VERSION__ >= 199901L) + /* C99 */ +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) + /* C99 */ +# elif defined(__GNUC__) +# define restrict __restrict__ +# elif defined(_MSC_VER) +# define restrict __restrict +# else +# define restrict /* */ +# endif +#endif + + /* Function visibility */ #ifndef PUBLIC # if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index d2273b334a6..e30b9904fa5 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -452,6 +452,7 @@ enum pipe_cap { PIPE_CAP_BLEND_EQUATION_SEPARATE, PIPE_CAP_SM3, /*< Shader Model, supported */ PIPE_CAP_STREAM_OUTPUT, + PIPE_CAP_PRIMITIVE_RESTART, /** Maximum texture image units accessible from vertex and fragment shaders * combined */ PIPE_CAP_MAX_COMBINED_SAMPLERS, @@ -464,7 +465,8 @@ enum pipe_cap { PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER, - PIPE_CAP_DEPTH_CLAMP + PIPE_CAP_DEPTH_CLAMP, + PIPE_CAP_SHADER_STENCIL_EXPORT, }; /* Shader caps not specific to any single stage */ diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 240e349ba7e..9a59c9f9ea0 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -198,6 +198,10 @@ enum pipe_format { PIPE_FORMAT_IA44 = 141, PIPE_FORMAT_AI44 = 142, + /* some stencil samplers formats */ + PIPE_FORMAT_X24S8_USCALED = 136, + PIPE_FORMAT_S8X24_USCALED = 137, + PIPE_FORMAT_X32_S8X24_USCALED = 138, PIPE_FORMAT_COUNT }; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 74488de17eb..ba433b2bd2a 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -143,7 +143,8 @@ struct tgsi_declaration_dimension #define TGSI_SEMANTIC_EDGEFLAG 8 #define TGSI_SEMANTIC_PRIMID 9 #define TGSI_SEMANTIC_INSTANCEID 10 -#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ +#define TGSI_SEMANTIC_STENCIL 11 +#define TGSI_SEMANTIC_COUNT 12 /**< number of semantic values */ struct tgsi_declaration_semantic { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 9a2b31da50d..fc6dba346da 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -457,6 +457,12 @@ struct pipe_draw_info int index_bias; /**< a bias to be added to each index */ unsigned min_index; /**< the min index */ unsigned max_index; /**< the max index */ + + /** + * Primitive restart enable/index (only applies to indexed drawing) + */ + boolean primitive_restart; + unsigned restart_index; }; diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 22e1b6dd701..770b37037f5 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -49,7 +49,7 @@ dri_init_extensions(struct dri_context *ctx) } GLboolean -dri_create_context(gl_api api, const __GLcontextModes * visual, +dri_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { __DRIscreen *sPriv = cPriv->driScreenPriv; diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index beb59c6f684..35105e861f9 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -88,7 +88,7 @@ dri_get_current(__DRIscreen * driScreenPriv); boolean dri_create_context(gl_api api, - const __GLcontextModes * visual, + const struct gl_config * visual, __DRIcontext * driContextPriv, void *sharedContextPrivate); diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 1bdfdccf439..5fd6e7863c0 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -112,7 +112,7 @@ dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi, boolean dri_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_screen *screen = sPriv->private; struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 74e662d36c4..837d3983748 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -79,7 +79,7 @@ dri_drawable(__DRIdrawable * driDrawPriv) boolean dri_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap); + const struct gl_config * visual, boolean isPixmap); void dri_destroy_buffer(__DRIdrawable * dPriv); diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index b3b09b605fa..252ad1768d8 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -227,7 +227,7 @@ dri_fill_in_modes(struct dri_screen *screen, */ void dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, - const __GLcontextModes *mode) + const struct gl_config *mode) { memset(stvis, 0, sizeof(*stvis)); diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index d4eb8f454f0..0da9b5510fc 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -114,7 +114,7 @@ dri_with_format(__DRIscreen * sPriv) void dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, - const __GLcontextModes *mode); + const struct gl_config *mode); const __DRIconfig ** dri_init_screen_helper(struct dri_screen *screen, diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 116afccb194..3c5b0756174 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -502,7 +502,7 @@ static const __DRIextension *dri_screen_extensions[] = { /** * This is the driver specific part of the createNewScreen entry point. * - * Returns the __GLcontextModes supported by this driver. + * Returns the struct gl_config supported by this driver. */ static const __DRIconfig ** dri2_init_screen(__DRIscreen * sPriv) @@ -548,7 +548,7 @@ fail: } static boolean -dri2_create_context(gl_api api, const __GLcontextModes * visual, +dri2_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { struct dri_context *ctx = NULL; @@ -564,7 +564,7 @@ dri2_create_context(gl_api api, const __GLcontextModes * visual, static boolean dri2_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 04bba631aeb..c48cc440367 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -298,7 +298,7 @@ fail: static boolean drisw_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, - const __GLcontextModes * visual, boolean isPixmap) + const struct gl_config * visual, boolean isPixmap) { struct dri_drawable *drawable = NULL; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index bfbb431058b..30ddcd5bc14 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -126,24 +126,24 @@ egl_g3d_add_screens(_EGLDriver *drv, _EGLDisplay *dpy) continue; } - _eglInitScreen(&gscr->base); - - for (j = 0; j < num_modes; j++) { + _eglInitScreen(&gscr->base, dpy, num_modes); + for (j = 0; j < gscr->base.NumModes; j++) { const struct native_mode *nmode = native_modes[j]; - _EGLMode *mode; - - mode = _eglAddNewMode(&gscr->base, nmode->width, nmode->height, - nmode->refresh_rate, nmode->desc); - if (!mode) - break; - /* gscr->native_modes and gscr->base.Modes should be consistent */ - assert(mode == &gscr->base.Modes[j]); + _EGLMode *mode = &gscr->base.Modes[j]; + + mode->Width = nmode->width; + mode->Height = nmode->height; + mode->RefreshRate = nmode->refresh_rate; + mode->Optimal = EGL_FALSE; + mode->Interlaced = EGL_FALSE; + /* no need to strdup() */ + mode->Name = nmode->desc; } gscr->native = nconn; gscr->native_modes = native_modes; - _eglAddScreen(dpy, &gscr->base); + _eglLinkScreen(&gscr->base); } FREE(native_connectors); @@ -194,53 +194,48 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf, if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_BACK_LEFT)) surface_type |= EGL_PBUFFER_BIT; - SET_CONFIG_ATTRIB(conf, EGL_CONFORMANT, api_mask); - SET_CONFIG_ATTRIB(conf, EGL_RENDERABLE_TYPE, api_mask); + conf->Conformant = api_mask; + conf->RenderableType = api_mask; - SET_CONFIG_ATTRIB(conf, EGL_RED_SIZE, rgba[0]); - SET_CONFIG_ATTRIB(conf, EGL_GREEN_SIZE, rgba[1]); - SET_CONFIG_ATTRIB(conf, EGL_BLUE_SIZE, rgba[2]); - SET_CONFIG_ATTRIB(conf, EGL_ALPHA_SIZE, rgba[3]); - SET_CONFIG_ATTRIB(conf, EGL_BUFFER_SIZE, buffer_size); + conf->RedSize = rgba[0]; + conf->GreenSize = rgba[1]; + conf->BlueSize = rgba[2]; + conf->AlphaSize = rgba[3]; + conf->BufferSize = buffer_size; - SET_CONFIG_ATTRIB(conf, EGL_DEPTH_SIZE, depth_stencil[0]); - SET_CONFIG_ATTRIB(conf, EGL_STENCIL_SIZE, depth_stencil[1]); + conf->DepthSize = depth_stencil[0]; + conf->StencilSize = depth_stencil[1]; - SET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE, surface_type); + conf->SurfaceType = surface_type; - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_RENDERABLE, EGL_TRUE); + conf->NativeRenderable = EGL_TRUE; if (surface_type & EGL_WINDOW_BIT) { - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_ID, nconf->native_visual_id); - SET_CONFIG_ATTRIB(conf, EGL_NATIVE_VISUAL_TYPE, - nconf->native_visual_type); + conf->NativeVisualID = nconf->native_visual_id; + conf->NativeVisualType = nconf->native_visual_type; } if (surface_type & EGL_PBUFFER_BIT) { - SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGB, EGL_TRUE); + conf->BindToTextureRGB = EGL_TRUE; if (rgba[3]) - SET_CONFIG_ATTRIB(conf, EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE); + conf->BindToTextureRGBA = EGL_TRUE; - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_WIDTH, 4096); - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_HEIGHT, 4096); - SET_CONFIG_ATTRIB(conf, EGL_MAX_PBUFFER_PIXELS, 4096 * 4096); + conf->MaxPbufferWidth = 4096; + conf->MaxPbufferHeight = 4096; + conf->MaxPbufferPixels = 4096 * 4096; } - SET_CONFIG_ATTRIB(conf, EGL_LEVEL, nconf->level); - SET_CONFIG_ATTRIB(conf, EGL_SAMPLES, nconf->samples); - SET_CONFIG_ATTRIB(conf, EGL_SAMPLE_BUFFERS, 1); + conf->Level = nconf->level; + conf->Samples = nconf->samples; + conf->SampleBuffers = 0; if (nconf->slow_config) - SET_CONFIG_ATTRIB(conf, EGL_CONFIG_CAVEAT, EGL_SLOW_CONFIG); + conf->ConfigCaveat = EGL_SLOW_CONFIG; if (nconf->transparent_rgb) { - rgba[0] = nconf->transparent_rgb_values[0]; - rgba[1] = nconf->transparent_rgb_values[1]; - rgba[2] = nconf->transparent_rgb_values[2]; - - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_TYPE, EGL_TRANSPARENT_RGB); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_RED_VALUE, rgba[0]); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_GREEN_VALUE, rgba[1]); - SET_CONFIG_ATTRIB(conf, EGL_TRANSPARENT_BLUE_VALUE, rgba[2]); + conf->TransparentType = EGL_TRANSPARENT_RGB; + conf->TransparentRedValue = nconf->transparent_rgb_values[0]; + conf->TransparentGreenValue = nconf->transparent_rgb_values[1]; + conf->TransparentBlueValue = nconf->transparent_rgb_values[2]; } return _eglValidateConfig(conf, EGL_FALSE); @@ -378,7 +373,7 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) break; } - _eglAddConfig(dpy, &gconf->base); + _eglLinkConfig(&gconf->base); id++; } } diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h index be450bbede3..72c14f0ac49 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -106,8 +106,6 @@ _EGL_DRIVER_TYPECAST(egl_g3d_image, _EGLImage, obj) struct egl_g3d_sync { _EGLSync base; - int refs; - /* the mutex protects only the condvar, not the struct */ pipe_mutex mutex; pipe_condvar condvar; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c index c0164daf9c1..c10245bb067 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c @@ -160,7 +160,7 @@ destroy_context(_EGLDisplay *dpy, _EGLContext *ctx) static EGLBoolean egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) { - if (!_eglIsContextBound(ctx)) + if (_eglPutContext(ctx)) destroy_context(dpy, ctx); return EGL_TRUE; } @@ -433,7 +433,7 @@ destroy_surface(_EGLDisplay *dpy, _EGLSurface *surf) static EGLBoolean egl_g3d_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) { - if (!_eglIsSurfaceBound(surf)) + if (_eglPutSurface(surf)) destroy_surface(dpy, surf); return EGL_TRUE; } @@ -446,13 +446,15 @@ egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy, struct egl_g3d_surface *gdraw = egl_g3d_surface(draw); struct egl_g3d_surface *gread = egl_g3d_surface(read); struct egl_g3d_context *old_gctx; + _EGLContext *old_ctx; + _EGLSurface *old_draw, *old_read; EGLBoolean ok = EGL_TRUE; - /* bind the new context and return the "orphaned" one */ - if (!_eglBindContext(&ctx, &draw, &read)) + /* make new bindings */ + if (!_eglBindContext(ctx, draw, read, &old_ctx, &old_draw, &old_read)) return EGL_FALSE; - old_gctx = egl_g3d_context(ctx); + old_gctx = egl_g3d_context(old_ctx); if (old_gctx) { /* flush old context */ old_gctx->stctxi->flush(old_gctx->stctxi, @@ -481,15 +483,33 @@ egl_g3d_make_current(_EGLDriver *drv, _EGLDisplay *dpy, } else if (old_gctx) { ok = old_gctx->stapi->make_current(old_gctx->stapi, NULL, NULL, NULL); - old_gctx->base.WindowRenderBuffer = EGL_NONE; + if (ok) + old_gctx->base.WindowRenderBuffer = EGL_NONE; } - if (ctx && !_eglIsContextLinked(ctx)) - destroy_context(dpy, ctx); - if (draw && !_eglIsSurfaceLinked(draw)) - destroy_surface(dpy, draw); - if (read && read != draw && !_eglIsSurfaceLinked(read)) - destroy_surface(dpy, read); + if (ok) { + if (_eglPutContext(old_ctx)) + destroy_context(dpy, old_ctx); + if (_eglPutSurface(old_draw)) + destroy_surface(dpy, old_draw); + if (_eglPutSurface(old_read)) + destroy_surface(dpy, old_read); + } + else { + /* undo the previous _eglBindContext */ + _eglBindContext(old_ctx, old_draw, old_read, &ctx, &draw, &read); + assert(&gctx->base == ctx && + &gdraw->base == draw && + &gread->base == read); + + _eglPutSurface(draw); + _eglPutSurface(read); + _eglPutContext(ctx); + + _eglPutSurface(old_draw); + _eglPutSurface(old_read); + _eglPutContext(old_ctx); + } return ok; } @@ -609,8 +629,10 @@ egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) gctx->stctxi->flush(gctx->stctxi, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence); - screen->fence_finish(screen, fence, 0); - screen->fence_reference(screen, &fence, NULL); + if (fence) { + screen->fence_finish(screen, fence, 0); + screen->fence_reference(screen, &fence, NULL); + } return EGL_TRUE; } diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 558638e72f0..be9c88e5e47 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -74,62 +74,40 @@ egl_g3d_reference_native_pixmap(_EGLDisplay *dpy, EGLNativePixmapType pix) #ifdef EGL_MESA_drm_image static struct pipe_resource * -egl_g3d_create_drm_buffer(_EGLDisplay *dpy, const EGLint *attribs) +egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img, + const EGLint *attribs) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct pipe_screen *screen = gdpy->native->screen; struct pipe_resource templ; - EGLint width = 0, height = 0, format = 0, use = 0; - EGLint valid_use; - EGLint i, err = EGL_SUCCESS; - - for (i = 0; attribs[i] != EGL_NONE; i++) { - EGLint attr = attribs[i++]; - EGLint val = attribs[i]; - - switch (attr) { - case EGL_WIDTH: - width = val; - break; - case EGL_HEIGHT: - height = val; - break; - case EGL_DRM_BUFFER_FORMAT_MESA: - format = val; - break; - case EGL_DRM_BUFFER_USE_MESA: - use = val; - break; - default: - err = EGL_BAD_ATTRIBUTE; - break; - } + _EGLImageAttribs attrs; + EGLint format, valid_use; - if (err != EGL_SUCCESS) { - _eglLog(_EGL_DEBUG, "bad image attribute 0x%04x", attr); - return NULL; - } - } + if (_eglParseImageAttribList(&attrs, dpy, attribs) != EGL_SUCCESS) + return NULL; - if (width <= 0 || height <= 0) { - _eglLog(_EGL_DEBUG, "bad width or height (%dx%d)", width, height); + if (attrs.Width <= 0 || attrs.Height <= 0) { + _eglLog(_EGL_DEBUG, "bad width or height (%dx%d)", + attrs.Width, attrs.Height); return NULL; } - switch (format) { + switch (attrs.DRMBufferFormatMESA) { case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA: format = PIPE_FORMAT_B8G8R8A8_UNORM; break; default: - _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", format); + _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", + attrs.DRMBufferFormatMESA); return NULL; break; } valid_use = EGL_DRM_BUFFER_USE_SCANOUT_MESA | EGL_DRM_BUFFER_USE_SHARE_MESA; - if (use & ~valid_use) { - _eglLog(_EGL_DEBUG, "bad image use bit 0x%04x", use); + if (attrs.DRMBufferUseMESA & ~valid_use) { + _eglLog(_EGL_DEBUG, "bad image use bit 0x%04x", + attrs.DRMBufferUseMESA); return NULL; } @@ -137,18 +115,18 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, const EGLint *attribs) templ.target = PIPE_TEXTURE_2D; templ.format = format; templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; - templ.width0 = width; - templ.height0 = height; + templ.width0 = attrs.Width; + templ.height0 = attrs.Height; templ.depth0 = 1; /* * XXX fix apps (e.g. wayland) and pipe drivers (e.g. i915) and remove the * size check */ - if ((use & EGL_DRM_BUFFER_USE_SCANOUT_MESA) && - width >= 640 && height >= 480) + if ((attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_SCANOUT_MESA) && + attrs.Width >= 640 && attrs.Height >= 480) templ.bind |= PIPE_BIND_SCANOUT; - if (use & EGL_DRM_BUFFER_USE_SHARE_MESA) + if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_SHARE_MESA) templ.bind |= PIPE_BIND_SHARED; return screen->resource_create(screen, &templ); @@ -156,59 +134,36 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, const EGLint *attribs) static struct pipe_resource * egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name, - const EGLint *attribs) + _EGLImage *img, const EGLint *attribs) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct pipe_screen *screen = gdpy->native->screen; struct pipe_resource templ; struct winsys_handle wsh; - EGLint width = 0, height = 0, format = 0, stride = 0; - EGLint i, err = EGL_SUCCESS; + _EGLImageAttribs attrs; + EGLint format; /* winsys_handle is in theory platform-specific */ if (dpy->Platform != _EGL_PLATFORM_DRM) return NULL; - for (i = 0; attribs[i] != EGL_NONE; i++) { - EGLint attr = attribs[i++]; - EGLint val = attribs[i]; - - switch (attr) { - case EGL_WIDTH: - width = val; - break; - case EGL_HEIGHT: - height = val; - break; - case EGL_DRM_BUFFER_FORMAT_MESA: - format = val; - break; - case EGL_DRM_BUFFER_STRIDE_MESA: - stride = val; - break; - default: - err = EGL_BAD_ATTRIBUTE; - break; - } - - if (err != EGL_SUCCESS) { - _eglLog(_EGL_DEBUG, "bad image attribute 0x%04x", attr); - return NULL; - } - } + if (_eglParseImageAttribList(&attrs, dpy, attribs) != EGL_SUCCESS) + return NULL; - if (width <= 0 || height <= 0 || stride <= 0) { + if (attrs.Width <= 0 || attrs.Height <= 0 || + attrs.DRMBufferStrideMESA <= 0) { _eglLog(_EGL_DEBUG, "bad width, height, or stride (%dx%dx%d)", - width, height, stride); + attrs.Width, attrs.Height, attrs.DRMBufferStrideMESA); return NULL; } - switch (format) { + switch (attrs.DRMBufferFormatMESA) { case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA: format = PIPE_FORMAT_B8G8R8A8_UNORM; break; default: - _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", format); + _eglLog(_EGL_DEBUG, "bad image format value 0x%04x", + attrs.DRMBufferFormatMESA); return NULL; break; } @@ -217,13 +172,13 @@ egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name, templ.target = PIPE_TEXTURE_2D; templ.format = format; templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; - templ.width0 = width; - templ.height0 = height; + templ.width0 = attrs.Width; + templ.height0 = attrs.Height; templ.depth0 = 1; memset(&wsh, 0, sizeof(wsh)); wsh.handle = (unsigned) name; - wsh.stride = stride; + wsh.stride = attrs.DRMBufferStrideMESA; return screen->resource_from_handle(screen, &templ, &wsh); } @@ -245,7 +200,7 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, return NULL; } - if (!_eglInitImage(&gimg->base, dpy, attribs)) { + if (!_eglInitImage(&gimg->base, dpy)) { FREE(gimg); return NULL; } @@ -257,7 +212,8 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, break; #ifdef EGL_MESA_drm_image case EGL_DRM_BUFFER_MESA: - ptex = egl_g3d_reference_drm_buffer(dpy, (EGLint) buffer, attribs); + ptex = egl_g3d_reference_drm_buffer(dpy, + (EGLint) buffer, &gimg->base, attribs); break; #endif default: @@ -316,13 +272,13 @@ egl_g3d_create_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!_eglInitImage(&gimg->base, dpy, attribs)) { + if (!_eglInitImage(&gimg->base, dpy)) { FREE(gimg); return NULL; } #ifdef EGL_MESA_drm_image - ptex = egl_g3d_create_drm_buffer(dpy, attribs); + ptex = egl_g3d_create_drm_buffer(dpy, &gimg->base, attribs); #else ptex = NULL; #endif diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c index ec74e9eb94c..4e6d944c151 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_sync.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_sync.c @@ -128,13 +128,13 @@ egl_g3d_wait_fence_sync(struct egl_g3d_sync *gsync, EGLTimeKHR timeout) static INLINE void egl_g3d_ref_sync(struct egl_g3d_sync *gsync) { - p_atomic_inc(&gsync->refs); + _eglGetSync(&gsync->base); } static INLINE void egl_g3d_unref_sync(struct egl_g3d_sync *gsync) { - if (p_atomic_dec_zero(&gsync->refs)) { + if (_eglPutSync(&gsync->base)) { pipe_condvar_destroy(gsync->condvar); pipe_mutex_destroy(gsync->mutex); @@ -194,7 +194,6 @@ egl_g3d_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, pipe_mutex_init(gsync->mutex); pipe_condvar_init(gsync->condvar); - p_atomic_set(&gsync->refs, 1); return &gsync->base; } diff --git a/src/gallium/state_trackers/egl/common/native_modeset.h b/src/gallium/state_trackers/egl/common/native_modeset.h index dee757b3a88..2598082d687 100644 --- a/src/gallium/state_trackers/egl/common/native_modeset.h +++ b/src/gallium/state_trackers/egl/common/native_modeset.h @@ -39,7 +39,7 @@ struct native_connector { struct native_mode { const char *desc; int width, height; - int refresh_rate; + int refresh_rate; /* HZ * 1000 */ }; /** diff --git a/src/gallium/state_trackers/egl/drm/modeset.c b/src/gallium/state_trackers/egl/drm/modeset.c index 06a60770537..5ed22f7b9d4 100644 --- a/src/gallium/state_trackers/egl/drm/modeset.c +++ b/src/gallium/state_trackers/egl/drm/modeset.c @@ -469,8 +469,8 @@ drm_display_get_modes(struct native_display *ndpy, drmmode->base.height = drmmode->mode.vdisplay; drmmode->base.refresh_rate = drmmode->mode.vrefresh; /* not all kernels have vrefresh = refresh_rate * 1000 */ - if (drmmode->base.refresh_rate > 1000) - drmmode->base.refresh_rate = (drmmode->base.refresh_rate + 500) / 1000; + if (drmmode->base.refresh_rate < 1000) + drmmode->base.refresh_rate *= 1000; } nmodes_return = MALLOC(count * sizeof(*nmodes_return)); diff --git a/src/gallium/state_trackers/egl/x11/glcore.h b/src/gallium/state_trackers/egl/x11/glcore.h new file mode 100644 index 00000000000..547b1113707 --- /dev/null +++ b/src/gallium/state_trackers/egl/x11/glcore.h @@ -0,0 +1,181 @@ +#ifndef __gl_core_h_ +#define __gl_core_h_ + +/* + * SGI FREE SOFTWARE LICENSE B (Version 2.0, Sept. 18, 2008) + * Copyright (C) 1991-2000 Silicon Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice including the dates of first publication and + * either this permission notice or a reference to + * http://oss.sgi.com/projects/FreeB/ + * shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * SILICON GRAPHICS, INC. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Except as contained in this notice, the name of Silicon Graphics, Inc. + * shall not be used in advertising or otherwise to promote the sale, use or + * other dealings in this Software without prior written authorization from + * Silicon Graphics, Inc. + */ + +#if !defined(_WIN32_WCE) +#include <sys/types.h> +#endif + +#define GL_CORE_SGI 1 +#define GL_CORE_MESA 2 +#define GL_CORE_APPLE 4 +#define GL_CORE_WINDOWS 8 + +typedef struct __GLcontextRec __GLcontext; + +/* +** This file defines the interface between the GL core and the surrounding +** "operating system" that supports it (currently the GLX or WGL extensions). +** +** Members (data and function pointers) are documented as imported or +** exported according to how they are used by the core rendering functions. +** Imported members are initialized by the "operating system" and used by +** the core functions. Exported members are initialized by the core functions +** and used by the "operating system". +*/ + +/** + * Mode and limit information for a context. This information is + * kept around in the context so that values can be used during + * command execution, and for returning information about the + * context to the application. + * + * Instances of this structure are shared by the driver and the loader. To + * maintain binary compatability, new fields \b must be added only to the + * end of the structure. + * + * \sa _gl_context_modes_create + */ +typedef struct __GLcontextModesRec { + struct __GLcontextModesRec * next; + + GLboolean rgbMode; + GLboolean floatMode; + GLboolean colorIndexMode; + GLuint doubleBufferMode; + GLuint stereoMode; + + GLboolean haveAccumBuffer; + GLboolean haveDepthBuffer; + GLboolean haveStencilBuffer; + + GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ + GLuint redMask, greenMask, blueMask, alphaMask; + GLint rgbBits; /* total bits for rgb */ + GLint indexBits; /* total bits for colorindex */ + + GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits; + GLint depthBits; + GLint stencilBits; + + GLint numAuxBuffers; + + GLint level; + + GLint pixmapMode; + + /* GLX */ + GLint visualID; + GLint visualType; /**< One of the GLX X visual types. (i.e., + * \c GLX_TRUE_COLOR, etc.) + */ + + /* EXT_visual_rating / GLX 1.2 */ + GLint visualRating; + + /* EXT_visual_info / GLX 1.2 */ + GLint transparentPixel; + /* colors are floats scaled to ints */ + GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha; + GLint transparentIndex; + + /* ARB_multisample / SGIS_multisample */ + GLint sampleBuffers; + GLint samples; + + /* SGIX_fbconfig / GLX 1.3 */ + GLint drawableType; + GLint renderType; + GLint xRenderable; + GLint fbconfigID; + + /* SGIX_pbuffer / GLX 1.3 */ + GLint maxPbufferWidth; + GLint maxPbufferHeight; + GLint maxPbufferPixels; + GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */ + GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */ + + /* SGIX_visual_select_group */ + GLint visualSelectGroup; + + /* OML_swap_method */ + GLint swapMethod; + + GLint screen; + + /* EXT_texture_from_pixmap */ + GLint bindToTextureRgb; + GLint bindToTextureRgba; + GLint bindToMipmapTexture; + GLint bindToTextureTargets; + GLint yInverted; +} __GLcontextModes; + +/* Several fields of __GLcontextModes can take these as values. Since + * GLX header files may not be available everywhere they need to be used, + * redefine them here. + */ +#define GLX_NONE 0x8000 +#define GLX_SLOW_CONFIG 0x8001 +#define GLX_TRUE_COLOR 0x8002 +#define GLX_DIRECT_COLOR 0x8003 +#define GLX_PSEUDO_COLOR 0x8004 +#define GLX_STATIC_COLOR 0x8005 +#define GLX_GRAY_SCALE 0x8006 +#define GLX_STATIC_GRAY 0x8007 +#define GLX_TRANSPARENT_RGB 0x8008 +#define GLX_TRANSPARENT_INDEX 0x8009 +#define GLX_NON_CONFORMANT_CONFIG 0x800D +#define GLX_SWAP_EXCHANGE_OML 0x8061 +#define GLX_SWAP_COPY_OML 0x8062 +#define GLX_SWAP_UNDEFINED_OML 0x8063 + +#define GLX_DONT_CARE 0xFFFFFFFF + +#define GLX_RGBA_BIT 0x00000001 +#define GLX_COLOR_INDEX_BIT 0x00000002 +#define GLX_WINDOW_BIT 0x00000001 +#define GLX_PIXMAP_BIT 0x00000002 +#define GLX_PBUFFER_BIT 0x00000004 + +#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0 +#define GLX_BIND_TO_TEXTURE_RGBA_EXT 0x20D1 +#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT 0x20D2 +#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3 +#define GLX_Y_INVERTED_EXT 0x20D4 + +#define GLX_TEXTURE_1D_BIT_EXT 0x00000001 +#define GLX_TEXTURE_2D_BIT_EXT 0x00000002 +#define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004 + +#endif /* __gl_core_h_ */ diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c index 57c6aaff864..df8370f8d7d 100644 --- a/src/gallium/state_trackers/egl/x11/glxinit.c +++ b/src/gallium/state_trackers/egl/x11/glxinit.c @@ -18,7 +18,7 @@ #include "GL/glxproto.h" #include "GL/glxtokens.h" #include "GL/gl.h" /* for GL types needed by __GLcontextModes */ -#include "GL/internal/glcore.h" /* for __GLcontextModes */ +#include "glcore.h" /* for __GLcontextModes */ #include "glxinit.h" diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h index bc0ef69ec66..2e313e0148e 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl/x11/x11_screen.h @@ -30,7 +30,7 @@ #include <X11/Xutil.h> #include <X11/extensions/dri2tokens.h> #include "GL/gl.h" /* for GL types needed by __GLcontextModes */ -#include "GL/internal/glcore.h" /* for __GLcontextModes */ +#include "glcore.h" /* for __GLcontextModes */ #include "pipe/p_compiler.h" #include "common/native.h" diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index f950c8858bc..8332633f01b 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -423,7 +423,7 @@ static XMesaBuffer XMesaBufferList = NULL; /** * Allocate a new XMesaBuffer object which corresponds to the given drawable. - * Note that XMesaBuffer is derived from GLframebuffer. + * Note that XMesaBuffer is derived from struct gl_framebuffer. * The new XMesaBuffer will not have any size (Width=Height=0). * * \param d the corresponding X drawable (window or pixmap) @@ -569,7 +569,7 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, /* RGB WINDOW: * We support RGB rendering into almost any kind of visual. */ - const int xclass = v->mesa_visual.visualType; + const int xclass = v->visualType; if (xclass != GLX_TRUE_COLOR && xclass == !GLX_DIRECT_COLOR) { _mesa_warning(NULL, "XMesa: RGB mode rendering not supported in given visual.\n"); @@ -716,13 +716,13 @@ XMesaVisual XMesaCreateVisual( Display *display, v->mesa_visual.redMask = visinfo->red_mask; v->mesa_visual.greenMask = visinfo->green_mask; v->mesa_visual.blueMask = visinfo->blue_mask; - v->mesa_visual.visualID = visinfo->visualid; - v->mesa_visual.screen = visinfo->screen; + v->visualID = visinfo->visualid; + v->screen = visinfo->screen; #if !(defined(__cplusplus) || defined(c_plusplus)) - v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class); + v->visualType = xmesa_convert_from_x_visual_type(visinfo->class); #else - v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); + v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); #endif v->mesa_visual.visualRating = visualCaveat; @@ -733,7 +733,7 @@ XMesaVisual XMesaCreateVisual( Display *display, (void) initialize_visual_and_buffer( v, NULL, rgb_flag, 0, 0 ); { - const int xclass = v->mesa_visual.visualType; + const int xclass = v->visualType; if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) { red_bits = _mesa_bitcount(GET_REDMASK(v)); green_bits = _mesa_bitcount(GET_GREENMASK(v)); @@ -756,7 +756,7 @@ XMesaVisual XMesaCreateVisual( Display *display, /* initialize visual */ { - __GLcontextModes *vis = &v->mesa_visual; + struct gl_config *vis = &v->mesa_visual; vis->rgbMode = GL_TRUE; vis->doubleBufferMode = db_flag; @@ -783,7 +783,6 @@ XMesaVisual XMesaCreateVisual( Display *display, vis->numAuxBuffers = 0; vis->level = 0; - vis->pixmapMode = 0; vis->sampleBuffers = 0; vis->samples = 0; } @@ -855,7 +854,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) if (!xmdpy) return NULL; - /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */ + /* Note: the XMesaContext contains a Mesa struct gl_context struct (inheritance) */ c = (XMesaContext) CALLOC_STRUCT(xmesa_context); if (!c) return NULL; diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h index fedf2b2d5a1..b8ac979edc1 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.h +++ b/src/gallium/state_trackers/glx/xlib/xm_api.h @@ -280,7 +280,8 @@ XMesaCopyContext(XMesaContext src, XMesaContext dst, unsigned long mask); * Basically corresponds to an XVisualInfo. */ struct xmesa_visual { - GLvisual mesa_visual; /* Device independent visual parameters */ + struct gl_config mesa_visual;/* Device independent visual parameters */ + int screen, visualID, visualType; Display *display; /* The X11 display */ XVisualInfo * visinfo; /* X's visual info (pointer to private copy) */ XVisualInfo *vishandle; /* Only used in fakeglx.c */ diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c index 4d0f5e66256..e7466bdbee5 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_st.c +++ b/src/gallium/state_trackers/glx/xlib/xm_st.c @@ -196,7 +196,13 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, /** + * Check that a framebuffer's attachments match the window's size. + * * Called via st_framebuffer_iface::validate() + * + * \param statts array of framebuffer attachments + * \param count number of framebuffer attachments in statts[] + * \param out returns resources for each of the attachments */ static boolean xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, @@ -209,9 +215,11 @@ xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, boolean resized; boolean ret; + /* build mask of ST_ATTACHMENT bits */ statt_mask = 0x0; for (i = 0; i < count; i++) statt_mask |= 1 << statts[i]; + /* record newly allocated textures */ new_mask = statt_mask & ~xstfb->texture_mask; diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index e7996741d14..232deefa166 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -352,7 +352,7 @@ vg_api_create_context(struct st_api *stapi, struct st_manager *smapi, return NULL; /* only 1.0 is supported */ - if (attribs->major != 1 || attribs->minor > 0) + if (attribs->major > 1 || (attribs->major == 1 && attribs->minor > 0)) return NULL; pipe = smapi->screen->context_create(smapi->screen, NULL); diff --git a/src/gallium/state_trackers/xorg/Makefile b/src/gallium/state_trackers/xorg/Makefile index cb2c3aea410..7a44d28017b 100644 --- a/src/gallium/state_trackers/xorg/Makefile +++ b/src/gallium/state_trackers/xorg/Makefile @@ -10,7 +10,7 @@ LIBRARY_INCLUDES = \ $(shell pkg-config libkms --atleast-version=1.0 \ && echo "-DHAVE_LIBKMS") \ $(shell pkg-config libkms --silence-errors --cflags-only-I) \ - $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ + $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto dri2proto) \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/include \ diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 26a907f205e..80af82d97b2 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -234,6 +234,10 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) 64, 64, (void*)image, 64 * 4, 0, 0); ms->ctx->transfer_unmap(ms->ctx, transfer); ms->ctx->transfer_destroy(ms->ctx, transfer); + + if (crtc->cursor_shown) + drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id, + crtcp->cursor_handle, 64, 64); } #if HAVE_LIBKMS @@ -271,6 +275,10 @@ crtc_load_cursor_argb_kms(xf86CrtcPtr crtc, CARD32 * image) memcpy(ptr, image, 64*64*4); kms_bo_unmap(crtcp->cursor_bo); + if (crtc->cursor_shown) + drmModeSetCursor(ms->fd, crtcp->drm_crtc->crtc_id, + crtcp->cursor_handle, 64, 64); + return; err_bo_destroy: @@ -353,7 +361,7 @@ crtc_destroy(xf86CrtcPtr crtc) drmModeFreeCrtc(crtcp->drm_crtc); - xfree(crtcp); + free(crtcp); crtc->driver_private = NULL; } @@ -401,7 +409,7 @@ xorg_crtc_init(ScrnInfoPtr pScrn) if (crtc == NULL) goto out; - crtcp = xcalloc(1, sizeof(struct crtc_private)); + crtcp = calloc(1, sizeof(struct crtc_private)); if (!crtcp) { xf86CrtcDestroy(crtc); goto out; diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 704aed6a82c..b723a8e9cb0 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -201,11 +201,11 @@ dri2_create_buffer(DrawablePtr pDraw, unsigned int attachment, unsigned int form DRI2Buffer2Ptr buffer; BufferPrivatePtr private; - buffer = xcalloc(1, sizeof *buffer); + buffer = calloc(1, sizeof *buffer); if (!buffer) return NULL; - private = xcalloc(1, sizeof *private); + private = calloc(1, sizeof *private); if (!private) { goto fail; } @@ -217,9 +217,9 @@ dri2_create_buffer(DrawablePtr pDraw, unsigned int attachment, unsigned int form if (dri2_do_create_buffer(pDraw, (DRI2BufferPtr)buffer, format)) return buffer; - xfree(private); + free(private); fail: - xfree(buffer); + free(buffer); return NULL; } @@ -229,8 +229,8 @@ dri2_destroy_buffer(DrawablePtr pDraw, DRI2Buffer2Ptr buffer) /* So far it is safe to downcast a DRI2Buffer2Ptr to DRI2BufferPtr */ dri2_do_destroy_buffer(pDraw, (DRI2BufferPtr)buffer); - xfree(buffer->driverPrivate); - xfree(buffer); + free(buffer->driverPrivate); + free(buffer); } #endif /* DRI2INFOREC_VERSION >= 2 */ @@ -244,11 +244,11 @@ dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count) DRI2BufferPtr buffers; int i; - buffers = xcalloc(count, sizeof *buffers); + buffers = calloc(count, sizeof *buffers); if (!buffers) goto fail_buffers; - privates = xcalloc(count, sizeof *privates); + privates = calloc(count, sizeof *privates); if (!privates) goto fail_privates; @@ -263,9 +263,9 @@ dri2_create_buffers(DrawablePtr pDraw, unsigned int *attachments, int count) return buffers; fail: - xfree(privates); + free(privates); fail_privates: - xfree(buffers); + free(buffers); fail_buffers: return NULL; } @@ -280,8 +280,8 @@ dri2_destroy_buffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count) } if (buffers) { - xfree(buffers[0].driverPrivate); - xfree(buffers); + free(buffers[0].driverPrivate); + free(buffers); } } diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index e10ff2f9508..1ec772df172 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -45,6 +45,7 @@ #include "miscstruct.h" #include "dixstruct.h" #include "xf86xv.h" +#include "xorgVersion.h" #ifndef XSERVER_LIBPCIACCESS #error "libpciaccess needed" #endif @@ -122,7 +123,7 @@ xorg_tracker_set_functions(ScrnInfoPtr scrn) Bool xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device) { - char *BusID = xalloc(64); + char *BusID = malloc(64); sprintf(BusID, "pci:%04x:%02x:%02x.%d", device->domain, device->bus, device->dev, device->func); @@ -130,14 +131,14 @@ xorg_tracker_have_modesetting(ScrnInfoPtr pScrn, struct pci_device *device) if (drmCheckModesettingSupported(BusID)) { xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, "Drm modesetting not supported %s\n", BusID); - xfree(BusID); + free(BusID); return FALSE; } xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 0, "Drm modesetting supported on %s\n", BusID); - xfree(BusID); + free(BusID); return TRUE; } @@ -174,7 +175,7 @@ drv_free_rec(ScrnInfoPtr pScrn) if (!pScrn->driverPrivate) return; - xfree(pScrn->driverPrivate); + free(pScrn->driverPrivate); pScrn->driverPrivate = NULL; } @@ -274,7 +275,7 @@ drv_init_drm(ScrnInfoPtr pScrn) if (ms->fd < 0) { char *BusID; - BusID = xalloc(64); + BusID = malloc(64); sprintf(BusID, "PCI:%d:%d:%d", ((ms->PciInfo->domain << 8) | ms->PciInfo->bus), ms->PciInfo->dev, ms->PciInfo->func @@ -283,7 +284,7 @@ drv_init_drm(ScrnInfoPtr pScrn) ms->fd = drmOpen(driver_descriptor.driver_name, BusID); ms->isMaster = TRUE; - xfree(BusID); + free(BusID); if (ms->fd >= 0) return TRUE; @@ -369,6 +370,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) ms = modesettingPTR(pScrn); ms->pEnt = pEnt; ms->cust = cust; + ms->fb_id = -1; pScrn->displayWidth = 640; /* default it */ @@ -402,19 +404,6 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) if (!drv_init_drm(pScrn)) return FALSE; - use3D = cust ? !cust->no_3d : TRUE; - ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, - &use3D) ? - X_CONFIG : X_PROBED; - - ms->no3D = !use3D; - - if (!drv_init_resource_management(pScrn)) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init " - "Gallium3D or libKMS."); - return FALSE; - } - pScrn->monitor = pScrn->confScreen->monitor; pScrn->progClock = TRUE; pScrn->rgbBits = 8; @@ -444,11 +433,24 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) /* Process the options */ xf86CollectOptions(pScrn, NULL); - if (!(ms->Options = xalloc(sizeof(drv_options)))) + if (!(ms->Options = malloc(sizeof(drv_options)))) return FALSE; memcpy(ms->Options, drv_options, sizeof(drv_options)); xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, ms->Options); + use3D = cust ? !cust->no_3d : TRUE; + ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, + &use3D) ? + X_CONFIG : X_PROBED; + + ms->no3D = !use3D; + + if (!drv_init_resource_management(pScrn)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init " + "Gallium3D or libKMS."); + return FALSE; + } + /* Allocate an xf86CrtcConfig */ xf86CrtcConfigInit(pScrn, &crtc_config_funcs); xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); @@ -791,7 +793,9 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) if (!ms->SWCursor) xf86_cursors_init(pScreen, 64, 64, HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 | - HARDWARE_CURSOR_ARGB); + HARDWARE_CURSOR_ARGB | + ((cust && cust->unhidden_hw_cursor_update) ? + HARDWARE_CURSOR_UPDATE_UNHIDDEN : 0)); /* Must force it before EnterVT, so we are in control of VT and * later memory should be bound when allocating, e.g rotate_mem */ @@ -862,8 +866,10 @@ drv_leave_vt(int scrnIndex, int flags) } } - drmModeRmFB(ms->fd, ms->fb_id); - ms->fb_id = -1; + if (ms->fb_id != -1) { + drmModeRmFB(ms->fd, ms->fb_id); + ms->fb_id = -1; + } /* idle hardware */ if (!ms->kms) @@ -944,7 +950,6 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) } #endif - drmModeRmFB(ms->fd, ms->fb_id); ms->destroy_front_buffer(pScrn); if (ms->exa) @@ -1178,6 +1183,8 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn) stride, ptr); +#if (XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1, 9, 99, 1, 0)) + /* This a hack to work around EnableDisableFBAccess setting the pointer * the real fix would be to replace pScrn->EnableDisableFBAccess hook * and set the rootPixmap->devPrivate.ptr to something valid before that. @@ -1187,6 +1194,8 @@ drv_bind_front_buffer_kms(ScrnInfoPtr pScrn) */ pScrn->pixmapPrivate.ptr = ptr; +#endif + return TRUE; err_destroy: diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 6b2c80fbca6..4b1c02bad42 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -720,7 +720,7 @@ ExaCreatePixmap(ScreenPtr pScreen, int size, int align) { struct exa_pixmap_priv *priv; - priv = xcalloc(1, sizeof(struct exa_pixmap_priv)); + priv = calloc(1, sizeof(struct exa_pixmap_priv)); if (!priv) return NULL; @@ -737,7 +737,7 @@ ExaDestroyPixmap(ScreenPtr pScreen, void *dPriv) pipe_resource_reference(&priv->tex, NULL); - xfree(priv); + free(priv); } static Bool @@ -975,7 +975,7 @@ xorg_exa_close(ScrnInfoPtr pScrn) ms->ctx = NULL; exaDriverFini(pScrn->pScreen); - xfree(exa); + free(exa); ms->exa = NULL; } @@ -987,7 +987,7 @@ xorg_exa_init(ScrnInfoPtr pScrn, Bool accel) ExaDriverPtr pExa; CustomizerPtr cust = ms->cust; - exa = xcalloc(1, sizeof(struct exa_context)); + exa = calloc(1, sizeof(struct exa_context)); if (!exa) return NULL; @@ -1057,6 +1057,7 @@ xorg_exa_init(ScrnInfoPtr pScrn, Bool accel) out_err: xorg_exa_close(pScrn); + free(exa); return NULL; } diff --git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c index 61206ed751c..5555b51131c 100644 --- a/src/gallium/state_trackers/xorg/xorg_output.c +++ b/src/gallium/state_trackers/xorg/xorg_output.c @@ -128,7 +128,7 @@ output_get_modes(xf86OutputPtr output) for (i = 0; i < drm_connector->count_modes; i++) { drm_mode = &drm_connector->modes[i]; if (drm_mode) { - mode = xcalloc(1, sizeof(DisplayModeRec)); + mode = calloc(1, sizeof(DisplayModeRec)); if (!mode) continue; mode->Clock = drm_mode->clock; @@ -195,7 +195,7 @@ output_destroy(xf86OutputPtr output) { struct output_private *priv = output->driver_private; drmModeFreeConnector(priv->drm_connector); - xfree(priv); + free(priv); output->driver_private = NULL; } @@ -262,14 +262,14 @@ xorg_output_init(ScrnInfoPtr pScrn) drm_connector->connector_type_id); - priv = xcalloc(sizeof(*priv), 1); + priv = calloc(sizeof(*priv), 1); if (!priv) { continue; } output = xf86OutputCreate(pScrn, &output_funcs, name); if (!output) { - xfree(priv); + free(priv); continue; } diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index be1a9fda48d..a3fb5e5dad0 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -76,6 +76,7 @@ typedef struct _CustomizerRec Bool dirty_throttling; Bool swap_throttling; Bool no_3d; + Bool unhidden_hw_cursor_update; Bool (*winsys_pre_init) (struct _CustomizerRec *cust, int fd); Bool (*winsys_screen_init)(struct _CustomizerRec *cust); Bool (*winsys_screen_close)(struct _CustomizerRec *cust); diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index f98bd939010..f64959f00e9 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -536,8 +536,10 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); +#ifdef COMPOSITE REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x, -pPixmap->screen_y); +#endif dxo = dstRegion->extents.x1; dyo = dstRegion->extents.y1; @@ -562,11 +564,16 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, int box_y2 = pbox->y2; float diff_x = (float)src_w / (float)dst_w; float diff_y = (float)src_h / (float)dst_h; - float offset_x = box_x1 - dstX + pPixmap->screen_x; - float offset_y = box_y1 - dstY + pPixmap->screen_y; + float offset_x = box_x1 - dstX; + float offset_y = box_y1 - dstY; float offset_w; float offset_h; +#ifdef COMPOSITE + offset_x += pPixmap->screen_x; + offset_y += pPixmap->screen_y; +#endif + x = box_x1; y = box_y1; w = box_x2 - box_x1; diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg index 762c905985e..87eedd7136c 100644 --- a/src/gallium/targets/Makefile.xorg +++ b/src/gallium/targets/Makefile.xorg @@ -29,7 +29,7 @@ INCLUDES = \ LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ifeq ($(MESA_LLVM),1) -LD = g++ +LD = $(CXX) LDFLAGS += $(LLVM_LDFLAGS) USE_CXX=1 DRIVER_PIPES += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a @@ -42,7 +42,7 @@ endif default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING) $(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES) - $(MKLIB) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) + $(MKLIB) -linker $(CC) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES) rm -f depend diff --git a/src/gallium/targets/dri-i915/target.c b/src/gallium/targets/dri-i915/target.c index 5ae6ca367d8..a27b7bd6d81 100644 --- a/src/gallium/targets/dri-i915/target.c +++ b/src/gallium/targets/dri-i915/target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("I915_SOFTWARE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/dri-i965/target.c b/src/gallium/targets/dri-i965/target.c index ce97f820278..0632b97beaa 100644 --- a/src/gallium/targets/dri-i965/target.c +++ b/src/gallium/targets/dri-i965/target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index 47c24cefe5c..57979c4e9d4 100644 --- a/src/gallium/targets/egl/Makefile +++ b/src/gallium/targets/egl/Makefile @@ -24,7 +24,9 @@ common_CPPFLAGS := \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/winsys + -I$(TOP)/src/gallium/winsys \ + $(LIBDRM_CFLAGS) + common_SYS := common_LIBS := \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ @@ -37,15 +39,15 @@ egl_CPPFLAGS := \ -I$(TOP)/src/gallium/state_trackers/egl \ -I$(TOP)/src/egl/main \ -DPIPE_PREFIX=\"$(PIPE_PREFIX)\" -DST_PREFIX=\"$(ST_PREFIX)\" -egl_SYS := -lm $(DLOPEN_LIBS) -L$(TOP)/$(LIB_DIR) -lEGL +egl_SYS := -lm $(DLOPEN_LIBS) -lEGL egl_LIBS := $(TOP)/src/gallium/state_trackers/egl/libegl.a ifneq ($(findstring x11, $(EGL_PLATFORMS)),) -egl_SYS += -lX11 -lXext -lXfixes +egl_SYS += -lX11 -lXext -lXfixes $(LIBDRM_LIB) egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a endif -ifneq ($(findstring kms, $(EGL_PLATFORMS)),) -egl_SYS += -ldrm +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +egl_SYS += $(LIBDRM_LIB) endif ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) egl_LIBS += $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a @@ -132,17 +134,17 @@ GL_LIBS := $(TOP)/src/mesa/libmesagallium.a # OpenGL ES 1.x state tracker GLESv1_CM_CPPFLAGS := -I$(TOP)/src/mesa -GLESv1_CM_SYS := $(DRI_LIB_DEPS) -L$(TOP)/$(LIB_DIR) -l$(GLESv1_CM_LIB) +GLESv1_CM_SYS := $(DRI_LIB_DEPS) -l$(GLESv1_CM_LIB) GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a # OpenGL ES 2.x state tracker GLESv2_CPPFLAGS := -I$(TOP)/src/mesa -GLESv2_SYS := $(DRI_LIB_DEPS) -L$(TOP)/$(LIB_DIR) -l$(GLESv2_LIB) +GLESv2_SYS := $(DRI_LIB_DEPS) -l$(GLESv2_LIB) GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a # OpenVG state tracker OpenVG_CPPFLAGS := -I$(TOP)/src/gallium/state_trackers/vega -OpenVG_SYS := -lm -L$(TOP)/$(LIB_DIR) -l$(VG_LIB) +OpenVG_SYS := -lm -l$(VG_LIB) OpenVG_LIBS := $(TOP)/src/gallium/state_trackers/vega/libvega.a @@ -179,14 +181,16 @@ OUTPUTS := $(addprefix $(OUTPUT_PATH)/, $(OUTPUTS)) default: $(OUTPUTS) define mklib -$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ +$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CC)' \ + -L$(TOP)/$(LIB_DIR) -ldflags '$(LDFLAGS)' \ -install $(OUTPUT_PATH) $(MKLIB_OPTIONS) $< \ -Wl,--start-group $(common_LIBS) $($(1)_LIBS) -Wl,--end-group \ $(common_SYS) $($(1)_SYS) endef define mklib-cxx -$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ +$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CXX)' \ + -L$(TOP)/$(LIB_DIR) -ldflags '$(LDFLAGS)' \ -cplusplus -install $(OUTPUT_PATH) $(MKLIB_OPTIONS) $< \ -Wl,--start-group $(common_LIBS) $($(1)_LIBS) -Wl,--end-group \ $(common_SYS) $($(1)_SYS) diff --git a/src/gallium/targets/egl/pipe_i915.c b/src/gallium/targets/egl/pipe_i915.c index 758a921b481..cd74044d8c1 100644 --- a/src/gallium/targets/egl/pipe_i915.c +++ b/src/gallium/targets/egl/pipe_i915.c @@ -1,5 +1,4 @@ -#include "target-helpers/inline_wrapper_sw_helper.h" #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "i915/drm/i915_drm_public.h" diff --git a/src/gallium/targets/egl/pipe_i965.c b/src/gallium/targets/egl/pipe_i965.c index 43bf646e825..f810ecffb0a 100644 --- a/src/gallium/targets/egl/pipe_i965.c +++ b/src/gallium/targets/egl/pipe_i965.c @@ -1,6 +1,6 @@ -#include "target-helpers/inline_wrapper_sw_helper.h" #include "target-helpers/inline_debug_helper.h" +#include "target-helpers/inline_wrapper_sw_helper.h" #include "state_tracker/drm_driver.h" #include "i965/drm/i965_drm_public.h" #include "i965/brw_public.h" @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index 79e516a2a7a..076a040a5ab 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile @@ -10,7 +10,7 @@ include $(TOP)/configs/current GL_MAJOR = 1 GL_MINOR = 5 -GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) +GL_TINY = 0$(MESA_MAJOR)$(MESA_MINOR)0$(MESA_TINY) INCLUDE_DIRS = \ diff --git a/src/gallium/targets/xorg-i965/intel_target.c b/src/gallium/targets/xorg-i965/intel_target.c index ce97f820278..0632b97beaa 100644 --- a/src/gallium/targets/xorg-i965/intel_target.c +++ b/src/gallium/targets/xorg-i965/intel_target.c @@ -19,8 +19,7 @@ create_screen(int fd) if (!screen) return NULL; - if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - screen = sw_screen_wrap(screen); + screen = sw_screen_wrap(screen); screen = debug_screen_wrap(screen); diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c index 237b308ae35..9b422e661bf 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c @@ -32,6 +32,7 @@ * allows X clients to communicate with the driver. */ +#include <xorg-server.h> #include "dixstruct.h" #include "extnsionst.h" #include <X11/X.h> @@ -211,7 +212,7 @@ VMwareCtrlDoSetTopology(ScrnInfoPtr pScrn, struct vmw_customizer *vmw = vmw_customizer(xorg_customizer(pScrn)); int i; - rects = xcalloc(number, sizeof(*rects)); + rects = calloc(number, sizeof(*rects)); if (!rects) return FALSE; @@ -224,7 +225,7 @@ VMwareCtrlDoSetTopology(ScrnInfoPtr pScrn, vmw_ioctl_update_layout(vmw, number, rects); - xfree(rects); + free(rects); return TRUE; } diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c index 7c799b58275..7625d2fb8f9 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_ioctl.c @@ -165,7 +165,7 @@ vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *han struct drm_vmw_dmabuf_rep *rep = &arg.rep; int ret; - buf = xcalloc(1, sizeof(*buf)); + buf = calloc(1, sizeof(*buf)); if (!buf) goto err; @@ -192,7 +192,7 @@ vmw_ioctl_buffer_create(struct vmw_customizer *vmw, uint32_t size, unsigned *han return buf; err_free: - xfree(buf); + free(buf); err: return NULL; } @@ -211,7 +211,7 @@ vmw_ioctl_buffer_destroy(struct vmw_customizer *vmw, struct vmw_dma_buffer *buf) arg.handle = buf->handle; drmCommandWrite(vmw->fd, DRM_VMW_UNREF_DMABUF, &arg, sizeof(arg)); - xfree(buf); + free(buf); } void * diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c index 8173908f551..76622031650 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c @@ -245,6 +245,7 @@ vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags) cust->winsys_enter_vt = vmw_screen_enter_vt; cust->winsys_leave_vt = vmw_screen_leave_vt; cust->no_3d = TRUE; + cust->unhidden_hw_cursor_update = TRUE; vmw->pScrn = pScrn; pScrn->driverPrivate = cust; diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_video.c b/src/gallium/targets/xorg-vmwgfx/vmw_video.c index eced60d0ec1..94465e52043 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_video.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_video.c @@ -300,7 +300,7 @@ vmw_video_init(struct vmw_customizer *vmw) numAdaptors = 1; overlayAdaptors = &newAdaptor; } else { - newAdaptors = xalloc((numAdaptors + 1) * + newAdaptors = malloc((numAdaptors + 1) * sizeof(XF86VideoAdaptorPtr*)); if (!newAdaptors) { xf86XVFreeVideoAdaptorRec(newAdaptor); @@ -320,7 +320,7 @@ vmw_video_init(struct vmw_customizer *vmw) } if (newAdaptors) { - xfree(newAdaptors); + free(newAdaptors); } debug_printf("Initialized VMware Xv extension successfully\n"); @@ -438,7 +438,7 @@ vmw_video_init_adaptor(ScrnInfoPtr pScrn, struct vmw_customizer *vmw) return NULL; } - video = xcalloc(1, sizeof(*video)); + video = calloc(1, sizeof(*video)); if (!video) { debug_printf("Not enough memory.\n"); xf86XVFreeVideoAdaptorRec(adaptor); @@ -742,7 +742,7 @@ vmw_video_buffer_alloc(struct vmw_customizer *vmw, int size, } out->size = size; - out->extra_data = xcalloc(1, size); + out->extra_data = calloc(1, size); debug_printf("\t\t%s: allocated buffer %p of size %i\n", __func__, out, size); @@ -773,7 +773,7 @@ vmw_video_buffer_free(struct vmw_customizer *vmw, if (out->size == 0) return Success; - xfree(out->extra_data); + free(out->extra_data); vmw_ioctl_buffer_unmap(vmw, out->buf); vmw_ioctl_buffer_destroy(vmw, out->buf); diff --git a/src/gallium/tests/python/retrace/interpreter.py b/src/gallium/tests/python/retrace/interpreter.py index 16132abb06d..954a701a53f 100755 --- a/src/gallium/tests/python/retrace/interpreter.py +++ b/src/gallium/tests/python/retrace/interpreter.py @@ -610,6 +610,15 @@ class Context(Object): _rgba[i] = rgba[i] self.real.clear(buffers, _rgba, depth, stencil) + def clear_render_target(self, dst, rgba, dstx, dsty, width, height): + _rgba = gallium.FloatArray(4) + for i in range(4): + _rgba[i] = rgba[i] + self.real.clear_render_target(dst, _rgba, dstx, dsty, width, height) + + def clear_depth_stencil(self, dst, clear_flags, depth, stencil, dstx, dsty, width, height): + self.real.clear_depth_stencil(dst, clear_flags, depth, stencil, dstx, dsty, width, height) + def _present(self): self.real.flush() diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh new file mode 100644 index 00000000000..5745b6a5aba --- /dev/null +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-face.sh @@ -0,0 +1,14 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL IN[1], FACE, CONSTANT +DCL OUT[0], COLOR +DCL TEMP[0] +IMM FLT32 { 0.5, 1.0, 0.0, 0.0 } + +MUL TEMP[0], IN[1].xxxx, IMM[0].xxxx +ADD TEMP[0], TEMP[0], IMM[0].yyyy + +MOV OUT[0], TEMP[0] + +END diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 1355b079450..7f21b53ace0 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -613,6 +613,13 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + /* save 16dwords space for fence mecanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } /* init dirty list */ LIST_INITHEAD(&ctx->dirty); @@ -633,7 +640,7 @@ static inline void evergreen_context_pipe_state_set_resource(struct r600_context block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } block->reg[0] = state->regs[0].value; @@ -688,7 +695,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } block->reg[0] = state->regs[0].value; @@ -712,7 +719,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c block = range->blocks[CTX_BLOCK_ID(ctx, fake_offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } if (state->nregs <= 3) { @@ -755,7 +762,8 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr struct r600_bo *cb[12]; struct r600_bo *db; unsigned ndwords = 9, flush; - struct r600_block *dirty_block; + struct r600_block *dirty_block = NULL; + struct r600_block *next_block; if (draw->indices) { ndwords = 13; @@ -810,10 +818,9 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr } /* enough room to copy packet */ - LIST_FOR_EACH_ENTRY(dirty_block,&ctx->dirty,list) { + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) { r600_context_block_emit_dirty(ctx, dirty_block); } - LIST_INITHEAD(&ctx->dirty); /* draw packet */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c index 496547ca994..0a4d2e791db 100644 --- a/src/gallium/winsys/r600/drm/r600.c +++ b/src/gallium/winsys/r600/drm/r600.c @@ -40,6 +40,11 @@ enum chip_class r600_get_family_class(struct radeon *radeon) return radeon->chip_class; } +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) +{ + return &radeon->tiling_info; +} + static int r600_get_device(struct radeon *r600) { struct drm_radeon_info info; diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 9498f3a82ea..7d54ff18fc2 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -26,7 +26,9 @@ #include <pipe/p_compiler.h> #include <pipe/p_screen.h> #include <pipebuffer/pb_bufmgr.h> +#include "radeon_drm.h" #include "r600_priv.h" +#include "r600d.h" struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage) @@ -55,7 +57,7 @@ struct r600_bo *r600_bo(struct radeon *radeon, } struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle) + unsigned handle, unsigned *array_mode) { struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); struct radeon_bo *bo; @@ -68,6 +70,20 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, bo = radeon_bo_pb_get_bo(ws_bo->pb); ws_bo->size = bo->size; pipe_reference_init(&ws_bo->reference, 1); + + radeon_bo_get_tiling_flags(radeon, bo, &ws_bo->tiling_flags, + &ws_bo->kernel_pitch); + if (array_mode) { + if (ws_bo->tiling_flags) { + if (ws_bo->tiling_flags & RADEON_TILING_MICRO) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; + if ((ws_bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) == + (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) + *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + } else { + *array_mode = 0; + } + } return ws_bo; } diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 5f175a4df98..60c2f51fac0 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -37,6 +37,9 @@ #include "xf86drm.h" #include "radeon_drm.h" +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 0x6 +#endif static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info; @@ -50,6 +53,61 @@ static int radeon_get_device(struct radeon *radeon) return r; } +static int radeon_drm_get_tiling(struct radeon *radeon) +{ + struct drm_radeon_info info; + int r; + uint32_t tiling_config; + + info.request = RADEON_INFO_TILING_CONFIG; + info.value = (uintptr_t)&tiling_config; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + + if (r) + return 0; + + switch ((tiling_config & 0xe) >> 1) { + case 0: + radeon->tiling_info.num_channels = 1; + break; + case 1: + radeon->tiling_info.num_channels = 2; + break; + case 2: + radeon->tiling_info.num_channels = 4; + break; + case 3: + radeon->tiling_info.num_channels = 8; + break; + default: + return -EINVAL; + } + + switch ((tiling_config & 0x30) >> 4) { + case 0: + radeon->tiling_info.num_banks = 4; + break; + case 1: + radeon->tiling_info.num_banks = 8; + break; + default: + return -EINVAL; + + } + switch ((tiling_config & 0xc0) >> 6) { + case 0: + radeon->tiling_info.group_bytes = 256; + break; + case 1: + radeon->tiling_info.group_bytes = 512; + break; + default: + return -EINVAL; + } + return 0; +} + struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -157,6 +215,10 @@ struct radeon *radeon_new(int fd, unsigned device) break; } + if (radeon->chip_class == R600 || radeon->chip_class == R700) { + if (radeon_drm_get_tiling(radeon)) + return NULL; + } radeon->kman = radeon_bo_pbmgr_create(radeon); if (!radeon->kman) return NULL; @@ -179,9 +241,15 @@ struct radeon *radeon_decref(struct radeon *radeon) return NULL; } - radeon->cman->destroy(radeon->cman); - radeon->kman->destroy(radeon->kman); - drmClose(radeon->fd); + if (radeon->cman) + radeon->cman->destroy(radeon->cman); + + if (radeon->kman) + radeon->kman->destroy(radeon->kman); + + if (radeon->fd >= 0) + drmClose(radeon->fd); + free(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index b379499f060..2521ff96473 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -41,6 +41,45 @@ #define GROUP_FORCE_NEW_BLOCK 0 +int r600_context_init_fence(struct r600_context *ctx) +{ + ctx->fence = 1; + ctx->fence_bo = r600_bo(ctx->radeon, 4096, 0, 0); + if (ctx->fence_bo == NULL) { + return -ENOMEM; + } + ctx->cfence = r600_bo_map(ctx->radeon, ctx->fence_bo, PB_USAGE_UNSYNCHRONIZED, NULL); + *ctx->cfence = 0; + LIST_INITHEAD(&ctx->fenced_bo); + return 0; +} + +static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) +{ + for (int i = 0; i < ctx->creloc; i++) { + if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) + LIST_DELINIT(&ctx->bo[i]->fencedlist); + LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); + ctx->bo[i]->fence = ctx->fence; + ctx->bo[i]->ctx = ctx; + } +} + +static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) +{ + struct radeon_bo *bo = NULL; + struct radeon_bo *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { + if (bo->fence <= *ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + } else { + bo->fence = fence; + } + } +} + int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg) { struct r600_block *block; @@ -87,6 +126,8 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, block->reg = &block->pm4[block->pm4_ndwords]; block->pm4_ndwords += n; block->nreg = n; + LIST_INITHEAD(&block->list); + for (j = 0; j < n; j++) { if (reg[i+j].need_bo) { block->nbo++; @@ -572,6 +613,9 @@ void r600_context_fini(struct r600_context *ctx) } free(ctx->reloc); free(ctx->pm4); + if (ctx->fence_bo) { + r600_bo_reference(ctx->radeon, &ctx->fence_bo, NULL); + } memset(ctx, 0, sizeof(struct r600_context)); } @@ -691,6 +735,13 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + /* save 16dwords space for fence mecanism */ + ctx->pm4_ndwords -= 16; + + r = r600_context_init_fence(ctx); + if (r) { + goto out_err; + } /* init dirty list */ LIST_INITHEAD(&ctx->dirty); @@ -781,7 +832,7 @@ static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } block->reg[0] = state->regs[0].value; @@ -835,7 +886,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } block->reg[0] = state->regs[0].value; @@ -858,7 +909,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); - LIST_DEL(&block->list); + LIST_DELINIT(&block->list); return; } if (state->nregs <= 3) { @@ -917,7 +968,8 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) struct r600_bo *cb[8]; struct r600_bo *db; unsigned ndwords = 9; - struct r600_block *dirty_block; + struct r600_block *dirty_block = NULL; + struct r600_block *next_block; if (draw->indices) { ndwords = 13; @@ -970,10 +1022,9 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) } /* enough room to copy packet */ - LIST_FOR_EACH_ENTRY(dirty_block,&ctx->dirty,list) { + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty,list) { r600_context_block_emit_dirty(ctx, dirty_block); } - LIST_INITHEAD(&ctx->dirty); /* draw packet */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); @@ -1019,6 +1070,7 @@ void r600_context_flush(struct r600_context *ctx) struct drm_radeon_cs drmib; struct drm_radeon_cs_chunk chunks[2]; uint64_t chunk_array[2]; + unsigned fence; int r; if (!ctx->pm4_cdwords) @@ -1028,6 +1080,18 @@ void r600_context_flush(struct r600_context *ctx) r600_context_queries_suspend(ctx); radeon_bo_pbmgr_flush_maps(ctx->radeon->kman); + + /* emit fence */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT | (5 << 8); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); + ctx->pm4[ctx->pm4_cdwords++] = ctx->fence; + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->fence_bo); + #if 1 /* emit cs */ drmib.num_chunks = 2; @@ -1043,8 +1107,18 @@ void r600_context_flush(struct r600_context *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif + + r600_context_update_fenced_list(ctx); + + fence = ctx->fence + 1; + if (fence < ctx->fence) { + /* wrap around */ + fence = 1; + r600_context_fence_wraparound(ctx, fence); + } + ctx->fence = fence; + /* restart */ - radeon_bo_fencelist(ctx->radeon, ctx->bo, ctx->creloc); for (int i = 0; i < ctx->creloc; i++) { ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; @@ -1062,8 +1136,9 @@ void r600_context_flush(struct r600_context *ctx) */ for (int i = 0; i < ctx->nblocks; i++) { if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) { - if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) + if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) { LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty); + } ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; } @@ -1243,7 +1318,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) { r600_bo_reference(ctx->radeon, &query->buffer, NULL); - LIST_DEL(&query->list); + LIST_DELINIT(&query->list); free(query); } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index ea2cf347785..b5bd7bd92c1 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -42,6 +42,7 @@ struct radeon { enum chip_class chip_class; struct pb_manager *kman; /* kernel bo manager */ struct pb_manager *cman; /* cached bo manager */ + struct r600_tiling_info tiling_info; }; struct radeon *r600_new(int fd, unsigned device); @@ -64,9 +65,9 @@ struct radeon_bo { unsigned map_count; void *data; struct list_head fencedlist; + unsigned fence; + struct r600_context *ctx; boolean shared; - int64_t last_busy; - boolean set_busy; struct r600_reloc *reloc; unsigned reloc_id; unsigned last_flush; @@ -76,6 +77,8 @@ struct r600_bo { struct pipe_reference reference; struct pb_buffer *pb; unsigned size; + unsigned tiling_flags; + unsigned kernel_pitch; }; @@ -94,7 +97,10 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr); int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); - +int radeon_bo_get_tiling_flags(struct radeon *radeon, + struct radeon_bo *bo, + uint32_t *tiling_flags, + uint32_t *pitch); /* radeon_bo_pb.c */ struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); @@ -103,6 +109,7 @@ struct pb_buffer *radeon_bo_pb_create_buffer_from_handle(struct pb_manager *_mgr uint32_t handle); /* r600_hw_context.c */ +int r600_context_init_fence(struct r600_context *ctx); void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); @@ -161,6 +168,7 @@ static inline void r600_context_block_emit_dirty(struct r600_context *ctx, struc memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); ctx->pm4_cdwords += block->pm4_ndwords; block->status ^= R600_BLOCK_STATUS_DIRTY; + LIST_DELINIT(&block->list); } static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index ccc9ffaf8e3..d91f7737af3 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -91,6 +91,7 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 42a23f0ab8f..9d664b7e537 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -32,7 +32,6 @@ #include "r600_priv.h" #include "xf86drm.h" #include "radeon_drm.h" -#include "util/u_time.h" static int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) { @@ -128,7 +127,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, return bo; } - static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) { struct drm_gem_close args; @@ -158,8 +156,14 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) struct drm_radeon_gem_wait_idle args; int ret; - if (LIST_IS_EMPTY(&bo->fencedlist) && !bo->shared) + if (!bo->fence && !bo->shared) + return 0; + + if (bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; return 0; + } /* Zero out args to make valgrind happy */ memset(&args, 0, sizeof(args)); @@ -171,22 +175,20 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) return ret; } -#define BO_BUSY_BACKOFF 10000 - int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) { struct drm_radeon_gem_busy args; int ret; - int64_t now; - - now = os_time_get(); - if (LIST_IS_EMPTY(&bo->fencedlist) && !bo->shared) - return 0; - - if (bo->set_busy && (now - bo->last_busy < BO_BUSY_BACKOFF)) - return -EBUSY; - bo->set_busy = FALSE; + if (!bo->shared) { + if (!bo->fence) + return 0; + if (bo->fence <= *bo->ctx->cfence) { + LIST_DELINIT(&bo->fencedlist); + bo->fence = 0; + return 0; + } + } memset(&args, 0, sizeof(args)); args.handle = bo->handle; @@ -195,37 +197,25 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)); - if (ret == 0) { - struct radeon_bo *entry, *tent; - LIST_FOR_EACH_ENTRY_SAFE(entry, tent, &bo->fencedlist, fencedlist) { - LIST_DELINIT(&entry->fencedlist); - } - } else { - bo->set_busy = TRUE; - bo->last_busy = now; - } *domain = args.domain; return ret; } -int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, - uint32_t num_bo) +int radeon_bo_get_tiling_flags(struct radeon *radeon, + struct radeon_bo *bo, + uint32_t *tiling_flags, + uint32_t *pitch) { - struct radeon_bo *first; - int i; - - first = bolist[0]; - - if (!LIST_IS_EMPTY(&first->fencedlist)) - LIST_DELINIT(&first->fencedlist); - - LIST_INITHEAD(&first->fencedlist); - - for (i = 1; i < num_bo; i++) { - if (!LIST_IS_EMPTY(&bolist[i]->fencedlist)) - LIST_DELINIT(&bolist[i]->fencedlist); - - LIST_ADDTAIL(&bolist[i]->fencedlist, &first->fencedlist); - } - return 0; + struct drm_radeon_gem_get_tiling args; + int ret; + + args.handle = bo->handle; + ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING, + &args, sizeof(args)); + if (ret) + return ret; + + *tiling_flags = args.tiling_flags; + *pitch = args.pitch; + return ret; } diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index 3a76098b655..bc2623e7b77 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -272,7 +272,7 @@ wsw_destroy(struct sw_winsys *ws) } struct sw_winsys * -wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen) +wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen) { struct wrapper_sw_winsys *wsw = CALLOC_STRUCT(wrapper_sw_winsys); @@ -304,3 +304,16 @@ err_free: err: return NULL; } + +struct pipe_screen * +wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *ws) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + struct pipe_screen *screen = wsw->screen; + + wsw->pipe->destroy(wsw->pipe); + /* don't destroy the screen its needed later on */ + + FREE(wsw); + return screen; +} diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h index b5c25a3c50f..ae0196c432c 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.h @@ -30,6 +30,15 @@ struct sw_winsys; struct pipe_screen; -struct sw_winsys *wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen); +/* + * Wrap a pipe screen. + */ +struct sw_winsys *wrapper_sw_winsys_wrap_pipe_screen(struct pipe_screen *screen); + +/* + * Destroy the sw_winsys and return the wrapped pipe_screen. + * Not destroying it as sw_winsys::destroy does. + */ +struct pipe_screen *wrapper_sw_winsys_dewrap_pipe_screen(struct sw_winsys *sw_winsys); #endif diff --git a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c index b78f537c125..3aef8daa423 100644 --- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c +++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c @@ -54,7 +54,7 @@ DEBUG_GET_ONCE_BOOL_OPTION(xlib_no_shm, "XLIB_NO_SHM", FALSE) * Display target for Xlib winsys. * Low-level OS/window system memory buffer */ -struct xm_displaytarget +struct xlib_displaytarget { enum pipe_format format; unsigned width; @@ -75,7 +75,7 @@ struct xm_displaytarget Drawable drawable; XShmSegmentInfo shminfo; - int shm; + Bool shm; /** Using shared memory images? */ }; @@ -85,19 +85,16 @@ struct xm_displaytarget struct xlib_sw_winsys { struct sw_winsys base; - - - Display *display; }; /** Cast wrapper */ -static INLINE struct xm_displaytarget * -xm_displaytarget( struct sw_displaytarget *dt ) +static INLINE struct xlib_displaytarget * +xlib_displaytarget(struct sw_displaytarget *dt) { - return (struct xm_displaytarget *)dt; + return (struct xlib_displaytarget *) dt; } @@ -105,22 +102,23 @@ xm_displaytarget( struct sw_displaytarget *dt ) * X Shared Memory Image extension code */ -static volatile int mesaXErrorFlag = 0; +static volatile int XErrorFlag = 0; /** * Catches potential Xlib errors. */ static int -mesaHandleXError(Display *dpy, XErrorEvent *event) +handle_xerror(Display *dpy, XErrorEvent *event) { (void) dpy; (void) event; - mesaXErrorFlag = 1; + XErrorFlag = 1; return 0; } -static char *alloc_shm(struct xm_displaytarget *buf, unsigned size) +static char * +alloc_shm(struct xlib_displaytarget *buf, unsigned size) { XShmSegmentInfo *const shminfo = & buf->shminfo; @@ -144,10 +142,10 @@ static char *alloc_shm(struct xm_displaytarget *buf, unsigned size) /** - * Allocate a shared memory XImage back buffer for the given XMesaBuffer. + * Allocate a shared memory XImage back buffer for the given display target. */ static void -alloc_shm_ximage(struct xm_displaytarget *xm_dt, +alloc_shm_ximage(struct xlib_displaytarget *xlib_dt, struct xlib_drawable *xmb, unsigned width, unsigned height) { @@ -159,51 +157,54 @@ alloc_shm_ximage(struct xm_displaytarget *xm_dt, */ int (*old_handler)(Display *, XErrorEvent *); - xm_dt->tempImage = XShmCreateImage(xm_dt->display, + xlib_dt->tempImage = XShmCreateImage(xlib_dt->display, xmb->visual, xmb->depth, ZPixmap, NULL, - &xm_dt->shminfo, + &xlib_dt->shminfo, width, height); - if (xm_dt->tempImage == NULL) { - xm_dt->shm = 0; + if (xlib_dt->tempImage == NULL) { + xlib_dt->shm = False; return; } - mesaXErrorFlag = 0; - old_handler = XSetErrorHandler(mesaHandleXError); + XErrorFlag = 0; + old_handler = XSetErrorHandler(handle_xerror); /* This may trigger the X protocol error we're ready to catch: */ - XShmAttach(xm_dt->display, &xm_dt->shminfo); - XSync(xm_dt->display, False); + XShmAttach(xlib_dt->display, &xlib_dt->shminfo); + XSync(xlib_dt->display, False); - if (mesaXErrorFlag) { + if (XErrorFlag) { /* we are on a remote display, this error is normal, don't print it */ - XFlush(xm_dt->display); - mesaXErrorFlag = 0; - XDestroyImage(xm_dt->tempImage); - xm_dt->tempImage = NULL; - xm_dt->shm = 0; + XFlush(xlib_dt->display); + XErrorFlag = 0; + XDestroyImage(xlib_dt->tempImage); + xlib_dt->tempImage = NULL; + xlib_dt->shm = False; (void) XSetErrorHandler(old_handler); return; } - xm_dt->shm = 1; + xlib_dt->shm = True; } static void -alloc_ximage(struct xm_displaytarget *xm_dt, +alloc_ximage(struct xlib_displaytarget *xlib_dt, struct xlib_drawable *xmb, unsigned width, unsigned height) { - if (xm_dt->shm) { - alloc_shm_ximage(xm_dt, xmb, width, height); - return; + /* try allocating a shared memory image first */ + if (xlib_dt->shm) { + alloc_shm_ximage(xlib_dt, xmb, width, height); + if (xlib_dt->tempImage) + return; /* success */ } - xm_dt->tempImage = XCreateImage(xm_dt->display, + /* try regular (non-shared memory) image */ + xlib_dt->tempImage = XCreateImage(xlib_dt->display, xmb->visual, xmb->depth, ZPixmap, 0, @@ -212,9 +213,9 @@ alloc_ximage(struct xm_displaytarget *xm_dt, } static boolean -xm_is_displaytarget_format_supported( struct sw_winsys *ws, - unsigned tex_usage, - enum pipe_format format ) +xlib_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format) { /* TODO: check visuals or other sensible thing here */ return TRUE; @@ -222,61 +223,67 @@ xm_is_displaytarget_format_supported( struct sw_winsys *ws, static void * -xm_displaytarget_map(struct sw_winsys *ws, - struct sw_displaytarget *dt, - unsigned flags) +xlib_displaytarget_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) { - struct xm_displaytarget *xm_dt = xm_displaytarget(dt); - xm_dt->mapped = xm_dt->data; - return xm_dt->mapped; + struct xlib_displaytarget *xlib_dt = xlib_displaytarget(dt); + xlib_dt->mapped = xlib_dt->data; + return xlib_dt->mapped; } + static void -xm_displaytarget_unmap(struct sw_winsys *ws, - struct sw_displaytarget *dt) +xlib_displaytarget_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) { - struct xm_displaytarget *xm_dt = xm_displaytarget(dt); - xm_dt->mapped = NULL; + struct xlib_displaytarget *xlib_dt = xlib_displaytarget(dt); + xlib_dt->mapped = NULL; } + static void -xm_displaytarget_destroy(struct sw_winsys *ws, - struct sw_displaytarget *dt) +xlib_displaytarget_destroy(struct sw_winsys *ws, + struct sw_displaytarget *dt) { - struct xm_displaytarget *xm_dt = xm_displaytarget(dt); + struct xlib_displaytarget *xlib_dt = xlib_displaytarget(dt); - if (xm_dt->data) { - if (xm_dt->shminfo.shmid >= 0) { - shmdt(xm_dt->shminfo.shmaddr); - shmctl(xm_dt->shminfo.shmid, IPC_RMID, 0); + if (xlib_dt->data) { + if (xlib_dt->shminfo.shmid >= 0) { + shmdt(xlib_dt->shminfo.shmaddr); + shmctl(xlib_dt->shminfo.shmid, IPC_RMID, 0); - xm_dt->shminfo.shmid = -1; - xm_dt->shminfo.shmaddr = (char *) -1; + xlib_dt->shminfo.shmid = -1; + xlib_dt->shminfo.shmaddr = (char *) -1; + + xlib_dt->data = NULL; + if (xlib_dt->tempImage) + xlib_dt->tempImage->data = NULL; } else { - FREE(xm_dt->data); - if (xm_dt->tempImage && xm_dt->tempImage->data == xm_dt->data) { - xm_dt->tempImage->data = NULL; + FREE(xlib_dt->data); + if (xlib_dt->tempImage && xlib_dt->tempImage->data == xlib_dt->data) { + xlib_dt->tempImage->data = NULL; } - xm_dt->data = NULL; + xlib_dt->data = NULL; } } - if (xm_dt->tempImage) { - XDestroyImage(xm_dt->tempImage); - xm_dt->tempImage = NULL; + if (xlib_dt->tempImage) { + XDestroyImage(xlib_dt->tempImage); + xlib_dt->tempImage = NULL; } - if (xm_dt->gc) - XFreeGC(xm_dt->display, xm_dt->gc); + if (xlib_dt->gc) + XFreeGC(xlib_dt->display, xlib_dt->gc); - FREE(xm_dt); + FREE(xlib_dt); } /** * Display/copy the image in the surface into the X window specified - * by the XMesaBuffer. + * by the display target. */ static void xlib_sw_display(struct xlib_drawable *xlib_drawable, @@ -284,8 +291,8 @@ xlib_sw_display(struct xlib_drawable *xlib_drawable, { static boolean no_swap = 0; static boolean firsttime = 1; - struct xm_displaytarget *xm_dt = xm_displaytarget(dt); - Display *display = xm_dt->display; + struct xlib_displaytarget *xlib_dt = xlib_displaytarget(dt); + Display *display = xlib_dt->display; XImage *ximage; if (firsttime) { @@ -296,74 +303,74 @@ xlib_sw_display(struct xlib_drawable *xlib_drawable, if (no_swap) return; - if (xm_dt->drawable != xlib_drawable->drawable) { - if (xm_dt->gc) { - XFreeGC( display, xm_dt->gc ); - xm_dt->gc = NULL; + if (xlib_dt->drawable != xlib_drawable->drawable) { + if (xlib_dt->gc) { + XFreeGC(display, xlib_dt->gc); + xlib_dt->gc = NULL; } - if (xm_dt->tempImage) { - XDestroyImage( xm_dt->tempImage ); - xm_dt->tempImage = NULL; + if (xlib_dt->tempImage) { + XDestroyImage(xlib_dt->tempImage); + xlib_dt->tempImage = NULL; } - xm_dt->drawable = xlib_drawable->drawable; + xlib_dt->drawable = xlib_drawable->drawable; } - if (xm_dt->tempImage == NULL) { - assert(util_format_get_blockwidth(xm_dt->format) == 1); - assert(util_format_get_blockheight(xm_dt->format) == 1); - alloc_ximage(xm_dt, xlib_drawable, - xm_dt->stride / util_format_get_blocksize(xm_dt->format), - xm_dt->height); - if (!xm_dt->tempImage) + if (xlib_dt->tempImage == NULL) { + assert(util_format_get_blockwidth(xlib_dt->format) == 1); + assert(util_format_get_blockheight(xlib_dt->format) == 1); + alloc_ximage(xlib_dt, xlib_drawable, + xlib_dt->stride / util_format_get_blocksize(xlib_dt->format), + xlib_dt->height); + if (!xlib_dt->tempImage) return; } - if (xm_dt->gc == NULL) { - xm_dt->gc = XCreateGC( display, xlib_drawable->drawable, 0, NULL ); - XSetFunction( display, xm_dt->gc, GXcopy ); + if (xlib_dt->gc == NULL) { + xlib_dt->gc = XCreateGC(display, xlib_drawable->drawable, 0, NULL); + XSetFunction(display, xlib_dt->gc, GXcopy); } - if (xm_dt->shm) - { - ximage = xm_dt->tempImage; - ximage->data = xm_dt->data; + if (xlib_dt->shm) { + ximage = xlib_dt->tempImage; + ximage->data = xlib_dt->data; /* _debug_printf("XSHM\n"); */ - XShmPutImage(xm_dt->display, xlib_drawable->drawable, xm_dt->gc, - ximage, 0, 0, 0, 0, xm_dt->width, xm_dt->height, False); + XShmPutImage(xlib_dt->display, xlib_drawable->drawable, xlib_dt->gc, + ximage, 0, 0, 0, 0, xlib_dt->width, xlib_dt->height, False); } else { /* display image in Window */ - ximage = xm_dt->tempImage; - ximage->data = xm_dt->data; + ximage = xlib_dt->tempImage; + ximage->data = xlib_dt->data; /* check that the XImage has been previously initialized */ assert(ximage->format); assert(ximage->bitmap_unit); /* update XImage's fields */ - ximage->width = xm_dt->width; - ximage->height = xm_dt->height; - ximage->bytes_per_line = xm_dt->stride; + ximage->width = xlib_dt->width; + ximage->height = xlib_dt->height; + ximage->bytes_per_line = xlib_dt->stride; /* _debug_printf("XPUT\n"); */ - XPutImage(xm_dt->display, xlib_drawable->drawable, xm_dt->gc, - ximage, 0, 0, 0, 0, xm_dt->width, xm_dt->height); + XPutImage(xlib_dt->display, xlib_drawable->drawable, xlib_dt->gc, + ximage, 0, 0, 0, 0, xlib_dt->width, xlib_dt->height); } - XFlush(xm_dt->display); + XFlush(xlib_dt->display); } + /** * Display/copy the image in the surface into the X window specified - * by the XMesaBuffer. + * by the display target. */ static void -xm_displaytarget_display(struct sw_winsys *ws, - struct sw_displaytarget *dt, - void *context_private) +xlib_displaytarget_display(struct sw_winsys *ws, + struct sw_displaytarget *dt, + void *context_private) { struct xlib_drawable *xlib_drawable = (struct xlib_drawable *)context_private; xlib_sw_display(xlib_drawable, dt); @@ -371,57 +378,57 @@ xm_displaytarget_display(struct sw_winsys *ws, static struct sw_displaytarget * -xm_displaytarget_create(struct sw_winsys *winsys, - unsigned tex_usage, - enum pipe_format format, - unsigned width, unsigned height, - unsigned alignment, - unsigned *stride) +xlib_displaytarget_create(struct sw_winsys *winsys, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) { - struct xm_displaytarget *xm_dt; + struct xlib_displaytarget *xlib_dt; unsigned nblocksy, size; - xm_dt = CALLOC_STRUCT(xm_displaytarget); - if(!xm_dt) - goto no_xm_dt; + xlib_dt = CALLOC_STRUCT(xlib_displaytarget); + if (!xlib_dt) + goto no_xlib_dt; - xm_dt->display = ((struct xlib_sw_winsys *)winsys)->display; - xm_dt->format = format; - xm_dt->width = width; - xm_dt->height = height; + xlib_dt->display = ((struct xlib_sw_winsys *)winsys)->display; + xlib_dt->format = format; + xlib_dt->width = width; + xlib_dt->height = height; nblocksy = util_format_get_nblocksy(format, height); - xm_dt->stride = align(util_format_get_stride(format, width), alignment); - size = xm_dt->stride * nblocksy; + xlib_dt->stride = align(util_format_get_stride(format, width), alignment); + size = xlib_dt->stride * nblocksy; if (!debug_get_option_xlib_no_shm()) { - xm_dt->data = alloc_shm(xm_dt, size); - if (xm_dt->data) { - xm_dt->shm = TRUE; + xlib_dt->data = alloc_shm(xlib_dt, size); + if (xlib_dt->data) { + xlib_dt->shm = True; } } - if(!xm_dt->data) { - xm_dt->data = align_malloc(size, alignment); - if(!xm_dt->data) + if (!xlib_dt->data) { + xlib_dt->data = align_malloc(size, alignment); + if (!xlib_dt->data) goto no_data; } - *stride = xm_dt->stride; - return (struct sw_displaytarget *)xm_dt; + *stride = xlib_dt->stride; + return (struct sw_displaytarget *)xlib_dt; no_data: - FREE(xm_dt); -no_xm_dt: + FREE(xlib_dt); +no_xlib_dt: return NULL; } static struct sw_displaytarget * -xm_displaytarget_from_handle(struct sw_winsys *winsys, - const struct pipe_resource *templet, - struct winsys_handle *whandle, - unsigned *stride) +xlib_displaytarget_from_handle(struct sw_winsys *winsys, + const struct pipe_resource *templet, + struct winsys_handle *whandle, + unsigned *stride) { assert(0); return NULL; @@ -429,9 +436,9 @@ xm_displaytarget_from_handle(struct sw_winsys *winsys, static boolean -xm_displaytarget_get_handle(struct sw_winsys *winsys, - struct sw_displaytarget *dt, - struct winsys_handle *whandle) +xlib_displaytarget_get_handle(struct sw_winsys *winsys, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) { assert(0); return FALSE; @@ -439,14 +446,14 @@ xm_displaytarget_get_handle(struct sw_winsys *winsys, static void -xm_destroy( struct sw_winsys *ws ) +xlib_destroy(struct sw_winsys *ws) { FREE(ws); } struct sw_winsys * -xlib_create_sw_winsys( Display *display ) +xlib_create_sw_winsys(Display *display) { struct xlib_sw_winsys *ws; @@ -455,19 +462,18 @@ xlib_create_sw_winsys( Display *display ) return NULL; ws->display = display; - ws->base.destroy = xm_destroy; + ws->base.destroy = xlib_destroy; - ws->base.is_displaytarget_format_supported = xm_is_displaytarget_format_supported; + ws->base.is_displaytarget_format_supported = xlib_is_displaytarget_format_supported; - ws->base.displaytarget_create = xm_displaytarget_create; - ws->base.displaytarget_from_handle = xm_displaytarget_from_handle; - ws->base.displaytarget_get_handle = xm_displaytarget_get_handle; - ws->base.displaytarget_map = xm_displaytarget_map; - ws->base.displaytarget_unmap = xm_displaytarget_unmap; - ws->base.displaytarget_destroy = xm_displaytarget_destroy; + ws->base.displaytarget_create = xlib_displaytarget_create; + ws->base.displaytarget_from_handle = xlib_displaytarget_from_handle; + ws->base.displaytarget_get_handle = xlib_displaytarget_get_handle; + ws->base.displaytarget_map = xlib_displaytarget_map; + ws->base.displaytarget_unmap = xlib_displaytarget_unmap; + ws->base.displaytarget_destroy = xlib_displaytarget_destroy; - ws->base.displaytarget_display = xm_displaytarget_display; + ws->base.displaytarget_display = xlib_displaytarget_display; return &ws->base; } - |