diff options
Diffstat (limited to 'src/gallium/drivers')
120 files changed, 6737 insertions, 5521 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index bd48ce70050..d185c6b8497 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -41,7 +41,7 @@ static const char * cell_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } @@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index b43f7352456..e745f3342d1 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -175,12 +175,19 @@ i915_is_buffer_referenced(struct pipe_context *pipe, static void i915_destroy(struct pipe_context *pipe) { struct i915_context *i915 = i915_context(pipe); + int i; draw_destroy(i915->draw); if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); + /* unbind framebuffer */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); + FREE(i915); } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9f017a14cca..c66558c320e 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -46,7 +46,7 @@ static const char * i915_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } static const char * @@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 0087dfa410f..7d48e6e84d5 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -588,9 +588,17 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct i915_context *i915 = i915_context(pipe); + int i; + draw_flush(i915->draw); - i915->framebuffer = *fb; /* struct copy */ + i915->framebuffer.width = fb->width; + i915->framebuffer.height = fb->height; + i915->framebuffer.nr_cbufs = fb->nr_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); i915->dirty |= I915_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b22e105f106..4a84c4db23f 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -39,7 +39,7 @@ static const char * brw_get_vendor( struct pipe_screen *screen ) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } @@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 06c586e6bb9..cd7b6356d28 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,6 +3,8 @@ include $(TOP)/configs/current LIBNAME = llvmpipe +CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS + C_SOURCES = \ lp_bld_alpha.c \ lp_bld_arit.c \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 498d21dea6c..89d08834a3c 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -8,13 +8,16 @@ Done so far is: - the whole fragment pipeline is code generated in a single function + - input interpolation + - depth testing + - texture sampling (not all state/formats are supported) + - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception we don't fallback to TGSI interpretation when an unsupported opcode is found, but just ignore it - - texture sampling via an intrinsic call - done in SoA layout - input interpolation also code generated @@ -28,16 +31,17 @@ Done so far is: any width and length - not all operations are implemented for these types yet though -Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch, -which includes hand written fast implementations for common cases. +Most mesa/progs/demos/* work. To do (probably by this order): - code generate stipple and stencil testing - - code generate texture sampling + - translate the remaining bits of texture sampling state - translate TGSI control flow instructions, and all other remaining opcodes + + - integrate with the draw module for VS code generation - code generate the triangle setup and rasterization @@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as make linux-llvm -but the rest of these instructions assume scons is used. +but the rest of these instructions assume that scons is used. Using @@ -108,6 +112,9 @@ or export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH +For performance evaluation pass debug=no to scons, and use the corresponding +lib directory without the "-debug" suffix. + Unit testing ============ @@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe: - lp_test_conv: SIMD vector conversion - lp_test_format: pixel unpacking/packing -Some of this tests can output results and benchmarks to a tab-seperated-file +Some of this tests can output results and benchmarks to a tab-separated-file for posterior analysis, e.g.: build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv @@ -133,10 +140,10 @@ Development Notes at the top of the lp_bld_*.c functions. - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a standalone Gallium state -> LLVM IR translation module. + put in a stand-alone Gallium state -> LLVM IR translation module. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a standalone example. + for a stand-alone example. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index dea4b703c45..f4a9a3b22e2 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if env.has_key('LLVM_VERSION') is False: +if not env.has_key('LLVM_VERSION'): print 'warning: LLVM not found: not building llvmpipe' Return() diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 49c2f911af7..2b4bc5c819d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -45,7 +45,7 @@ void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 9dbcdb4daab..634575670db 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -38,14 +38,14 @@ #include <llvm-c/Core.h> struct pipe_alpha_state; -union lp_type; +struct lp_type; struct lp_build_mask_context; void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index be7442d00ae..31433318a7e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -159,7 +159,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; @@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) @@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) @@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -590,7 +590,7 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) @@ -623,6 +623,47 @@ lp_build_abs(struct lp_build_context *bld, } +LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + enum lp_build_round_sse41_mode { LP_BUILD_ROUND_SSE41_NEAREST = 0, @@ -637,7 +678,7 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_sse41_mode mode) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); const char *intrinsic; @@ -662,10 +703,28 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -679,6 +738,42 @@ lp_build_floor(struct lp_build_context *bld, } +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + /** * Convert to integer, through whichever rounding method that's fastest, * typically truncating to zero. @@ -687,7 +782,7 @@ LLVMValueRef lp_build_int(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); @@ -710,7 +805,7 @@ LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -728,7 +823,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->undef; @@ -759,7 +854,7 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -780,7 +875,7 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -800,7 +895,7 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -823,7 +918,8 @@ lp_build_pow(struct lp_build_context *bld, { /* TODO: optimize the constant case */ if(LLVMIsConstant(x) && LLVMIsConstant(y)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); } @@ -871,13 +967,14 @@ lp_build_polynomial(struct lp_build_context *bld, const double *coeffs, unsigned num_coeffs) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res = NULL; unsigned i; /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); for (i = num_coeffs; i--; ) { LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); @@ -919,7 +1016,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; @@ -931,7 +1028,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); @@ -1012,7 +1110,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); @@ -1030,7 +1128,8 @@ lp_build_log2_approx(struct lp_build_context *bld, if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 383c3c3313b..d68a97c4b87 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -106,10 +106,26 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_int(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index d19e18846c2..da272e549f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -46,7 +46,7 @@ struct pipe_blend_state; -union lp_type; +struct lp_type; struct lp_build_context; @@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c11a9398f87..d14f468ba93 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b92254a7d6f..9511299d558 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 21487365eae..c8eaa8c3940 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -42,7 +42,7 @@ unsigned -lp_mantissa(union lp_type type) +lp_mantissa(struct lp_type type) { assert(type.floating); @@ -72,7 +72,7 @@ lp_mantissa(union lp_type type) * Same as lp_const_scale(), but in terms of shifts. */ unsigned -lp_const_shift(union lp_type type) +lp_const_shift(struct lp_type type) { if(type.floating) return 0; @@ -86,7 +86,7 @@ lp_const_shift(union lp_type type) unsigned -lp_const_offset(union lp_type type) +lp_const_offset(struct lp_type type) { if(type.floating || type.fixed) return 0; @@ -104,7 +104,7 @@ lp_const_offset(union lp_type type) * else for the fixed points types and normalized integers. */ double -lp_const_scale(union lp_type type) +lp_const_scale(struct lp_type type) { unsigned long long llscale; double dscale; @@ -122,7 +122,7 @@ lp_const_scale(union lp_type type) * Minimum value representable by the type. */ double -lp_const_min(union lp_type type) +lp_const_min(struct lp_type type) { unsigned bits; @@ -158,7 +158,7 @@ lp_const_min(union lp_type type) * Maximum value representable by the type. */ double -lp_const_max(union lp_type type) +lp_const_max(struct lp_type type) { unsigned bits; @@ -190,7 +190,7 @@ lp_const_max(union lp_type type) double -lp_const_eps(union lp_type type) +lp_const_eps(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -211,7 +211,7 @@ lp_const_eps(union lp_type type) LLVMValueRef -lp_build_undef(union lp_type type) +lp_build_undef(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMGetUndef(vec_type); @@ -219,7 +219,7 @@ lp_build_undef(union lp_type type) LLVMValueRef -lp_build_zero(union lp_type type) +lp_build_zero(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMConstNull(vec_type); @@ -227,7 +227,7 @@ lp_build_zero(union lp_type type) LLVMValueRef -lp_build_one(union lp_type type) +lp_build_one(struct lp_type type) { LLVMTypeRef elem_type; LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -269,7 +269,7 @@ lp_build_one(union lp_type type) LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); @@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type, LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); @@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type, LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle) { @@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type, LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]) +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]) { LLVMTypeRef elem_type = LLVMIntType(type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index 1934530ea3c..ffb302f7366 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,67 +42,67 @@ #include <pipe/p_compiler.h> -union lp_type type; +struct lp_type type; unsigned -lp_mantissa(union lp_type type); +lp_mantissa(struct lp_type type); unsigned -lp_const_shift(union lp_type type); +lp_const_shift(struct lp_type type); unsigned -lp_const_offset(union lp_type type); +lp_const_offset(struct lp_type type); double -lp_const_scale(union lp_type type); +lp_const_scale(struct lp_type type); double -lp_const_min(union lp_type type); +lp_const_min(struct lp_type type); double -lp_const_max(union lp_type type); +lp_const_max(struct lp_type type); double -lp_const_eps(union lp_type type); +lp_const_eps(struct lp_type type); LLVMValueRef -lp_build_undef(union lp_type type); +lp_build_undef(struct lp_type type); LLVMValueRef -lp_build_zero(union lp_type type); +lp_build_zero(struct lp_type type); LLVMValueRef -lp_build_one(union lp_type type); +lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val); LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle); LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]); +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]); #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c8954c8a34f..186cac70f62 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -86,7 +86,7 @@ */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { @@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - /* Fill in the empty lower bits for added precision? */ + /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; @@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); @@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n) * Expand the bit width. * * This will only change the number of bits the values are represented, not the - * values themselved. + * values themselves. */ static void lp_build_expand(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, LLVMValueRef src, LLVMValueRef *dst, unsigned num_dsts) { @@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder, dst[0] = src; while(src_type.width < dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; LLVMTypeRef new_vec_type; new_type.width *= 2; @@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder, */ static LLVMValueRef lp_build_pack2(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, boolean clamped, LLVMValueRef lo, LLVMValueRef hi) @@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ static LLVMValueRef -lp_build_trunc(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) { LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder, tmp[i] = src[i]; while(src_type.width > dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; new_type.width /= 2; new_type.length *= 2; @@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder, */ void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { - union lp_type tmp_type; + struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; @@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder, * Clamp if necessary */ - if(src_type.value != dst_type.value) { + if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); @@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); - tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; @@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder, */ void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { @@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index 05c1ef2a100..ca378804d2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,33 +40,33 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src); LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src); void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 3f88a14b5d3..21c665c4d4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,11 +71,11 @@ /** * Return a type appropriate for depth/stencil testing. */ -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length) { - union lp_type type; + struct lp_type type; unsigned swizzle; assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); @@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc, swizzle = format_desc->swizzle[0]; assert(swizzle < 4); - type.value = 0; + memset(&type, 0, sizeof type); type.width = format_desc->block.bits; if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { @@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -211,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMBuildStore(builder, dst, dst_ptr); } + /* FIXME */ assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 5d2e042fcc5..79d6981bb51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -41,11 +41,11 @@ struct pipe_depth_state; struct util_format_description; -union lp_type; +struct lp_type; struct lp_build_mask_context; -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length); @@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 69ed014ff3d..dcc25fbff86 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -405,7 +405,7 @@ lp_build_mask_check(struct lp_build_mask_context *mask) void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, - union lp_type type, + struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 9d76e3064dd..e61999ff06b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -38,7 +38,7 @@ #include <llvm-c/Core.h> -union lp_type; +struct lp_type; struct lp_build_flow_context; @@ -84,7 +84,7 @@ struct lp_build_mask_context void lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_build_flow_context *flow, - union lp_type type, + struct lp_type type, LLVMValueRef value); /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 5ee06560932..6d3f6926193 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -39,7 +39,7 @@ #include "pipe/p_format.h" struct util_format_description; -union lp_type; +struct lp_type; /** @@ -103,7 +103,7 @@ lp_build_gather(LLVMBuilderRef builder, void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef packed, LLVMValueRef *rgba); @@ -111,7 +111,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, void lp_build_load_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offsets, LLVMValueRef *rgba); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 569e8d10a31..b5ff434e1ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -84,7 +84,7 @@ lp_build_gather(LLVMBuilderRef builder, static LLVMValueRef -lp_build_format_swizzle(union lp_type type, +lp_build_format_swizzle(struct lp_type type, const LLVMValueRef *inputs, enum util_format_swizzle swizzle) { @@ -110,7 +110,7 @@ lp_build_format_swizzle(union lp_type type, void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef packed, LLVMValueRef *rgba) { @@ -188,7 +188,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, void lp_build_load_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, - union lp_type type, + struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offsets, LLVMValueRef *rgba) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index cfe20a0d75b..338dbca6d1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -292,7 +292,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9194f6233a7..9c57a10879b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -83,7 +83,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 8631efd6c3e..6b6f8207697 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - union lp_type type = bld->type; + struct lp_type type = bld->type; LLVMValueRef res; if(a == b) @@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld, b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); } - /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -339,9 +337,9 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]) + const boolean cond[4]) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } + else { #if 0 - else if(0) { - /* FIXME: Unfortunately select of vectors do not work */ + /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; @@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); - } -#endif - else { +#else LLVMValueRef mask = lp_build_const_mask_aos(type, cond); return lp_build_select(bld, mask, a, b); +#endif } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 29b9e1c45b8..a4ee7723b5f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -66,7 +66,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]); + const boolean cond[4]); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 6f565af76d1..403d0e48367 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,7 @@ struct pipe_texture; struct pipe_sampler_state; -union lp_type; +struct lp_type; /** @@ -123,7 +123,7 @@ void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, - union lp_type fp_type, + struct lp_type fp_type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 2913b17d3f8..8ca1be6f1be 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -97,15 +97,15 @@ struct lp_build_sample_context const struct util_format_description *format_desc; /** Incoming coordinates type and build context */ - union lp_type coord_type; + struct lp_type coord_type; struct lp_build_context coord_bld; /** Integer coordinates */ - union lp_type int_coord_type; + struct lp_type int_coord_type; struct lp_build_context int_coord_bld; /** Output texels type and build context */ - union lp_type texel_type; + struct lp_type texel_type; struct lp_build_context texel_bld; }; @@ -207,6 +207,12 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ + _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + default: assert(0); } @@ -336,7 +342,7 @@ void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, @@ -398,7 +404,13 @@ lp_build_sample_soa(LLVMBuilderRef builder, case PIPE_TEX_FILTER_ANISO: lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; + default: + assert(0); } + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + lp_build_sample_compare(&bld, p, texel); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index ac7eed9379a..64e81f7b1fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -64,7 +64,7 @@ LLVMValueRef lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; unsigned i; @@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - union lp_type type4 = type; + struct lp_type type4 = type; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -192,7 +192,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld, return lp_build_swizzle1_aos(bld, a, swizzle); if(a == b) { - swizzle[0] %= 4; - swizzle[1] %= 4; - swizzle[2] %= 4; - swizzle[3] %= 4; - return lp_build_swizzle1_aos(bld, a, swizzle); + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); } if(swizzle[0] % 4 == 0 && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index d7dd6a8a604..1f6da804488 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); /** @@ -85,7 +85,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); #endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 10c251c4162..eddb7a83fa2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -39,7 +39,7 @@ struct tgsi_token; -union lp_type; +struct lp_type; struct lp_build_context; struct lp_build_mask_context; @@ -60,7 +60,7 @@ struct lp_build_sampler_soa void (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, @@ -72,7 +72,7 @@ struct lp_build_sampler_soa void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 3ce379de12f..adc81569ed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -78,6 +78,11 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + struct lp_build_tgsi_soa_context { @@ -97,6 +102,51 @@ struct lp_build_tgsi_soa_context }; +static const unsigned char +swizzle_left[4] = { + QUAD_TOP_LEFT, QUAD_TOP_LEFT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT +}; + +static const unsigned char +swizzle_right[4] = { + QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, + QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT +}; + +static const unsigned char +swizzle_top[4] = { + QUAD_TOP_LEFT, QUAD_TOP_RIGHT, + QUAD_TOP_LEFT, QUAD_TOP_RIGHT +}; + +static const unsigned char +swizzle_bottom[4] = { + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT +}; + + +static LLVMValueRef +emit_ddx(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); + LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); + return lp_build_sub(&bld->base, src_right, src_left); +} + + +static LLVMValueRef +emit_ddy(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); + LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); + return lp_build_sub(&bld->base, src_top, src_bottom); +} + + /** * Register fetch. */ @@ -167,6 +217,7 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: + /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); res = LLVMBuildNeg( bld->base.builder, res, "" ); break; @@ -184,6 +235,36 @@ emit_fetch( /** + * Register fetch with derivatives. + */ +static void +emit_fetch_deriv( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + const unsigned chan_index, + LLVMValueRef *res, + LLVMValueRef *ddx, + LLVMValueRef *ddy) +{ + LLVMValueRef src; + + src = emit_fetch(bld, inst, index, chan_index); + + if(res) + *res = src; + + /* TODO: use interpolation coeffs for inputs */ + + if(ddx) + *ddx = emit_ddx(bld, src); + + if(ddy) + *ddy = emit_ddy(bld, src); +} + + +/** * Register store. */ static void @@ -238,17 +319,18 @@ emit_store( * High-level instruction translators. */ + static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, boolean apply_lodbias, - boolean projected) + boolean projected, + LLVMValueRef *texel) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; - LLVMValueRef texel[4]; unsigned num_coords; unsigned i; @@ -293,10 +375,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, bld->base.type, unit, num_coords, coords, lodbias, texel); - - FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { - emit_store( bld, inst, 0, i, texel[i] ); - } } @@ -349,14 +427,6 @@ emit_kil( } -static void -emit_kilp( - struct lp_build_tgsi_soa_context *bld ) -{ - /* XXX todo / fix me */ -} - - /** * Check if inst src/dest regs use indirect addressing into temporary * register file. @@ -384,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) static int emit_instruction( struct lp_build_tgsi_soa_context *bld, - struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info) { unsigned chan_index; LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - LLVMValueRef dst0; + LLVMValueRef res; + LLVMValueRef dst0[NUM_CHANNELS]; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) return FALSE; + assert(info->num_dst <= 1); + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.undef; + } + } + switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -410,19 +490,17 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } break; case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - emit_store( bld, inst, 0, CHAN_X, bld->base.one); + dst0[CHAN_X] = bld->base.one; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_max( &bld->base, src0, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Y, dst0); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ @@ -434,20 +512,19 @@ emit_instruction( tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Z, dst0); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - emit_store( bld, inst, 0, CHAN_W, bld->base.one); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_rcp(&bld->base, src0); + res = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -455,9 +532,9 @@ emit_instruction( /* TGSI_OPCODE_RECIPSQRT */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -481,16 +558,15 @@ emit_instruction( lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = tmp1; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -516,20 +592,18 @@ emit_instruction( /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - tmp1 = lp_build_div( &bld->base, src0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); } /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -537,8 +611,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_mul(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); } break; @@ -546,8 +619,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_add(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); } break; @@ -565,7 +637,7 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -587,28 +659,24 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = bld->base.one; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Z ); - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = emit_fetch( bld, inst, 1, CHAN_W ); - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); } break; @@ -616,8 +684,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_min( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); } break; @@ -625,8 +692,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_max( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); } break; @@ -636,8 +702,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -647,8 +712,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -660,7 +724,7 @@ emit_instruction( tmp2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -668,8 +732,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); } break; @@ -680,13 +743,19 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_sub( &bld->base, src1, src2 ); tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0 = lp_build_add( &bld->base, tmp0, src2 ); - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); } break; case TGSI_OPCODE_CND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); + } break; case TGSI_OPCODE_DP2A: @@ -700,45 +769,49 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; -#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_frc( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + src0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, tmp0, src0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_CLAMP: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + dst0[chan_index] = tmp0; + } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_EX2: { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; } @@ -747,16 +820,16 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_POW: src0 = emit_fetch( bld, inst, 0, CHAN_X ); src1 = emit_fetch( bld, inst, 1, CHAN_X ); - dst0 = lp_build_pow( &bld->base, src0, src1 ); + res = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -777,7 +850,7 @@ emit_instruction( tmp5 = tmp3; tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - emit_store( bld, inst, 0, CHAN_X, tmp2); + dst0[CHAN_X] = tmp2; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { @@ -788,31 +861,30 @@ emit_instruction( tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp3); + dst0[CHAN_Y] = tmp3; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - emit_store( bld, inst, 0, CHAN_Z, tmp5); + dst0[CHAN_Z] = tmp5; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_abs( &bld->base, tmp0 ) ; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); } break; case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); return 0; - break; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -829,7 +901,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -837,25 +909,27 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DDX: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } break; case TGSI_OPCODE_DDY: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } break; -#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld ); - return 0; /* XXX fix me */ + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_KIL: /* conditional kill */ @@ -887,13 +961,14 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_SFL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } break; case TGSI_OPCODE_SGT: @@ -901,8 +976,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -910,7 +984,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -919,8 +993,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -929,85 +1002,99 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_STR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.one; + } break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE ); + emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; case TGSI_OPCODE_TXD: + /* FIXME */ return 0; break; case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); return 0; break; case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); return 0; break; #if 0 case TGSI_OPCODE_ARR: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_CAL: + /* FIXME */ return 0; break; -#if 0 case TGSI_OPCODE_RET: - emit_ret( bld ); + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_END: break; -#if 0 case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_sgn( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); } break; -#endif case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1015,34 +1102,29 @@ emit_instruction( src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0 = lp_build_select( &bld->base, tmp0, src1, src2); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); } break; case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = bld->base.zero; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1101,38 +1183,35 @@ emit_instruction( /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); - emit_store(bld, inst, 0, CHAN_X, tmp4); + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); - emit_store(bld, inst, 0, CHAN_Y, tmp5); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); - emit_store(bld, inst, 0, CHAN_Z, tmp6); + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); - emit_store(bld, inst, 0, CHAN_W, tmp7); + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); } } - /* dst0.w = 1.0 */ + /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - tmp0 = bld->base.one; - emit_store(bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } } break; case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); return 0; break; @@ -1145,118 +1224,157 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE ); + emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; case TGSI_OPCODE_BRK: + /* FIXME */ return 0; break; case TGSI_OPCODE_IF: + /* FIXME */ return 0; break; case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ELSE: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDIF: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CEIL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); + } break; case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); return 0; break; -#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_f2it( bld, 0 ); - emit_i2f( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SHR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CONT: + /* deprecated? */ + assert(0); return 0; break; @@ -1268,10 +1386,28 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } + break; + + case TGSI_OPCODE_NOP: + break; + default: return 0; } + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + return 1; } @@ -1279,7 +1415,7 @@ emit_instruction( void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, @@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* Input already interpolated */ + /* Inputs already interpolated */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) { + { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : "<invalid>"); - } + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? info->mnemonic : "<invalid>"); + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 577644b7ab8..606243d6c5a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -33,7 +33,7 @@ LLVMTypeRef -lp_build_elem_type(union lp_type type) +lp_build_elem_type(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type) LLVMTypeRef -lp_build_vec_type(union lp_type type) +lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); return LLVMVectorType(elem_type, type.length); @@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type) * type and check for identity. */ boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) { LLVMTypeKind elem_kind; @@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) { LLVMTypeRef elem_type; @@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) boolean -lp_check_value(union lp_type type, LLVMValueRef val) +lp_check_value(struct lp_type type, LLVMValueRef val) { LLVMTypeRef vec_type; @@ -143,25 +143,26 @@ lp_check_value(union lp_type type, LLVMValueRef val) LLVMTypeRef -lp_build_int_elem_type(union lp_type type) +lp_build_int_elem_type(struct lp_type type) { return LLVMIntType(type.width); } LLVMTypeRef -lp_build_int_vec_type(union lp_type type) +lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); return LLVMVectorType(elem_type, type.length); } -union lp_type -lp_int_type(union lp_type type) +struct lp_type +lp_int_type(struct lp_type type) { - union lp_type int_type; - int_type.value = 0; + struct lp_type int_type; + + memset(&int_type, 0, sizeof int_type); int_type.width = type.width; int_type.length = type.length; return int_type; @@ -171,7 +172,7 @@ lp_int_type(union lp_type type) void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type) + struct lp_type type) { bld->builder = builder; bld->type = type; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 9933e0b45c3..ee5ca3483c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -56,58 +56,55 @@ * on the types used for intermediate computations, such as signed vs unsigned, * normalized values, or fixed point. */ -union lp_type { - struct { - /** - * Floating-point. Cannot be used with fixed. Integer numbers are - * represented by this zero. - */ - unsigned floating:1; - - /** - * Fixed-point. Cannot be used with floating. Integer numbers are - * represented by this zero. - */ - unsigned fixed:1; - - /** - * Whether it can represent negative values or not. - * - * If this is not set for floating point, it means that all values are - * assumed to be positive. - */ - unsigned sign:1; - - /** - * Whether values are normalized to fit [0, 1] interval, or [-1, 1] - * interval for signed types. - * - * For integer types it means the representable integer range should be - * interpreted as the interval above. - * - * For floating and fixed point formats it means the values should be - * clamped to the interval above. - */ - unsigned norm:1; - - /** - * Element width. - * - * For fixed point values, the fixed point is assumed to be at half the - * width. - */ - unsigned width:14; - - /** - * Vector length. - * - * width*length should be a power of two greater or equal to eight. - * - * @sa LP_MAX_VECTOR_LENGTH - */ - unsigned length:14; - }; - uint32_t value; +struct lp_type { + /** + * Floating-point. Cannot be used with fixed. Integer numbers are + * represented by this zero. + */ + unsigned floating:1; + + /** + * Fixed-point. Cannot be used with floating. Integer numbers are + * represented by this zero. + */ + unsigned fixed:1; + + /** + * Whether it can represent negative values or not. + * + * If this is not set for floating point, it means that all values are + * assumed to be positive. + */ + unsigned sign:1; + + /** + * Whether values are normalized to fit [0, 1] interval, or [-1, 1] + * interval for signed types. + * + * For integer types it means the representable integer range should be + * interpreted as the interval above. + * + * For floating and fixed point formats it means the values should be + * clamped to the interval above. + */ + unsigned norm:1; + + /** + * Element width. + * + * For fixed point values, the fixed point is assumed to be at half the + * width. + */ + unsigned width:14; + + /** + * Vector length. + * + * width*length should be a power of two greater or equal to eight. + * + * @sa LP_MAX_VECTOR_LENGTH + */ + unsigned length:14; }; @@ -124,7 +121,7 @@ struct lp_build_context * This not only describes the input/output LLVM types, but also whether * to normalize/clamp the results. */ - union lp_type type; + struct lp_type type; /** Same as lp_build_undef(type) */ LLVMValueRef undef; @@ -138,41 +135,41 @@ struct lp_build_context LLVMTypeRef -lp_build_elem_type(union lp_type type); +lp_build_elem_type(struct lp_type type); LLVMTypeRef -lp_build_vec_type(union lp_type type); +lp_build_vec_type(struct lp_type type); boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type); +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); boolean -lp_check_value(union lp_type type, LLVMValueRef val); +lp_check_value(struct lp_type type, LLVMValueRef val); LLVMTypeRef -lp_build_int_elem_type(union lp_type type); +lp_build_int_elem_type(struct lp_type type); LLVMTypeRef -lp_build_int_vec_type(union lp_type type); +lp_build_int_vec_type(struct lp_type type); -union lp_type -lp_int_type(union lp_type type); +struct lp_type +lp_int_type(struct lp_type type); void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type); + struct lp_type type); #endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 580cca5b463..bdcff94b9bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &cv); lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); } + llvmpipe->dirty_render_cache = TRUE; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 233d1df0e10..a4b2bd8c2ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_WRITE; } - /* FIXME: we also need to do the same for the texture cache */ - return PIPE_UNREFERENCED; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 9465f763d50..b4a22ff4a97 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -152,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { - fprintf(stderr, "%s\n", error); + _debug_printf("%s\n", error); LLVMDisposeMessage(error); abort(); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index c3e3e1af672..58f716ede29 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -80,10 +80,9 @@ struct lp_jit_context struct tgsi_sampler **samplers; - /* TODO: alpha reference value */ float alpha_ref_value; - /* TODO: blend constant color */ + /* FIXME: store (also?) in floats */ uint8_t *blend_color; struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 125035771e5..05189274589 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -67,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -86,7 +82,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return 9; /* max 256x256x256 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_TGSI_CONT_SUPPORTED: @@ -196,8 +192,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - - struct pipe_winsys *winsys = _screen->winsys; + struct llvmpipe_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f9e254efcae..2d2fc19a65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -163,8 +163,6 @@ shade_quads(struct llvmpipe_context *llvmpipe, else depth = NULL; - /* TODO: blend color */ - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(mask, 16)); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e87976b9f36..30fb41ea65d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) vinfo->num_attribs = 0; for (i = 0; i < lpfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(llvmpipe->draw, @@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 618cf1ffb8d..9faed5a0b18 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -133,13 +133,13 @@ generate_pos0(LLVMBuilderRef builder, static void generate_depth(LLVMBuilderRef builder, const struct lp_fragment_shader_variant_key *key, - union lp_type src_type, + struct lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, LLVMValueRef dst_ptr) { const struct util_format_description *format_desc; - union lp_type dst_type; + struct lp_type dst_type; if(!key->depth.enabled) return; @@ -181,7 +181,7 @@ generate_fs(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, @@ -299,7 +299,7 @@ generate_fs(struct llvmpipe_context *lp, static void generate_blend(const struct pipe_blend_state *blend, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, @@ -364,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp, { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; - union lp_type fs_type; - union lp_type blend_type; + struct lp_type fs_type; + struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; @@ -431,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp, /* TODO: actually pick these based on the fs and color buffer * characteristics. */ - fs_type.value = 0; + memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ @@ -439,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.length = 4; /* 4 element per vector */ num_fs = 4; - blend_type.value = 0; + memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 69aaae26e0a..a88e110c663 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -86,43 +86,43 @@ random_float(void); void -dump_type(FILE *fp, union lp_type type); +dump_type(FILE *fp, struct lp_type type); double -read_elem(union lp_type type, const void *src, unsigned index); +read_elem(struct lp_type type, const void *src, unsigned index); void -write_elem(union lp_type type, void *dst, unsigned index, double src); +write_elem(struct lp_type type, void *dst, unsigned index, double src); void -random_elem(union lp_type type, void *dst, unsigned index); +random_elem(struct lp_type type, void *dst, unsigned index); void -read_vec(union lp_type type, const void *src, double *dst); +read_vec(struct lp_type type, const void *src, double *dst); void -write_vec(union lp_type type, void *dst, const double *src); +write_vec(struct lp_type type, void *dst, const double *src); void -random_vec(union lp_type type, void *dst); +random_vec(struct lp_type type, void *dst); boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps); +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps); boolean -compare_vec(union lp_type type, const void *res, const void *ref); +compare_vec(struct lp_type type, const void *res, const void *ref); void -dump_vec(FILE *fp, union lp_type type, const void *src); +dump_vec(FILE *fp, struct lp_type type, const void *src); #endif /* !LP_TEST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8dfad468e3c..94b661dcba4 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -80,7 +80,7 @@ static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type, + struct lp_type type, double cycles, boolean success) { @@ -125,7 +125,7 @@ static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s", mode ? "soa" : "aos"); @@ -153,7 +153,7 @@ static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMTypeRef ret_type; LLVMTypeRef vec_type; @@ -467,7 +467,7 @@ test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -765,10 +765,10 @@ blend_funcs[] = { }; -const union lp_type blend_types[] = { +const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */ - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */ + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; @@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; bool success = TRUE; for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { @@ -841,27 +841,27 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; unsigned long i; bool success = TRUE; for(i = 0; i < n; ++i) { - rgb_func = &blend_funcs[random() % num_funcs]; - alpha_func = &blend_funcs[random() % num_funcs]; - rgb_src_factor = &blend_factors[random() % num_factors]; - alpha_src_factor = &blend_factors[random() % num_factors]; + rgb_func = &blend_funcs[rand() % num_funcs]; + alpha_func = &blend_funcs[rand() % num_funcs]; + rgb_src_factor = &blend_factors[rand() % num_factors]; + alpha_src_factor = &blend_factors[rand() % num_factors]; do { - rgb_dst_factor = &blend_factors[random() % num_factors]; + rgb_dst_factor = &blend_factors[rand() % num_factors]; } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); do { - alpha_dst_factor = &blend_factors[random() % num_factors]; + alpha_dst_factor = &blend_factors[rand() % num_factors]; } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); - mode = random() & 1; + mode = rand() & 1; - type = &blend_types[random() % num_types]; + type = &blend_types[rand() % num_types]; memset(&blend, 0, sizeof blend); blend.blend_enable = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index e6489834af5..9dcf58e5dcd 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -59,8 +59,8 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, double cycles, boolean success) { @@ -80,8 +80,8 @@ write_tsv_row(FILE *fp, static void dump_conv_types(FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { fprintf(fp, "src_type="); dump_type(fp, src_type); @@ -96,8 +96,8 @@ dump_conv_types(FILE *fp, static LLVMValueRef add_conv_test(LLVMModuleRef module, - union lp_type src_type, unsigned num_srcs, - union lp_type dst_type, unsigned num_dsts) + struct lp_type src_type, unsigned num_srcs, + struct lp_type dst_type, unsigned num_dsts) { LLVMTypeRef args[2]; LLVMValueRef func; @@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module, static boolean test_one(unsigned verbose, FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -343,35 +343,35 @@ test_one(unsigned verbose, } -const union lp_type conv_types[] = { +const struct lp_type conv_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }}, + { TRUE, FALSE, TRUE, TRUE, 32, 4 }, + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, + { TRUE, FALSE, FALSE, FALSE, 32, 4 }, /* TODO: test fixed formats too */ - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }}, + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 32, 4 }, + { FALSE, FALSE, TRUE, FALSE, 32, 4 }, + { FALSE, FALSE, FALSE, TRUE, 32, 4 }, + { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 16 }, + { FALSE, FALSE, TRUE, FALSE, 8, 16 }, + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, + { FALSE, FALSE, FALSE, FALSE, 8, 16 }, }; @@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean test_all(unsigned verbose, FILE *fp) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; bool success = TRUE; for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { @@ -407,16 +407,16 @@ test_all(unsigned verbose, FILE *fp) boolean test_some(unsigned verbose, FILE *fp, unsigned long n) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; unsigned long i; bool success = TRUE; for(i = 0; i < n; ++i) { - src_type = &conv_types[random() % num_types]; + src_type = &conv_types[rand() % num_types]; do { - dst_type = &conv_types[random() % num_types]; + dst_type = &conv_types[rand() % num_types]; } while (src_type == dst_type || src_type->norm != dst_type->norm); if(!test_one(verbose, fp, *src_type, *dst_type)) diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 49213fb4f0b..4592dc0b2d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,7 +40,7 @@ void dump_type(FILE *fp, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s%s%u%sx%u", type.sign ? (type.floating || type.fixed ? "" : "s") : "u", @@ -52,7 +52,7 @@ dump_type(FILE *fp, double -read_elem(union lp_type type, const void *src, unsigned index) +read_elem(struct lp_type type, const void *src, unsigned index) { double scale = lp_const_scale(type); double value; @@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index) void -write_elem(union lp_type type, void *dst, unsigned index, double value) +write_elem(struct lp_type type, void *dst, unsigned index, double value) { assert(index < type.length); if(!type.sign && value < 0.0) @@ -184,11 +184,11 @@ write_elem(union lp_type type, void *dst, unsigned index, double value) void -random_elem(union lp_type type, void *dst, unsigned index) +random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); - value = (double)random()/(double)RAND_MAX; + value = (double)rand()/(double)RAND_MAX; if(!type.norm) { unsigned long long mask; if (type.floating) @@ -199,17 +199,17 @@ random_elem(union lp_type type, void *dst, unsigned index) mask = ((unsigned long long)1 << (type.width - 1)) - 1; else mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & random()); + value += (double)(mask & rand()); } if(!type.sign) - if(random() & 1) + if(rand() & 1) value = -value; write_elem(type, dst, index, value); } void -read_vec(union lp_type type, const void *src, double *dst) +read_vec(struct lp_type type, const void *src, double *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst) void -write_vec(union lp_type type, void *dst, const double *src) +write_vec(struct lp_type type, void *dst, const double *src) { unsigned i; for (i = 0; i < type.length; ++i) @@ -229,12 +229,12 @@ write_vec(union lp_type type, void *dst, const double *src) float random_float(void) { - return (float)((double)random()/(double)RAND_MAX); + return (float)((double)rand()/(double)RAND_MAX); } void -random_vec(union lp_type type, void *dst) +random_vec(struct lp_type type, void *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst) boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps) +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; for (i = 0; i < type.length; ++i) { @@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl boolean -compare_vec(union lp_type type, const void *res, const void *ref) +compare_vec(struct lp_type type, const void *res, const void *ref) { double eps = lp_const_eps(type); return compare_vec_with_eps(type, res, ref, eps); @@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref) void -dump_vec(FILE *fp, union lp_type type, const void *src) +dump_vec(FILE *fp, struct lp_type type, const void *src) { unsigned i; for (i = 0; i < type.length; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 9a876f404df..a1365a045f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1654,7 +1654,7 @@ lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) static void lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 7d31705d014..d2a6ae21f57 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -149,7 +149,7 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) static void lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 143afec3d35..0c06b659a1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -44,10 +44,53 @@ #include "lp_tile_cache.h" +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 + + +enum llvmpipe_tile_status +{ + LP_TILE_STATUS_UNDEFINED = 0, + LP_TILE_STATUS_CLEAR = 1, + LP_TILE_STATUS_DEFINED = 2 +}; + + +struct llvmpipe_cached_tile +{ + enum llvmpipe_tile_status status; + + /** color in SOA format */ + uint8_t *color; +}; + + +struct llvmpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + struct pipe_transfer *transfer; + void *transfer_map; + + struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; + + uint8_t clear_color[4]; /**< for color bufs */ + uint clear_val; /**< for z+stencil, or packed color clear value */ + + struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + + struct llvmpipe_tile_cache * lp_create_tile_cache( struct pipe_screen *screen ) { struct llvmpipe_tile_cache *tc; + int maxLevels, maxTexSize; + + /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ + maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxTexSize = 1 << (maxLevels - 1); + assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( llvmpipe_tile_cache ); if(!tc) @@ -225,11 +268,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) tc->clear_val); screen->transfer_unmap(screen, pt); + + tile->status = LP_TILE_STATUS_UNDEFINED; break; } case LP_TILE_STATUS_DEFINED: lp_put_tile_rgba_soa(pt, x, y, tile->color); + tile->status = LP_TILE_STATUS_UNDEFINED; break; } } @@ -257,7 +303,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, case LP_TILE_STATUS_UNDEFINED: /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x, y, tile->color); + lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); tile->status = LP_TILE_STATUS_DEFINED; break; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h index 6d8ba5ece7a..161bab37991 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h @@ -33,42 +33,7 @@ #include "lp_tile_soa.h" -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -/** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; +struct llvmpipe_tile_cache; /* opaque */ extern struct llvmpipe_tile_cache * diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index ff2febb668e..170ce3eb7e5 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4469b22d91a..ee5901e743e 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index e6924ad71eb..4eeacd1afd5 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -15,8 +15,6 @@ nv20_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index f8285e4455f..41af38450b5 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 5d2a4216c5a..bd13dfddd1c 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 6e8f4f9750d..fca078b174a 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -37,11 +37,12 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, /* We need this in the ddx for reliable composite, not sure what we're * actually flushing. We generate all our own flushes with flags = 0. */ - WAIT_RING(chan, 3); + WAIT_RING(chan, 2); BEGIN_RING(chan, eng2d, 0x0110, 1); OUT_RING (chan, 0); - FIRE_RING(chan); + if (flags & PIPE_FLUSH_FRAME) + FIRE_RING(chan); } static void @@ -110,6 +111,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.is_texture_referenced = nv50_is_texture_referenced; nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced; + screen->base.channel->user_private = nv50; + screen->base.channel->flush_notify = nv50_state_flush_notify; + nv50_init_surface_functions(nv50); nv50_init_state_functions(nv50); nv50_init_query_functions(nv50); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1e9e8e49bfb..4608854d711 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -116,6 +116,7 @@ struct nv50_state { unsigned miptree_nr; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *programs; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; @@ -190,10 +191,12 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); +extern void nv50_state_flush_notify(struct nouveau_channel *chan); /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4a838529de7..576d075318f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -90,6 +90,10 @@ struct nv50_reg { int acc; /* instruction where this reg is last read (first insn == 1) */ }; +/* arbitrary limits */ +#define MAX_IF_DEPTH 4 +#define MAX_LOOP_DEPTH 4 + struct nv50_pc { struct nv50_program *p; @@ -112,11 +116,22 @@ struct nv50_pc { struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; + /* broadcast and destination replacement regs */ + struct nv50_reg *r_brdc; + struct nv50_reg *r_dst[4]; + unsigned interp_mode[32]; /* perspective interpolation registers */ struct nv50_reg *iv_p; struct nv50_reg *iv_c; + struct nv50_program_exec *if_cond; + struct nv50_program_exec *if_insn[MAX_IF_DEPTH]; + struct nv50_program_exec *br_join[MAX_IF_DEPTH]; + struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */ + int if_lvl, loop_lvl; + unsigned loop_pos[MAX_LOOP_DEPTH]; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; @@ -124,6 +139,25 @@ struct nv50_pc { boolean allow32; }; +static INLINE void +ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) +{ + reg->type = type; + reg->index = index; + reg->hw = hw; + reg->neg = 0; + reg->rhw = -1; + reg->acc = 0; +} + +static INLINE unsigned +popcnt4(uint32_t val) +{ + static const unsigned cnt[16] + = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + return cnt[val & 0xf]; +} + static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { @@ -173,6 +207,10 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +/* XXX: For shaders that aren't executed linearly (e.g. shaders that + * contain loops), we need to assign all hw regs to TGSI TEMPs early, + * lest we risk temp_temps overwriting regs alloc'd "later". + */ static struct nv50_reg * alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) { @@ -184,11 +222,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) for (i = 0; i < NV50_SU_MAX_TEMP; i++) { if (!pc->r_temp[i]) { - r = CALLOC_STRUCT(nv50_reg); - r->type = P_TEMP; - r->index = -1; - r->hw = i; - r->rhw = -1; + r = MALLOC_STRUCT(nv50_reg); + ctor_reg(r, P_TEMP, -1, i); pc->r_temp[i] = r; return r; } @@ -254,10 +289,8 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) return alloc_temp4(pc, dst, idx + 4); for (i = 0; i < 4; i++) { - dst[i] = CALLOC_STRUCT(nv50_reg); - dst[i]->type = P_TEMP; - dst[i]->index = -1; - dst[i]->hw = idx + i; + dst[i] = MALLOC_STRUCT(nv50_reg); + ctor_reg(dst[i], P_TEMP, -1, idx + i); pc->r_temp[idx + i] = dst[i]; } @@ -309,7 +342,7 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) static struct nv50_reg * alloc_immd(struct nv50_pc *pc, float f) { - struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + struct nv50_reg *r = MALLOC_STRUCT(nv50_reg); unsigned hw; for (hw = 0; hw < pc->immd_nr * 4; hw++) @@ -319,9 +352,7 @@ alloc_immd(struct nv50_pc *pc, float f) if (hw == pc->immd_nr * 4) hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; - r->type = P_IMMD; - r->hw = hw; - r->index = -1; + ctor_reg(r, P_IMMD, -1, hw); return r; } @@ -543,6 +574,22 @@ check_swap_src_0_1(struct nv50_pc *pc, } static void +set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, + struct nv50_program_exec *e) +{ + struct nv50_reg *temp; + + if (src->type != P_TEMP) { + temp = temp_temp(pc); + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); +} + +static void set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { @@ -744,7 +791,11 @@ emit_flop(struct nv50_pc *pc, unsigned sub, } set_dst(pc, dst, e); - set_src_0(pc, src, e); + + if (sub == 0 || sub == 2) + set_src_0_restricted(pc, src, e); + else + set_src_0(pc, src, e); emit(pc, e); } @@ -786,16 +837,20 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 +/* 0x04 == 32 bit */ +/* 0x40 == dst is float */ +/* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 #define CVT_F32_U32 0x64 #define CVT_S32_F32 0x8c #define CVT_S32_S32 0x0c -#define CVT_F32_F32_ROP 0xcc +#define CVT_NEG 0x20 +#define CVT_RI 0x08 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cop, unsigned fmt) + int wp, unsigned cvn, unsigned fmt) { struct nv50_program_exec *e; @@ -804,7 +859,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= 0xa0000000; e->inst[1] |= 0x00004000; - e->inst[1] |= (cop << 16); + e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -821,53 +876,85 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +/* nv50 Condition codes: + * 0x1 = LT + * 0x2 = EQ + * 0x3 = LE + * 0x4 = GT + * 0x5 = NE + * 0x6 = GE + * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) + * 0x8 = unordered bit (allows NaN) + */ static void -emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, struct nv50_reg *src0, struct nv50_reg *src1) { + static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + struct nv50_program_exec *e = exec(pc); - unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; - assert(c_op <= 7); + assert(ccode < 16); if (check_swap_src_0_1(pc, &src0, &src1)) - c_op = inv_cop[c_op]; + ccode = cc_swapped[ccode & 7] | (ccode & 8); rdst = dst; - if (dst->type != P_TEMP) + if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); /* set.u32 */ set_long(pc, e); e->inst[0] |= 0xb0000000; - e->inst[1] |= (3 << 29); - e->inst[1] |= (c_op << 14); - /*XXX: breaks things, .u32 by default? - * decuda will disasm as .u16 and use .lo/.hi regs, but this - * doesn't seem to match what the hw actually does. - inst[1] |= 0x04000000; << breaks things.. .u32 by default? + e->inst[1] |= 0x60000000 | (ccode << 14); + + /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but + * that doesn't seem to match what the hw actually does + e->inst[1] |= 0x04000000; << breaks things, u32 by default ? */ - set_dst(pc, dst, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + set_src_0(pc, src0, e); set_src_1(pc, src1, e); - emit(pc, e); - /* cvt.f32.u32 */ - e = exec(pc); - e->inst[0] = 0xa0000001; - e->inst[1] = 0x64014780; - set_dst(pc, rdst, e); - set_src_0(pc, dst, e); emit(pc, e); + pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */ - if (dst != rdst) + /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ + if (rdst) + emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && rdst != dst) free_temp(pc, dst); } +static INLINE unsigned +map_tgsi_setop_cc(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_SLT: return 0x1; + case TGSI_OPCODE_SGE: return 0x6; + case TGSI_OPCODE_SEQ: return 0x2; + case TGSI_OPCODE_SGT: return 0x4; + case TGSI_OPCODE_SLE: return 0x3; + case TGSI_OPCODE_SNE: return 0xd; + default: + assert(0); + return 0; + } +} + static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP); + emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); } static void @@ -890,6 +977,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); } +static INLINE void +emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); +} + static void emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg **src) @@ -1073,10 +1166,11 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); #if 1 - if (mask & 1) emit_mov(pc, dst[0], t[0]); - if (mask & 2) emit_mov(pc, dst[1], t[1]); - if (mask & 4) emit_mov(pc, dst[2], t[2]); - if (mask & 8) emit_mov(pc, dst[3], t[3]); + c = 0; + if (mask & 1) emit_mov(pc, dst[0], t[c++]); + if (mask & 2) emit_mov(pc, dst[1], t[c++]); + if (mask & 4) emit_mov(pc, dst[2], t[c++]); + if (mask & 8) emit_mov(pc, dst[3], t[c]); free_temp4(pc, t); #else @@ -1093,6 +1187,38 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, } static void +emit_branch(struct nv50_pc *pc, int pred, unsigned cc, + struct nv50_program_exec **join) +{ + struct nv50_program_exec *e = exec(pc); + + if (join) { + set_long(pc, e); + e->inst[0] |= 0xa0000002; + emit(pc, e); + *join = e; + e = exec(pc); + } + + set_long(pc, e); + e->inst[0] |= 0x10000002; + if (pred >= 0) + set_pred(pc, cc, pred, e); + emit(pc, e); +} + +static void +emit_nop(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000; + set_long(pc, e); + e->inst[1] = 0xe0000000; + emit(pc, e); +} + +static void convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) { unsigned q = 0, m = ~0; @@ -1159,6 +1285,70 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) } } +/* Return a read mask for source registers deduced from opcode & write mask. */ +static unsigned +nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) +{ + unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_ext_texture *tex; + + assert(insn->Instruction.Extended); + tex = &insn->InstructionExtTexture; + + mask = 0x7; + if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) + mask |= 0x8; + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + default: + break; + } + + return mask; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -1258,93 +1448,175 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, return r; } -/* returns TRUE if instruction can overwrite sources before they're read */ +/* return TRUE for ops that produce only a single result */ static boolean -direct2dest_op(const struct tgsi_full_instruction *insn) +is_scalar_op(unsigned op) { - if (insn->Instruction.Saturate) - return FALSE; - - switch (insn->Instruction.Opcode) { + switch (op) { case TGSI_OPCODE_COS: + case TGSI_OPCODE_DP2: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_LIT: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: case TGSI_OPCODE_SIN: + /* + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: + */ + return TRUE; + default: + return FALSE; + } +} + +/* Returns a bitmask indicating which dst components depend + * on source s, component c (reverse of nv50_tgsi_src_mask). + */ +static unsigned +nv50_tgsi_dst_revdep(unsigned op, int s, int c) +{ + if (is_scalar_op(op)) + return 0x1; + + switch (op) { + case TGSI_OPCODE_DST: + return (1 << c) & (s ? 0xa : 0x6); + case TGSI_OPCODE_XPD: + switch (c) { + case 0: return 0x6; + case 1: return 0x5; + case 2: return 0x3; + case 3: return 0x0; + default: + assert(0); + return 0x0; + } + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - return FALSE; + /* these take care of dangerous swizzles themselves */ + return 0x0; + case TGSI_OPCODE_IF: + case TGSI_OPCODE_KIL: + /* don't call this function for these ops */ + assert(0); + return 0; default: - return TRUE; + /* linear vector instruction */ + return (1 << c); } } +static INLINE boolean +has_pred(struct nv50_program_exec *e, unsigned cc) +{ + if (!is_long(e) || is_immd(e)) + return FALSE; + return ((e->inst[1] & 0x780) == (cc << 7)); +} + +/* on ENDIF see if we can do "@p0.neu single_op" instead of: + * join_at ENDIF + * @p0.eq bra ENDIF + * single_op + * ENDIF: nop.join + */ static boolean -nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +nv50_kill_branch(struct nv50_pc *pc) { - const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + int lvl = pc->if_lvl; + + if (pc->if_insn[lvl]->next != pc->p->exec_tail) + return FALSE; + + /* if ccode == 'true', the BRA is from an ELSE and the predicate + * reg may no longer be valid, since we currently always use $p0 + */ + if (has_pred(pc->if_insn[lvl], 0xf)) + return FALSE; + assert(pc->if_insn[lvl] && pc->br_join[lvl]); + + /* We'll use the exec allocated for JOIN_AT (as we can't easily + * update prev's next); if exec_tail is BRK, update the pointer. + */ + if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail) + pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl]; + + pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */ + + *pc->br_join[lvl] = *pc->p->exec_tail; + + FREE(pc->if_insn[lvl]); + FREE(pc->p->exec_tail); + + pc->p->exec_tail = pc->br_join[lvl]; + pc->p->exec_tail->next = NULL; + set_pred(pc, 0xd, 0, pc->p->exec_tail); + + return TRUE; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, + const struct tgsi_full_instruction *inst) +{ + struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp; unsigned mask, sat, unit; - boolean assimilate = FALSE; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + memset(src, 0, sizeof(src)); + for (c = 0; c < 4; c++) { - if (mask & (1 << c)) + if ((mask & (1 << c)) && !pc->r_dst[c]) dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); else - dst[c] = NULL; - rdst[c] = NULL; - src[0][c] = NULL; - src[1][c] = NULL; - src[2][c] = NULL; + dst[c] = pc->r_dst[c]; + rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + unsigned src_mask; + boolean neg_supp; + + src_mask = nv50_tgsi_src_mask(inst, i); + neg_supp = negate_supported(inst, i); if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) unit = fs->SrcRegister.Index; for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, fs, - negate_supported(inst, i)); + if (src_mask & (1 << c)) + src[i][c] = tgsi_src(pc, c, fs, neg_supp); } - if (sat) { - for (c = 0; c < 4; c++) { - rdst[c] = dst[c]; - dst[c] = temp_temp(pc); - } + brdc = temp = pc->r_brdc; + if (brdc && brdc->type != P_TEMP) { + temp = temp_temp(pc); + if (sat) + brdc = temp; } else - if (direct2dest_op(inst)) { + if (sat) { for (c = 0; c < 4; c++) { - if (!dst[c] || dst[c]->type != P_TEMP) - continue; - - for (i = c + 1; i < 4; i++) { - if (dst[c] == src[0][i] || - dst[c] == src[1][i] || - dst[c] == src[2][i]) - break; - } - if (i == 4) + if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - - assimilate = TRUE; rdst[c] = dst[c]; - dst[c] = alloc_temp(pc, NULL); + dst[c] = temp_temp(pc); } } + assert(brdc || !is_scalar_op(inst->Instruction.Opcode)); + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { @@ -1360,74 +1632,91 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_add(pc, dst[c], src[0][c], src[1][c]); } break; - case TGSI_OPCODE_COS: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, temp, temp); + case TGSI_OPCODE_BGNLOOP: + pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; + break; + case TGSI_OPCODE_BRK: + emit_branch(pc, -1, 0, NULL); + assert(pc->loop_lvl > 0); + pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail; + break; + case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_mov(pc, dst[c], temp); + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_CEIL, CVT_F32_F32 | CVT_RI); + } + break; + case TGSI_OPCODE_COS: + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 5, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, brdc, temp); break; case TGSI_OPCODE_DP3: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); - emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][2], src[1][2], temp); break; case TGSI_OPCODE_DP4: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][3], src[1][3], temp); break; case TGSI_OPCODE_DPH: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_add(pc, temp, src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_add(pc, brdc, src[1][3], temp); break; case TGSI_OPCODE_DST: - { - struct nv50_reg *one = alloc_immd(pc, 1.0); - if (mask & (1 << 0)) - emit_mov(pc, dst[0], one); if (mask & (1 << 1)) emit_mul(pc, dst[1], src[0][1], src[1][1]); if (mask & (1 << 2)) emit_mov(pc, dst[2], src[0][2]); if (mask & (1 << 3)) emit_mov(pc, dst[3], src[1][3]); - FREE(one); - } + if (mask & (1 << 0)) + emit_mov_immdval(pc, dst[0], 1.0f); + break; + case TGSI_OPCODE_ELSE: + emit_branch(pc, -1, 0, NULL); + pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; + case TGSI_OPCODE_ENDIF: + pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; + + /* try to replace branch over 1 insn with a predicated insn */ + if (nv50_kill_branch(pc) == TRUE) + break; + + if (pc->br_join[pc->if_lvl]) { + pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; + pc->br_join[pc->if_lvl] = NULL; + } + /* emit a NOP as join point, we could set it on the next + * one, but would have to make sure it is long and !immd + */ + emit_nop(pc); + pc->p->exec_tail->inst[1] |= 2; + break; + case TGSI_OPCODE_ENDLOOP: + emit_branch(pc, -1, 0, NULL); + pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; + pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; break; case TGSI_OPCODE_EX2: - temp = temp_temp(pc); emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 6, brdc, temp); break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -1445,24 +1734,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_IF: + /* emitting a join_at may not be necessary */ + assert(pc->if_lvl < MAX_IF_DEPTH); + set_pred_wr(pc, 1, 0, pc->if_cond); + emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); emit_kil(pc, src[0][1]); emit_kil(pc, src[0][2]); emit_kil(pc, src[0][3]); - pc->p->cfg.fp.regs[2] |= 0x00100000; break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - temp = temp_temp(pc); - emit_flop(pc, 3, temp, src[0][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 3, brdc, src[0][0]); break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -1510,31 +1799,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_POW: - temp = temp_temp(pc); - emit_pow(pc, temp, src[0][0], src[1][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 0, dst[c], src[0][0]); - } + emit_flop(pc, 0, brdc, src[0][0]); break; case TGSI_OPCODE_RSQ: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 2, dst[c], src[0][0]); - } + emit_flop(pc, 2, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); + if (mask & 3) + emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) emit_flop(pc, 5, dst[0], temp); if (mask & (1 << 1)) @@ -1544,28 +1820,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; - case TGSI_OPCODE_SGE: - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_set(pc, 6, dst[c], src[0][c], src[1][c]); - } - break; case TGSI_OPCODE_SIN: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 4, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, brdc, temp); break; case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + i = map_tgsi_setop_cc(inst->Instruction.Opcode); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); } break; case TGSI_OPCODE_SUB: @@ -1583,6 +1860,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_tex(pc, dst, mask, src[0], unit, inst->InstructionExtTexture.Texture, TRUE); break; + case TGSI_OPCODE_TRUNC: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); + } + break; case TGSI_OPCODE_XPD: temp = temp_temp(pc); if (mask & (1 << 0)) { @@ -1607,17 +1892,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + if (brdc) { + if (sat) + emit_sat(pc, brdc, brdc); + for (c = 0; c < 4; c++) + if ((mask & (1 << c)) && dst[c] != brdc) + emit_mov(pc, dst[c], brdc); + } else if (sat) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, - CVT_F32_F32); + /* in this case we saturate later */ + if (dst[c]->type == P_TEMP && dst[c]->index < 0) + continue; + emit_sat(pc, rdst[c], dst[c]); } - } else if (assimilate) { - for (c = 0; c < 4; c++) - if (rdst[c]) - assimilate_temp(pc, rdst[c], dst[c]); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1626,9 +1916,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); - else - if (src[i][c]->acc == pc->insn_cur) - release_hw(pc, src[i][c]); } } @@ -1636,180 +1923,271 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return TRUE; } -/* Adjust a bitmask that indicates what components of a source are used, - * we use this in tx_prep so we only load interpolants that are needed. - */ -static void -insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) -{ - const struct tgsi_instruction_ext_texture *tex; - - switch (insn->Instruction.Opcode) { - case TGSI_OPCODE_DP3: - *mask = 0x7; - break; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - *mask = 0xF; - break; - case TGSI_OPCODE_LIT: - *mask = 0xB; - break; - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - *mask = 0x1; - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXP: - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; - - *mask = 0x7; - if (tex->Texture == TGSI_TEXTURE_1D) - *mask = 0x1; - else - if (tex->Texture == TGSI_TEXTURE_2D) - *mask = 0x3; - - if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) - *mask |= 0x8; - break; - default: - break; - } -} - static void -prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, - unsigned *r_usage[2]) +prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - const struct tgsi_full_instruction *insn; + struct nv50_reg *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; + unsigned i, c, k, mask; - unsigned i, c, k, n, mask, *acc_p; - - insn = &tok->FullInstruction; dst = &insn->FullDstRegisters[0].DstRegister; mask = dst->WriteMask; - if (!r_usage[0]) - r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); - if (!r_usage[1]) - r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); + if (dst->File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (dst->File == TGSI_FILE_OUTPUT) + reg = pc->result; - if (dst->File == TGSI_FILE_TEMPORARY) { + if (reg) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - r_usage[0][dst->Index * 4 + c] = pc->insn_nr; + reg[dst->Index * 4 + c].acc = pc->insn_nr; } } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { src = &insn->FullSrcRegisters[i]; - switch (src->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - acc_p = r_usage[0]; - break; - case TGSI_FILE_INPUT: - acc_p = r_usage[1]; - break; - default: + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (src->SrcRegister.File == TGSI_FILE_INPUT) + reg = pc->attr; + else continue; - } - insn_adjust_mask(insn, &mask); + mask = nv50_tgsi_src_mask(insn, i); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - k = tgsi_util_get_full_src_register_extswizzle(src, c); - switch (k) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - n = src->SrcRegister.Index * 4 + k; - acc_p[n] = pc->insn_nr; - break; - default: - break; - } + + if (k > TGSI_EXTSWIZZLE_W) + continue; + + reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; } } } +/* Returns a bitmask indicating which dst components need to be + * written to temporaries first to avoid 'corrupting' sources. + * + * m[i] (out) indicate component to write in the i-th position + * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source + */ +static unsigned +nv50_revdep_reorder(unsigned m[4], unsigned rdep[4]) +{ + unsigned i, c, x, unsafe; + + for (c = 0; c < 4; c++) + m[c] = c; + + /* Swap as long as a dst component written earlier is depended on + * by one written later, but the next one isn't depended on by it. + */ + for (c = 0; c < 3; c++) { + if (rdep[m[c + 1]] & (1 << m[c])) + continue; /* if next one is depended on by us */ + for (i = c + 1; i < 4; i++) + /* if we are depended on by a later one */ + if (rdep[m[c]] & (1 << m[i])) + break; + if (i == 4) + continue; + /* now, swap */ + x = m[c]; + m[c] = m[c + 1]; + m[c + 1] = x; + + /* restart */ + c = 0; + } + + /* mark dependencies that could not be resolved by reordering */ + for (i = 0; i < 3; ++i) + for (c = i + 1; c < 4; ++c) + if (rdep[m[i]] & (1 << m[c])) + unsafe |= (1 << i); + + /* NOTE: $unsafe is with respect to order, not component */ + return unsafe; +} + +/* Select a suitable dst register for broadcasting scalar results, + * or return NULL if we have to allocate an extra TEMP. + * + * If e.g. only 1 component is written, we may also emit the final + * result to a write-only register. + */ +static struct nv50_reg * +tgsi_broadcast_dst(struct nv50_pc *pc, + const struct tgsi_full_dst_register *fd, unsigned mask) +{ + if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->DstRegister.WriteMask); + if (c) + return tgsi_dst(pc, c - 1, fd); + } else { + int c = ffs(fd->DstRegister.WriteMask) - 1; + if ((1 << c) == fd->DstRegister.WriteMask) + return tgsi_dst(pc, c, fd); + } + + return NULL; +} + +/* Scan source swizzles and return a bitmask indicating dst regs that + * also occur among the src regs, and fill rdep for nv50_revdep_reoder. + */ static unsigned -load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, - int *aid, int *p_oid) +nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, + unsigned rdep[4]) { - struct nv50_reg *iv; - int oid, c, n; - unsigned mask = 0; + const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_src_register *fs; + unsigned i, deqs = 0; - iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; + for (i = 0; i < 4; ++i) + rdep[i] = 0; - for (c = 0, n = i * 4; c < 4; c++, n++) { - oid = (*p_oid)++; - pc->attr[n].type = P_TEMP; - pc->attr[n].index = i; + for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { + unsigned chn, mask = nv50_tgsi_src_mask(insn, i); + boolean neg_supp = negate_supported(insn, i); - if (pc->attr[n].acc == acc[n]) + fs = &insn->FullSrcRegisters[i]; + if (fs->SrcRegister.File != fd->DstRegister.File || + fs->SrcRegister.Index != fd->DstRegister.Index) continue; - mask |= (1 << c); - pc->attr[n].acc = acc[n]; - pc->attr[n].rhw = pc->attr[n].hw = -1; - alloc_reg(pc, &pc->attr[n]); + for (chn = 0; chn < 4; ++chn) { + unsigned s, c; + + if (!(mask & (1 << chn))) /* src is not read */ + continue; + c = tgsi_util_get_full_src_register_extswizzle(fs, chn); + s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - pc->attr[n].rhw = (*aid)++; - emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); + if (c > TGSI_EXTSWIZZLE_W || + !(fd->DstRegister.WriteMask & (1 << c))) + continue; - pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); - (*mid)++; - pc->p->cfg.fp.regs[1] += 0x00010001; + /* no danger if src is copied to TEMP first */ + if ((s != TGSI_UTIL_SIGN_KEEP) && + (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) + continue; + + rdep[c] |= nv50_tgsi_dst_revdep( + insn->Instruction.Opcode, i, chn); + deqs |= (1 << c); + } } - return mask; + return deqs; } static boolean -nv50_program_tx_prep(struct nv50_pc *pc) +nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { - struct tgsi_parse_context p; - boolean ret = FALSE; - unsigned i, c; - unsigned fcol, bcol, fcrd, depr; + struct tgsi_full_instruction insn = tok->FullInstruction; + const struct tgsi_full_dst_register *fd; + unsigned i, deqs, rdep[4], m[4]; + + fd = &tok->FullInstruction.FullDstRegisters[0]; + deqs = nv50_tgsi_scan_swizzle(&insn, rdep); + + if (is_scalar_op(insn.Instruction.Opcode)) { + pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); + if (!pc->r_brdc) + pc->r_brdc = temp_temp(pc); + return nv50_program_tx_insn(pc, &insn); + } + pc->r_brdc = NULL; + + if (!deqs) + return nv50_program_tx_insn(pc, &insn); + + deqs = nv50_revdep_reorder(m, rdep); + + for (i = 0; i < 4; ++i) { + assert(pc->r_dst[m[i]] == NULL); + + insn.FullDstRegisters[0].DstRegister.WriteMask = + fd->DstRegister.WriteMask & (1 << m[i]); + + if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + continue; + + if (deqs & (1 << i)) + pc->r_dst[m[i]] = alloc_temp(pc, NULL); + + if (!nv50_program_tx_insn(pc, &insn)) + return FALSE; + } - /* count (centroid) perspective interpolations */ - unsigned centroid_loads = 0; - unsigned perspect_loads = 0; + for (i = 0; i < 4; i++) { + struct nv50_reg *reg = pc->r_dst[i]; + if (!reg) + continue; + pc->r_dst[i] = NULL; + + if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) + emit_sat(pc, tgsi_dst(pc, i, fd), reg); + else + emit_mov(pc, tgsi_dst(pc, i, fd), reg); + free_temp(pc, reg); + } - /* track register access for temps and attrs */ - unsigned *r_usage[2]; - r_usage[0] = NULL; - r_usage[1] = NULL; + return TRUE; +} - depr = fcol = bcol = fcrd = 0xffff; +static void +load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *iv, **ppiv; + unsigned mode = pc->interp_mode[reg->index]; - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->p->cfg.fp.regs[0] = 0x01000404; - pc->p->cfg.fp.regs[1] = 0x00000400; + ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p; + iv = *ppiv; + + if ((mode & INTERP_PERSPECTIVE) && !iv) { + iv = *ppiv = alloc_temp(pc, NULL); + iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; + + emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); + emit_flop(pc, 0, iv, iv); + + /* XXX: when loading interpolants dynamically, move these + * to the program head, or make sure it can't be skipped. + */ } - tgsi_parse_init(&p, pc->p->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; + emit_interp(pc, reg, iv, mode); +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context tp; + struct nv50_program *p = pc->p; + boolean ret = FALSE; + unsigned i, c, flat_nr = 0; + + tgsi_parse_init(&tp, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&tp)) { + const union tgsi_full_token *tok = &tp.FullToken; - tgsi_parse_token(&p); + tgsi_parse_token(&tp); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: { const struct tgsi_full_immediate *imm = - &p.FullToken.FullImmediate; + &tp.FullToken.FullImmediate; ctor_immd(pc, imm->u[0].Float, imm->u[1].Float, @@ -1820,78 +2198,61 @@ nv50_program_tx_prep(struct nv50_pc *pc) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *d; - unsigned last, first, mode; + unsigned si, last, first, mode; - d = &p.FullToken.FullDeclaration; + d = &tp.FullToken.FullDeclaration; first = d->DeclarationRange.First; last = d->DeclarationRange.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (pc->temp_nr < (last + 1)) - pc->temp_nr = last + 1; break; case TGSI_FILE_OUTPUT: - if (pc->result_nr < (last + 1)) - pc->result_nr = last + 1; - - if (!d->Declaration.Semantic) + if (!d->Declaration.Semantic || + p->type == PIPE_SHADER_FRAGMENT) break; + si = d->Semantic.SemanticIndex; switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - depr = first; - pc->p->cfg.fp.regs[2] |= 0x00000100; - pc->p->cfg.fp.regs[3] |= 0x00000011; + case TGSI_SEMANTIC_BCOLOR: + p->cfg.two_side[si].hw = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; + break; + case TGSI_SEMANTIC_PSIZE: + p->cfg.psiz = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; break; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + p->cfg.clpd = MIN2(p->cfg.clpd, first); + break; + */ default: break; } - break; case TGSI_FILE_INPUT: { - if (pc->attr_nr < (last + 1)) - pc->attr_nr = last + 1; - - if (pc->p->type != PIPE_SHADER_FRAGMENT) + if (p->type != PIPE_SHADER_FRAGMENT) break; switch (d->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: mode = INTERP_FLAT; + flat_nr++; break; case TGSI_INTERPOLATE_PERSPECTIVE: mode = INTERP_PERSPECTIVE; + p->cfg.regs[1] |= 0x08 << 24; break; default: mode = INTERP_LINEAR; break; } - - if (d->Declaration.Semantic) { - switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - fcrd = first; - break; - case TGSI_SEMANTIC_COLOR: - fcol = first; - mode = INTERP_PERSPECTIVE; - break; - case TGSI_SEMANTIC_BCOLOR: - bcol = first; - mode = INTERP_PERSPECTIVE; - break; - } - } - - if (d->Declaration.Centroid) { + if (d->Declaration.Centroid) mode |= INTERP_CENTROID; - if (mode & INTERP_PERSPECTIVE) - centroid_loads++; - } else - if (mode & INTERP_PERSPECTIVE) - perspect_loads++; assert(last < 32); for (i = first; i <= last; i++) @@ -1899,8 +2260,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } break; case TGSI_FILE_CONSTANT: - if (pc->param_nr < (last + 1)) - pc->param_nr = last + 1; break; case TGSI_FILE_SAMPLER: break; @@ -1913,182 +2272,157 @@ nv50_program_tx_prep(struct nv50_pc *pc) break; case TGSI_TOKEN_TYPE_INSTRUCTION: pc->insn_nr++; - prep_inspect_insn(pc, tok, r_usage); + prep_inspect_insn(pc, &tok->FullInstruction); break; default: break; } } - if (pc->temp_nr) { - pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); - if (!pc->temp) - goto out_err; + if (p->type == PIPE_SHADER_VERTEX) { + int rid = 0; - for (i = 0; i < pc->temp_nr; i++) { - for (c = 0; c < 4; c++) { - pc->temp[i*4+c].type = P_TEMP; - pc->temp[i*4+c].hw = -1; - pc->temp[i*4+c].rhw = -1; - pc->temp[i*4+c].index = i; - pc->temp[i*4+c].acc = r_usage[0][i*4+c]; + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); } } - } - - if (pc->attr_nr) { - int oid = 4, mid = 4, aid = 0; - /* oid = VP output id - * aid = FP attribute/interpolant id - * mid = VP output mapping field ID - */ - pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); - if (!pc->attr) - goto out_err; - - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - /* position should be loaded first */ - if (fcrd != 0xffff) { - unsigned mask; - mid = 0; - mask = load_fp_attrib(pc, fcrd, r_usage[1], - &mid, &aid, &oid); - oid = 0; - pc->p->cfg.fp.regs[1] |= (mask << 24); - pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; - } - pc->p->cfg.fp.map[0] += 0x03020100; - - /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ - - if (perspect_loads) { - pc->iv_p = alloc_temp(pc, NULL); - - if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { - pc->p->cfg.fp.regs[1] |= 0x08000000; - pc->iv_p->rhw = aid++; - emit_interp(pc, pc->iv_p, NULL, - INTERP_LINEAR); - emit_flop(pc, 0, pc->iv_p, pc->iv_p); - } else { - pc->iv_p->rhw = aid - 1; - emit_flop(pc, 0, pc->iv_p, - &pc->attr[fcrd * 4 + 3]); - } - } + for (i = 0, rid = 0; i < pc->result_nr; ++i) { + p->cfg.io[i].hw = rid; + p->cfg.io[i].id_vp = i; - if (centroid_loads) { - pc->iv_c = alloc_temp(pc, NULL); - pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; - emit_interp(pc, pc->iv_c, NULL, - INTERP_CENTROID); - emit_flop(pc, 0, pc->iv_c, pc->iv_c); - pc->p->cfg.fp.regs[1] |= 0x08000000; + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->result[n].acc) + continue; + pc->result[n].hw = rid++; + p->cfg.io[i].mask |= 1 << c; } + } - for (c = 0; c < 4; c++) { - /* I don't know what these values do, but - * let's set them like the blob does: - */ - if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - } + for (c = 0; c < 2; ++c) + if (p->cfg.two_side[c].hw < 0x40) + p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c].hw]; - for (i = 0; i < pc->attr_nr; i++) - load_fp_attrib(pc, i, r_usage[1], - &mid, &aid, &oid); + if (p->cfg.psiz < 0x40) + p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + } else + if (p->type == PIPE_SHADER_FRAGMENT) { + int rid, aid; + unsigned n = 0, m = pc->attr_nr - flat_nr; - if (pc->iv_p) - free_temp(pc, pc->iv_p); - if (pc->iv_c) - free_temp(pc, pc->iv_c); + int base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; - pc->p->cfg.fp.high_map = (mid / 4); - pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); - } else { - /* vertex program */ - for (i = 0; i < pc->attr_nr * 4; i++) { - pc->p->cfg.vp.attr[aid / 32] |= - (1 << (aid % 32)); - pc->attr[i].type = P_ATTR; - pc->attr[i].hw = aid++; - pc->attr[i].index = i / 4; + /* non-flat interpolants have to be mapped to + * the lower hardware IDs, so sort them: + */ + for (i = 0; i < pc->attr_nr; i++) { + if (pc->interp_mode[i] == INTERP_FLAT) { + p->cfg.io[m].id_vp = i + base; + p->cfg.io[m++].id_fp = i; + } else { + if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) + p->cfg.io[n].linear = TRUE; + p->cfg.io[n].id_vp = i + base; + p->cfg.io[n++].id_fp = i; } } - } - if (pc->result_nr) { - int rid = 0; + if (!base) /* set w-coordinate mask from perspective interp */ + p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; - pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); - if (!pc->result) - goto out_err; + aid = popcnt4( /* if fcrd isn't contained in cfg.io */ + base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); - for (i = 0; i < pc->result_nr; i++) { - for (c = 0; c < 4; c++) { - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->result[i*4+c].type = P_TEMP; - pc->result[i*4+c].hw = -1; - pc->result[i*4+c].rhw = (i == depr) ? - -1 : rid++; - } else { - pc->result[i*4+c].type = P_RESULT; - pc->result[i*4+c].hw = rid++; - } - pc->result[i*4+c].index = i; - } + for (n = 0; n < pc->attr_nr; ++n) { + p->cfg.io[n].hw = rid = aid; + i = p->cfg.io[n].id_fp; - if (pc->p->type == PIPE_SHADER_FRAGMENT && - depr != 0xffff) { - pc->result[depr * 4 + 2].rhw = - (pc->result_nr - 1) * 4; + for (c = 0; c < 4; ++c) { + if (!pc->attr[i * 4 + c].acc) + continue; + pc->attr[i * 4 + c].rhw = rid++; + p->cfg.io[n].mask |= 1 << c; + + load_interpolant(pc, &pc->attr[i * 4 + c]); } + aid += popcnt4(p->cfg.io[n].mask); } - } - if (pc->param_nr) { - int rid = 0; + if (!base) + p->cfg.regs[1] |= p->cfg.io[0].mask << 24; - pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); - if (!pc->param) - goto out_err; + m = popcnt4(p->cfg.regs[1] >> 24); + + /* set count of non-position inputs and of non-flat + * non-position inputs for FP_INTERPOLANT_CTRL + */ + p->cfg.regs[1] |= aid - m; + + if (flat_nr) { + i = p->cfg.io[pc->attr_nr - flat_nr].hw; + p->cfg.regs[1] |= (i - m) << 16; + } else + p->cfg.regs[1] |= p->cfg.regs[1] << 16; + + /* mark color semantic for light-twoside */ + n = 0x40; + for (i = 0; i < pc->attr_nr; i++) { + ubyte si, sn; - for (i = 0; i < pc->param_nr; i++) { - for (c = 0; c < 4; c++) { - pc->param[i*4+c].type = P_CONST; - pc->param[i*4+c].hw = rid++; - pc->param[i*4+c].index = i; + sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; + si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + + if (sn == TGSI_SEMANTIC_COLOR) { + p->cfg.two_side[si] = p->cfg.io[i]; + + /* increase colour count */ + p->cfg.regs[0] += popcnt4( + p->cfg.two_side[si].mask) << 16; + + n = MIN2(n, p->cfg.io[i].hw - m); } } + if (n < 0x40) + p->cfg.regs[0] += n; + + /* Initialize FP results: + * FragDepth is always first TGSI and last hw output + */ + i = p->info.writes_z ? 4 : 0; + for (rid = 0; i < pc->result_nr * 4; i++) + pc->result[i].rhw = rid++; + if (p->info.writes_z) + pc->result[2].rhw = rid; + + p->cfg.high_result = rid; } if (pc->immd_nr) { int rid = 0; - pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg)); if (!pc->immd) goto out_err; for (i = 0; i < pc->immd_nr; i++) { - for (c = 0; c < 4; c++) { - pc->immd[i*4+c].type = P_IMMD; - pc->immd[i*4+c].hw = rid++; - pc->immd[i*4+c].index = i; - } + for (c = 0; c < 4; c++, rid++) + ctor_reg(&pc->immd[rid], P_IMMD, i, rid); } } ret = TRUE; out_err: - if (r_usage[0]) - FREE(r_usage[0]); - if (r_usage[1]) - FREE(r_usage[1]); + if (pc->iv_p) + free_temp(pc, pc->iv_p); + if (pc->iv_c) + free_temp(pc, pc->iv_c); - tgsi_parse_free(&p); + tgsi_parse_free(&tp); return ret; } @@ -2110,18 +2444,165 @@ free_nv50_pc(struct nv50_pc *pc) } static boolean +ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) +{ + int i, c; + unsigned rtype[2] = { P_ATTR, P_RESULT }; + + pc->p = p; + pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1; + pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; + pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; + pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + + p->cfg.high_temp = 4; + + p->cfg.two_side[0].hw = 0x40; + p->cfg.two_side[1].hw = 0x40; + + switch (p->type) { + case PIPE_SHADER_VERTEX: + p->cfg.psiz = 0x40; + p->cfg.clpd = 0x40; + p->cfg.io_nr = pc->result_nr; + break; + case PIPE_SHADER_FRAGMENT: + rtype[0] = rtype[1] = P_TEMP; + + p->cfg.regs[0] = 0x01000004; + p->cfg.io_nr = pc->attr_nr; + + if (p->info.writes_z) { + p->cfg.regs[2] |= 0x00000100; + p->cfg.regs[3] |= 0x00000011; + } + if (p->info.uses_kill) + p->cfg.regs[2] |= 0x00100000; + break; + } + + if (pc->temp_nr) { + pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->temp) + return FALSE; + + for (i = 0; i < pc->temp_nr * 4; ++i) + ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1); + } + + if (pc->attr_nr) { + pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->attr) + return FALSE; + + for (i = 0; i < pc->attr_nr * 4; ++i) + ctor_reg(&pc->attr[i], rtype[0], i / 4, -1); + } + + if (pc->result_nr) { + unsigned nr = pc->result_nr * 4; + + pc->result = MALLOC(nr * sizeof(struct nv50_reg)); + if (!pc->result) + return FALSE; + + for (i = 0; i < nr; ++i) + ctor_reg(&pc->result[i], rtype[1], i / 4, -1); + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->param) + return FALSE; + + for (i = 0; i < pc->param_nr; ++i) + for (c = 0; c < 4; ++c, ++rid) + ctor_reg(&pc->param[rid], P_CONST, i, rid); + } + + return TRUE; +} + +static void +nv50_fp_move_results(struct nv50_pc *pc) +{ + struct nv50_reg reg; + unsigned i; + + ctor_reg(®, P_TEMP, -1, -1); + + for (i = 0; i < pc->result_nr * 4; ++i) { + if (pc->result[i].rhw < 0 || pc->result[i].hw < 0) + continue; + if (pc->result[i].rhw != pc->result[i].hw) { + reg.hw = pc->result[i].rhw; + emit_mov(pc, ®, &pc->result[i]); + } + } +} + +static void +nv50_program_fixup_insns(struct nv50_pc *pc) +{ + struct nv50_program_exec *e, *prev = NULL, **bra_list; + unsigned i, n, pos; + + bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); + + /* Collect branch instructions, we need to adjust their offsets + * when converting 32 bit instructions to 64 bit ones + */ + for (n = 0, e = pc->p->exec_head; e; e = e->next) + if (e->param.index >= 0 && !e->param.mask) + bra_list[n++] = e; + + /* Make sure we don't have any single 32 bit instructions. */ + for (e = pc->p->exec_head, pos = 0; e; e = e->next) { + pos += is_long(e) ? 2 : 1; + + if ((pos & 1) && (!e->next || is_long(e->next))) { + for (i = 0; i < n; ++i) + if (bra_list[i]->param.index >= pos) + bra_list[i]->param.index += 1; + convert_to_long(pc, e); + ++pos; + } + if (e->next) + prev = e; + } + + assert(!is_immd(pc->p->exec_head)); + assert(!is_immd(pc->p->exec_tail)); + + /* last instruction must be long so it can have the end bit set */ + if (!is_long(pc->p->exec_tail)) { + convert_to_long(pc, pc->p->exec_tail); + if (prev) + convert_to_long(pc, prev); + } + assert(!(pc->p->exec_tail->inst[1] & 2)); + /* set the end-bit */ + pc->p->exec_tail->inst[1] |= 1; + + FREE(bra_list); +} + +static boolean nv50_program_tx(struct nv50_program *p) { struct tgsi_parse_context parse; struct nv50_pc *pc; - unsigned k; boolean ret; pc = CALLOC_STRUCT(nv50_pc); if (!pc) return FALSE; - pc->p = p; - pc->p->cfg.high_temp = 4; + + ret = ctor_nv50_pc(pc, p); + if (ret == FALSE) + goto out_cleanup; ret = nv50_program_tx_prep(pc); if (ret == FALSE) @@ -2141,7 +2622,7 @@ nv50_program_tx(struct nv50_program *p) switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: ++pc->insn_cur; - ret = nv50_program_tx_insn(pc, tok); + ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) goto out_err; break; @@ -2150,48 +2631,10 @@ nv50_program_tx(struct nv50_program *p) } } - if (p->type == PIPE_SHADER_FRAGMENT) { - struct nv50_reg out; - - out.type = P_TEMP; - for (k = 0; k < pc->result_nr * 4; k++) { - if (pc->result[k].rhw == -1) - continue; - if (pc->result[k].hw != pc->result[k].rhw) { - out.hw = pc->result[k].rhw; - emit_mov(pc, &out, &pc->result[k]); - } - if (pc->p->cfg.high_result < (pc->result[k].rhw + 1)) - pc->p->cfg.high_result = pc->result[k].rhw + 1; - } - } - - /* look for single half instructions and make them long */ - struct nv50_program_exec *e, *e_prev; + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); - for (k = 0, e = pc->p->exec_head, e_prev = NULL; e; e = e->next) { - if (!is_long(e)) - k++; - - if (!e->next || is_long(e->next)) { - if (k & 1) - convert_to_long(pc, e); - k = 0; - } - - if (e->next) - e_prev = e; - } - - if (!is_long(pc->p->exec_tail)) { - /* this may occur if moving FP results */ - assert(e_prev && !is_long(e_prev)); - convert_to_long(pc, e_prev); - convert_to_long(pc, pc->p->exec_tail); - } - - assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); - pc->p->exec_tail->inst[1] |= 0x00000001; + nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; @@ -2258,30 +2701,19 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - if (!p->data[1] && p->param_nr) { - struct nouveau_resource *heap = - nv50->screen->parm_heap[p->type]; - - if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) { - while (heap->next && heap->size < p->param_nr) { - struct nv50_program *evict = heap->next->priv; - nouveau_resource_free(&evict->data[1]); - } - - if (nouveau_resource_alloc(heap, p->param_nr, p, - &p->data[1])) - assert(0); - } - } + assert(p->param_nr <= 128); if (p->param_nr) { - unsigned cbuf = NV50_CB_PVP; + unsigned cb; float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - if (p->type == PIPE_SHADER_FRAGMENT) - cbuf = NV50_CB_PFP; - nv50_program_upload_data(nv50, map, p->data[1]->start, - p->param_nr, cbuf); + + if (p->type == PIPE_SHADER_VERTEX) + cb = NV50_CB_PVP; + else + cb = NV50_CB_PFP; + + nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); } } @@ -2303,32 +2735,41 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) upload = TRUE; } - if ((p->data[0] && p->data[0]->start != p->data_start[0]) || - (p->data[1] && p->data[1]->start != p->data_start[1])) { - for (e = p->exec_head; e; e = e->next) { - unsigned ei, ci, bs; + if (p->data[0] && p->data[0]->start != p->data_start[0]) + upload = TRUE; - if (e->param.index < 0) - continue; - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index + p->data[bs]->start; + if (!upload) + return; + + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci, bs; + + if (e->param.index < 0) + continue; + + if (e->param.mask == 0) { + assert(!(e->param.index & 1)); + /* seem to be 8 byte steps */ + ei = (e->param.index >> 1) + 0 /* START_ID */; - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); + e->inst[0] &= 0xf0000fff; + e->inst[0] |= ei << 12; + continue; } - if (p->data[0]) - p->data_start[0] = p->data[0]->start; - if (p->data[1]) - p->data_start[1] = p->data[1]->start; + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; - upload = TRUE; + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); } - if (!upload) - return; + if (p->data[0]) + p->data_start[0] = p->data[0]->start; #ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); @@ -2402,8 +2843,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); - so_data (so, p->cfg.vp.attr[0]); - so_data (so, p->cfg.vp.attr[1]); + so_data (so, p->cfg.attr[0]); + so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); @@ -2421,7 +2862,6 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - unsigned i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -2438,29 +2878,186 @@ nv50_fragprog_validate(struct nv50_context *nv50) NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); - so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map); - for (i = 0; i < p->cfg.fp.high_map; i++) - so_data(so, p->cfg.fp.map[i]); - so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2); - so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ + so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); - so_data (so, p->cfg.fp.regs[2]); + so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); - so_data (so, p->cfg.fp.regs[3]); + so_data (so, p->cfg.regs[3]); so_method(so, tesla, NV50TCL_FP_START_ID, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.fragprog); so_ref(NULL, &so); } +static void +nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) +{ + struct nv50_program *fp = nv50->fragprog; + struct nv50_program *vp = nv50->vertprog; + unsigned i, c, m = base; + + /* XXX: This can't work correctly in all cases yet, we either + * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has + * to be per FP input instead of per VP output + */ + memset(pntc, 0, 8 * sizeof(uint32_t)); + + for (i = 0; i < fp->cfg.io_nr; i++) { + uint8_t sn, si; + uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + unsigned n = popcnt4(fp->cfg.io[i].mask); + + if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + m += n; + continue; + } + + sn = vp->info.input_semantic_name[j]; + si = vp->info.input_semantic_index[j]; + + if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + ubyte mode = + nv50->rasterizer->pipe.sprite_coord_mode[si]; + + if (mode == PIPE_SPRITE_COORD_NONE) { + m += n; + continue; + } + } + + /* this is either PointCoord or replaced by sprite coords */ + for (c = 0; c < 4; c++) { + if (!(fp->cfg.io[i].mask & (1 << c))) + continue; + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } + } +} + +static int +nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +{ + int c; + uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; + uint8_t *map = (uint8_t *)p_map; + + for (c = 0; c < 4; ++c) { + if (mf & 1) { + if (fpi->linear == TRUE) + lin[mid / 32] |= 1 << (mid % 32); + map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + } + + oid += mv & 1; + mf >>= 1; + mv >>= 1; + } + + return mid; +} + +void +nv50_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + struct nouveau_stateobj *so; + struct nv50_sreg4 dummy, *vpo; + int i, n, c, m = 0; + uint32_t map[16], lin[4], reg[5], pcrd[8]; + + memset(map, 0, sizeof(map)); + memset(lin, 0, sizeof(lin)); + + reg[1] = 0x00000004; /* low and high clip distance map ids */ + reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ + reg[3] = 0x00000000; /* point size map id & enable */ + reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ + reg[4] = fp->cfg.regs[1]; /* interpolant info */ + + dummy.linear = FALSE; + dummy.mask = 0xf; /* map all components of HPOS */ + m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + + dummy.mask = 0x0; + + if (vp->cfg.clpd < 0x40) { + for (c = 0; c < vp->cfg.clpd_nr; ++c) + map[m++] = vp->cfg.clpd + c; + reg[1] = (m << 8); + } + + reg[0] |= m << 8; /* adjust BFC0 id */ + + /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ + if (nv50->rasterizer->pipe.light_twoside) { + vpo = &vp->cfg.two_side[0]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + } + + reg[0] += m - 4; /* adjust FFC0 id */ + reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ + + i = 0; + if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) + i = 1; + for (; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; + + n = fp->cfg.io[i].id_vp; + if (n >= vp->cfg.io_nr || + vp->info.output_semantic_name[n] != sn || + vp->info.output_semantic_index[n] != si) + vpo = &dummy; + else + vpo = &vp->cfg.io[n]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + } + + if (nv50->rasterizer->pipe.point_size_per_vertex) { + map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); + reg[3] = (m++ << 4) | 1; + } + + /* now fill the stateobj */ + so = so_new(64, 0); + + n = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); + so_datap (so, reg, 4); + + so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); + so_data (so, reg[4]); + + so_method(so, tesla, 0x1540, 4); + so_datap (so, lin, 4); + + if (nv50->rasterizer->pipe.point_sprite) { + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + + so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); + so_datap (so, pcrd, 8); + } + + so_ref(so, &nv50->state.programs); + so_ref(NULL, &so); +} + void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { @@ -2476,7 +3073,6 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); nouveau_resource_free(&p->data[0]); - nouveau_resource_free(&p->data[1]); p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 096e0476aab..d78dee083f1 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -15,6 +15,15 @@ struct nv50_program_exec { } param; }; +struct nv50_sreg4 { + uint8_t hw; + uint8_t id_vp; + uint8_t id_fp; + + uint8_t mask; + boolean linear; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -24,8 +33,8 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nouveau_resource *data[2]; - unsigned data_start[2]; + struct nouveau_resource *data[1]; + unsigned data_start[1]; struct nouveau_bo *bo; @@ -36,14 +45,20 @@ struct nv50_program { struct { unsigned high_temp; unsigned high_result; - struct { - unsigned attr[2]; - } vp; - struct { - unsigned regs[4]; - unsigned map[5]; - unsigned high_map; - } fp; + + uint32_t attr[2]; + uint32_t regs[4]; + + /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ + unsigned io_nr; + struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + + /* FP colour inputs, VP/GP back colour outputs */ + struct nv50_sreg4 two_side[2]; + + /* VP only */ + uint8_t clpd, clpd_nr; + uint8_t psiz; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c7f80a22037..3b08e1b89fb 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -87,12 +87,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_OCCLUSION_QUERY: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 4283808ed93..81fa3e34c59 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -276,6 +276,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, 0x1684, 1); so_data (so, cso->flatshade_first ? 0 : 1); + so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); + so_data (so, cso->light_twoside); + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); so_data (so, fui(cso->line_width)); so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); @@ -294,6 +297,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POINT_SIZE, 1); so_data (so, fui(cso->point_size)); + so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1); + so_data (so, cso->point_sprite); + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); if (cso->front_winding == PIPE_WINDING_CCW) { so_data(so, nvgl_polygon_mode(cso->fill_ccw)); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index d294356f75d..4ed76973c4b 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -189,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -210,6 +212,12 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vtxattr); } nv50->state.dirty = 0; +} + +void +nv50_state_flush_notify(struct nouveau_channel *chan) +{ + struct nv50_context *nv50 = chan->user_private; so_emit_reloc_markers(chan, nv50->state.fb); so_emit_reloc_markers(chan, nv50->state.vertprog); @@ -240,6 +248,9 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + nv50_linkage_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -301,7 +312,7 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(12, 0); + so = so_new(14, 0); if (!bypass) { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); so_data (so, fui(nv50->viewport.translate[0])); @@ -314,12 +325,21 @@ scissor_uptodate: so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 1); + /* 0x0000 = remove whole primitive only (xyz) + * 0x1018 = remove whole primitive only (xy), clamp z + * 0x1080 = clip primitive (xyz) + * 0x1098 = clip primitive (xy), clamp z + */ + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x1080); /* no idea what 0f90 does */ so_method(so, tesla, 0x0f90, 1); so_data (so, 0); } else { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 0); + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x0000); so_method(so, tesla, 0x0f90, 1); so_data (so, 1); } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index e9c3562194b..bb7731855cd 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -89,14 +89,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, if (src_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); - OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } if (dst_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); - OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, (dy << 16) | (dx * cpp)); } else { dst_offset += (line_count * dst_pitch); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6c5914baa35..52b1c9a6b26 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -184,8 +184,15 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - /* Stride (pitch?) of this texture in bytes */ - unsigned stride; + /** + * If non-zero, override the natural texture layout with + * a custom stride (in bytes). + * + * \note Mipmapping fails for textures with a non-natural layout! + * + * \sa r300_texture_get_stride + */ + unsigned stride_override; /* Total size of this texture, in bytes. */ unsigned size; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1bc35c24867..a1b36ba2ed1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -283,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300, for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -300,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index d05a736dd96..737396d8d97 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -31,6 +31,7 @@ #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ +#define R300_MAX_VBO_SIZE (1024 * 1024) struct r300_render { /* Parent class */ @@ -46,7 +47,10 @@ struct r300_render { /* VBO */ struct pipe_buffer* vbo; - size_t vbo_alloc_size; + size_t vbo_size; + size_t vbo_offset; + size_t vbo_max_used; + void * vbo_ptr; }; static INLINE struct r300_render* @@ -75,19 +79,18 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (r300render->vbo && (size > r300render->vbo_alloc_size)) { - pipe_buffer_reference(&r300render->vbo, NULL); - } - - if (!r300render->vbo) { + if (size + r300render->vbo_offset > r300render->vbo_size) + { r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - size); + R300_MAX_VBO_SIZE); + r300render->vbo_size = R300_MAX_VBO_SIZE; } - r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size); r300render->vertex_size = vertex_size; + r300->vbo = r300render->vbo; + r300->vbo_offset = r300render->vbo_offset; return (r300render->vbo) ? TRUE : FALSE; } @@ -97,8 +100,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - return (unsigned char*)pipe_buffer_map(screen, r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, @@ -107,15 +112,24 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; + CS_LOCALS(r300render->r300); + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); + END_CS; + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); + struct r300_context* r300 = r300render->r300; - pipe_buffer_reference(&r300render->vbo, NULL); + r300render->vbo_offset += r300render->vbo_max_used; + r300render->vbo_max_used = 0; + r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, @@ -163,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void prepare_render(struct r300_render* render, unsigned count) +static void r300_prepare_render(struct r300_render* render, unsigned count) { struct r300_context* r300 = render->r300; CS_LOCALS(r300); - r300->vbo = render->vbo; - r300_emit_dirty_state(r300); } @@ -183,7 +195,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -208,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, @@ -217,25 +229,7 @@ static void r300_render_draw(struct vbuf_render* render, return; } -/* - index_map = pipe_buffer_map(screen, index_buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count); - pipe_buffer_unmap(screen, index_buffer); - - debug_printf("r300: Doing indexbuf render, count %d\n", count); - - BEGIN_CS(8); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim); - OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); - OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); - OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; */ - - BEGIN_CS(4 + (count+1)/2); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); @@ -273,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; + r300render->vbo = NULL; + r300render->vbo_size = 0; + r300render->vbo_offset = 0; + return &r300render->base; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 15740f61252..3b5b1bbd37f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -323,13 +321,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); trans->transfer.format = texture->format; + trans->transfer.x = x; + trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, - texture->width[level]), 32); + trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; trans->offset = offset; } @@ -356,7 +355,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, if (transfer->usage != PIPE_TRANSFER_READ) { flags |= PIPE_BUFFER_USAGE_CPU_WRITE; } - + map = pipe_buffer_map(screen, tex->buffer, flags); if (!map) { diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 96e6e4a77d4..cc6288cb519 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300, unsigned w, unsigned h) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = dest->stride / dest->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; CS_LOCALS(r300); r300_emit_blend_state(r300, &blend_clear_state); @@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; boolean invalid = FALSE; CS_LOCALS(r300); @@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = srctex->stride / srctex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; boolean invalid = FALSE; float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 590052509cc..7c041d17f71 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -25,7 +25,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, - unsigned pitch, unsigned levels) { struct r300_texture_state* state = &tex->state; @@ -38,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, /* XXX */ state->format1 = r300_translate_texformat(tex->tex.format); - state->format2 = pitch - 1; + state->format2 = r300_texture_get_stride(tex, 0); /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ @@ -49,14 +48,31 @@ static void r300_setup_texture_state(struct r300_texture* tex, state->format2 |= R500_TXHEIGHT_BIT11; } - debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", - width, height, pitch, levels); + debug_printf("r300: Set texture state (%dx%d, %d levels)\n", + width, height, levels); +} + +/** + * Return the stride, in bytes, of the texture images of the given texture + * at the given level. + */ +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +{ + if (tex->stride_override) + return tex->stride_override; + + if (level > tex->tex.last_level) { + debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); + return 0; + } + + return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, offset; + int stride, size; int i; for (i = 0; i <= base->last_level; i++) { @@ -74,7 +90,7 @@ static void r300_setup_miptree(struct r300_texture* tex) * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(pf_get_stride(&base->block, base->width[i]), 32); + stride = r300_texture_get_stride(tex, i); size = stride * base->nblocksy[i] * base->depth[i]; tex->offset[i] = align(tex->size, 32); @@ -84,10 +100,6 @@ static void r300_setup_miptree(struct r300_texture* tex) "(%dx%dx%d px, pitch %d bytes)\n", i, base->width[i], base->height[i], base->depth[i], stride); - /* Save stride of first level to the texture. */ - if (i == 0) { - tex->stride = stride; - } } } @@ -109,7 +121,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, template->width[0], template->height[0], - template->width[0], template->last_level); + template->last_level); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -189,11 +201,10 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride = *stride; + tex->stride_override = *stride; /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->stride, 0); + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -221,7 +232,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = tex->stride; + *stride = r300_texture_get_stride(tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 3b56f0307c0..697669147d8 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -30,8 +30,12 @@ #include "r300_context.h" #include "r300_reg.h" +struct r300_texture; + void r300_init_screen_texture_functions(struct pipe_screen* screen); +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { @@ -68,6 +72,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* W24_FP */ case PIPE_FORMAT_Z24S8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); + /* Z5_Y6_X5 */ + case PIPE_FORMAT_R16_SNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5); default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index d68a1041063..0913ca1bd5b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -158,7 +158,6 @@ static unsigned translate_saturate(unsigned saturate) switch(saturate) { case TGSI_SAT_NONE: return SATURATE_OFF; case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; - case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; } fprintf(stderr, "Unknown saturate mode: %i\n", saturate); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 516e3992fdd..6ab37537628 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -6,26 +6,17 @@ LIBNAME = softpipe C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ - sp_fs_llvm.c \ sp_clear.c \ sp_flush.c \ sp_query.c \ sp_context.c \ sp_draw_arrays.c \ - sp_prim_setup.c \ sp_prim_vbuf.c \ sp_quad_pipe.c \ - sp_quad_alpha_test.c \ - sp_quad_blend.c \ - sp_quad_colormask.c \ - sp_quad_coverage.c \ + sp_quad_stipple.c \ sp_quad_depth_test.c \ - sp_quad_earlyz.c \ sp_quad_fs.c \ - sp_quad_occlusion.c \ - sp_quad_output.c \ - sp_quad_stencil.c \ - sp_quad_stipple.c \ + sp_quad_blend.c \ sp_screen.c \ sp_setup.c \ sp_state_blend.c \ @@ -38,6 +29,7 @@ C_SOURCES = \ sp_state_vertex.c \ sp_texture.c \ sp_tex_sample.c \ + sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index f8720638a76..950c3d9955b 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -7,25 +7,16 @@ softpipe = env.ConvenienceLibrary( source = [ 'sp_fs_exec.c', 'sp_fs_sse.c', - 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', 'sp_flush.c', - 'sp_prim_setup.c', 'sp_prim_vbuf.c', 'sp_setup.c', - 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad_pipe.c', - 'sp_quad_colormask.c', - 'sp_quad_coverage.c', 'sp_quad_depth_test.c', - 'sp_quad_earlyz.c', 'sp_quad_fs.c', - 'sp_quad_occlusion.c', - 'sp_quad_output.c', - 'sp_quad_stencil.c', 'sp_quad_stipple.c', 'sp_query.c', 'sp_screen.c', @@ -39,6 +30,7 @@ softpipe = env.ConvenienceLibrary( 'sp_state_vertex.c', 'sp_surface.c', 'sp_tex_sample.c', + 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', ]) diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index d3af18e162b..8fac8e6e05f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,8 +36,6 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" -#include "sp_surface.h" -#include "sp_state.h" #include "sp_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h index 2e450672f58..9be3b86fe9f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.h +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -32,7 +32,6 @@ #ifndef SP_CLEAR_H #define SP_CLEAR_H -#include "pipe/p_state.h" struct pipe_context; extern void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8a..e1e31ab047a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -31,17 +31,18 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" -#include "sp_prim_setup.h" #include "sp_prim_vbuf.h" #include "sp_state.h" #include "sp_surface.h" #include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "sp_texture.h" #include "sp_winsys.h" #include "sp_query.h" @@ -72,18 +73,16 @@ softpipe_unmap_transfers(struct softpipe_context *sp) { uint i; - for (i = 0; i < sp->framebuffer.nr_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); - for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]); } + sp_tile_cache_unmap_transfers(sp->zsbuf_cache); } -static void softpipe_destroy( struct pipe_context *pipe ) +static void +softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; @@ -91,26 +90,16 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); - softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); - softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); - softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); - softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); - softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); - softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); - softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); - softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); - softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); - softpipe->quad[i].output->destroy( softpipe->quad[i].output ); - } + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); sp_destroy_tile_cache(softpipe->zsbuf_cache); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); + sp_destroy_tex_tile_cache(softpipe->tex_cache[i]); for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { @@ -121,6 +110,15 @@ static void softpipe_destroy( struct pipe_context *pipe ) FREE( softpipe ); } + +/** + * if (the texture is being used as a framebuffer surface) + * return PIPE_REFERENCED_FOR_WRITE + * else if (the texture is a bound texture source) + * return PIPE_REFERENCED_FOR_READ XXX not done yet + * else + * return PIPE_UNREFERENCED + */ static unsigned int softpipe_is_texture_referenced( struct pipe_context *pipe, struct pipe_texture *texture, @@ -129,15 +127,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; - if(softpipe->dirty_render_cache) { + if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - if(softpipe->framebuffer.cbufs[i] && - softpipe->framebuffer.cbufs[i]->texture == texture) + if (softpipe->framebuffer.cbufs[i] && + softpipe->framebuffer.cbufs[i]->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } - if(softpipe->framebuffer.zsbuf && - softpipe->framebuffer.zsbuf->texture == texture) + if (softpipe->framebuffer.zsbuf && + softpipe->framebuffer.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } /* FIXME: we also need to do the same for the texture cache */ @@ -145,6 +145,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + static unsigned int softpipe_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) @@ -152,6 +153,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -222,7 +224,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. @@ -233,41 +234,14 @@ softpipe_create( struct pipe_screen *screen ) softpipe->zsbuf_cache = sp_create_tile_cache( screen ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache( screen ); + softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); /* setup quad rendering stages */ - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); - softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); - softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad[i].output = sp_quad_output_stage(softpipe); - } - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; - softpipe->tgsi.vert_samplers[i].unit = i; - softpipe->tgsi.vert_samplers[i].sp = softpipe; - softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; - softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; - } + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; - softpipe->tgsi.frag_samplers[i].unit = i; - softpipe->tgsi.frag_samplers[i].sp = softpipe; - softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; - softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; - } /* * Create drawing context and plug our rendering stage into it. @@ -281,30 +255,28 @@ softpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); - softpipe->setup = sp_draw_render_stage(softpipe); - if (!softpipe->setup) - goto fail; - if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; - if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - else { - sp_init_vbuf(softpipe); - } + softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe); + if (!softpipe->vbuf_backend) + goto fail; + + softpipe->vbuf = draw_vbuf_stage(softpipe->draw, softpipe->vbuf_backend); + if (!softpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(softpipe->draw, softpipe->vbuf); + draw_set_render(softpipe->draw, softpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); -#if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? */ draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif sp_init_surface_functions(softpipe); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 7888c2f644b..43a195c8ef5 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -36,24 +36,13 @@ #include "draw/draw_vertex.h" #include "sp_quad_pipe.h" -#include "sp_tex_sample.h" -/** - * This is a temporary variable for testing draw-stage polygon stipple. - * If zero, do stipple in sp_quad_stipple.c - */ -#define USE_DRAW_STAGE_PSTIPPLE 1 - -/* Number of threads working on individual quads. - * Setting to 1 disables this feature. - */ -#define SP_NUM_QUAD_THREADS 1 - struct softpipe_vbuf_render; struct draw_context; struct draw_stage; struct softpipe_tile_cache; +struct softpipe_tex_tile_cache; struct sp_fragment_shader; struct sp_vertex_shader; @@ -62,12 +51,12 @@ struct softpipe_context { struct pipe_context pipe; /**< base class */ /** Constant state objects */ - const struct pipe_blend_state *blend; - const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct pipe_depth_stencil_alpha_state *depth_stencil; - const struct pipe_rasterizer_state *rasterizer; - const struct sp_fragment_shader *fs; - const struct sp_vertex_shader *vs; + struct pipe_blend_state *blend; + struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_depth_stencil_alpha_state *depth_stencil; + struct pipe_rasterizer_state *rasterizer; + struct sp_fragment_shader *fs; + struct sp_vertex_shader *vs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -107,7 +96,16 @@ struct softpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + /* The reduced version of the primitive supplied by the state + * tracker. + */ + unsigned reduced_api_prim; + + /* The reduced primitive after unfilled triangles, wide-line + * decomposition, etc, are taken into account. This is the + * primitive actually rasterized. + */ + unsigned reduced_prim; /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; @@ -116,41 +114,33 @@ struct softpipe_context { /** Software quad rendering pipeline */ struct { - struct quad_stage *polygon_stipple; - struct quad_stage *earlyz; struct quad_stage *shade; - struct quad_stage *alpha_test; - struct quad_stage *stencil_test; struct quad_stage *depth_test; - struct quad_stage *occlusion; - struct quad_stage *coverage; struct quad_stage *blend; - struct quad_stage *colormask; - struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ - } quad[SP_NUM_QUAD_THREADS]; + } quad; /** TGSI exec things */ struct { - struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct softpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; - - struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + + unsigned tex_timestamp; + struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; @@ -164,5 +154,9 @@ softpipe_context( struct pipe_context *pipe ) return (struct softpipe_context *)pipe; } +void +softpipe_reset_sampler_varients(struct softpipe_context *softpipe); + + #endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 4a14d49686e..e38b767cf2c 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -37,6 +37,7 @@ #include "sp_surface.h" #include "sp_state.h" #include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "sp_winsys.h" @@ -52,17 +53,19 @@ softpipe_flush( struct pipe_context *pipe, if (flags & PIPE_FLUSH_TEXTURE_CACHE) { for (i = 0; i < softpipe->num_textures; i++) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + sp_flush_tex_tile_cache(softpipe->tex_cache[i]); } } - if (flags & PIPE_FLUSH_RENDER_CACHE) { + if (flags & PIPE_FLUSH_SWAPBUFFERS) { + /* If this is a swapbuffers, just flush color buffers. + * + * The zbuffer changes are not discarded, but held in the cache + * in the hope that a later clear will wipe them out. + */ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); - - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + sp_flush_tile_cache(softpipe->cbuf_cache[i]); /* Need this call for hardware buffers before swapbuffers. * @@ -71,7 +74,15 @@ softpipe_flush( struct pipe_context *pipe, * to unmap surfaces when flushing. */ softpipe_unmap_transfers(softpipe); - + } + else if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe->zsbuf_cache); + softpipe->dirty_render_cache = FALSE; } diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 9ee86fe7878..4076114d392 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -59,15 +59,34 @@ sp_exec_fragment_shader(const struct sp_fragment_shader *base) } +static void +exec_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers ) +{ + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. + */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + } +} + + + /** * Compute quad X,Y,Z,W for the four fragments in a quad. * * This should really be part of the compiled shader. */ -void -sp_setup_pos_vector(const struct tgsi_interp_coef *coef, - float x, float y, - struct tgsi_exec_vector *quadpos) +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) { uint chan; /* do X */ @@ -95,24 +114,6 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, } -static void -exec_prepare( const struct sp_fragment_shader *base, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers ) -{ - /* - * Bind tokens/shader to the interpreter's machine state. - * Avoid redundant binding. - */ - if (machine->Tokens != base->shader.tokens) { - tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, - PIPE_MAX_SAMPLERS, - samplers ); - } -} - - /* TODO: hide the machine struct in here somewhere, remove from this * interface: */ @@ -122,11 +123,43 @@ exec_run( const struct sp_fragment_shader *base, struct quad_header *quad ) { /* Compute X, Y, Z, W vals for this quad */ - sp_setup_pos_vector(quad->posCoef, - (float)quad->input.x0, (float)quad->input.y0, - &machine->QuadPos); + setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + &machine->QuadPos); - return tgsi_exec_machine_run( machine ); + quad->inout.mask &= tgsi_exec_machine_run( machine ); + if (quad->inout.mask == 0) + return FALSE; + + /* store outputs */ + { + const ubyte *sem_name = base->info.output_semantic_name; + const ubyte *sem_index = base->info.output_semantic_index; + const uint n = base->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + } + break; + } + } + } + + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c deleted file mode 100644 index 95c0d982d12..00000000000 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ /dev/null @@ -1,205 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Execute fragment shader using LLVM code generation. - * Authors: - * Zack Rusin - */ - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_fs.h" - -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_sse2.h" - -#if 0 - -/** - * Subclass of sp_fragment_shader - */ -struct sp_llvm_fragment_shader -{ - struct sp_fragment_shader base; - struct gallivm_prog *llvm_prog; -}; - - -static void -shade_quad_llvm(struct quad_stage *qs, - struct quad_header *quad) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - float dests[4][16][4] ALIGN16_ATTRIB; - float inputs[4][16][4] ALIGN16_ATTRIB; - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; - struct gallivm_prog *llvm = qss->llvm_prog; - - inputs[0][0][0] = fx; - inputs[1][0][0] = fx + 1.0f; - inputs[2][0][0] = fx; - inputs[3][0][0] = fx + 1.0f; - - inputs[0][0][1] = fy; - inputs[1][0][1] = fy; - inputs[2][0][1] = fy + 1.0f; - inputs[3][0][1] = fy + 1.0f; - - - gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); - -#if DLLVM - debug_printf("MASK = %d\n", quad->mask); - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 2; ++j) { - debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, - inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); - } - } -#endif - - quad->mask &= - gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, - softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], - qss->samplers); -#if DLLVM - debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", - dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], - dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); -#endif - - /* store result color */ - if (qss->colorOutSlot >= 0) { - unsigned i; - /* XXX need to handle multiple color outputs someday */ - allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - for (i = 0; i < QUAD_SIZE; ++i) { - quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; - quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; - quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; - quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; - } - } -#if DLLVM - for (int i = 0; i < QUAD_SIZE; ++i) { - debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, - quad->outputs.color[0][0][i], - quad->outputs.color[0][1][i], - quad->outputs.color[0][2][i], - quad->outputs.color[0][3][i]); - } -#endif - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = dests[i][0][2]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = inputs[i][0][2]; - } - } -#if DLLVM - debug_printf("D [%f, %f, %f, %f] mask = %d\n", - quad->outputs.depth[0], - quad->outputs.depth[1], - quad->outputs.depth[2], - quad->outputs.depth[3], quad->mask); -#endif - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - - -unsigned -run_llvm_fs( const struct sp_fragment_shader *base, - struct foo *machine ) -{ -} - - -void -delete_llvm_fs( struct sp_fragment_shader *base ) -{ - FREE(base); -} - - -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) -{ - struct sp_llvm_fragment_shader *shader = NULL; - - /* LLVM fragment shaders currently disabled: - */ - state = CALLOC_STRUCT(sp_llvm_shader_state); - if (!state) - return NULL; - - state->llvm_prog = 0; - - if (!gallivm_global_cpu_engine()) { - gallivm_cpu_engine_create(state->llvm_prog); - } - else - gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); - - if (shader) { - shader->base.run = run_llvm_fs; - shader->base.delete = delete_llvm_fs; - } - - return shader; -} - - -#else - -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) -{ - return NULL; -} - -#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f4fa0905d74..9d3e4670eef 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -76,6 +76,43 @@ fs_sse_prepare( const struct sp_fragment_shader *base, } + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + /* TODO: codegenerate the whole run function, skip this wrapper. * TODO: break dependency on tgsi_exec_machine struct * TODO: push Position calculation into the generated shader @@ -89,9 +126,9 @@ fs_sse_run( const struct sp_fragment_shader *base, struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ - sp_setup_pos_vector(quad->posCoef, - (float)quad->input.x0, (float)quad->input.y0, - machine->Temps); + setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + machine->Temps); /* init kill mask */ tgsi_set_kill_mask(machine, 0x0); @@ -104,7 +141,39 @@ fs_sse_run( const struct sp_fragment_shader *base, // , &machine->QuadPos ); - return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + if (quad->inout.mask == 0) + return FALSE; + + /* store outputs */ + { + const ubyte *sem_name = base->info.output_semantic_name; + const ubyte *sem_index = base->info.output_semantic_index; + const uint n = base->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + } + break; + } + } + } + + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c deleted file mode 100644 index 038ff04d4f1..00000000000 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in sp_prim_vbuf.c. - * - * \author Keith Whitwell <[email protected]> - * \author Brian Paul - */ - - -#include "sp_context.h" -#include "sp_setup.h" -#include "sp_state.h" -#include "sp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. - */ -struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = setup_create_context(softpipe); - sstage->stage.draw = softpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -sp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -sp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h deleted file mode 100644 index 49bdd98ed87..00000000000 --- a/src/gallium/drivers/softpipe/sp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SP_PRIM_SETUP_H -#define SP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - - -struct draw_stage; -struct softpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -sp_draw_render_stage( struct softpipe_context *softpipe ); - -extern struct setup_context * -sp_draw_setup_context( struct draw_stage * ); - -extern void -sp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -sp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 42021789ea8..e603c20fc4c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -37,13 +37,13 @@ #include "sp_context.h" +#include "sp_setup.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "sp_prim_setup.h" -#include "sp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" +#include "util/u_prim.h" #define SP_MAX_VBUF_INDEXES 1024 @@ -58,6 +58,8 @@ struct softpipe_vbuf_render { struct vbuf_render base; struct softpipe_context *softpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -74,6 +76,11 @@ softpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * sp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -104,36 +111,6 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void sp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - const struct vertex_info *info = - softpipe_get_vbuf_vertex_info(cvbr->softpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -159,14 +136,11 @@ static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); + struct setup_context *setup_ctx = cvbr->setup; setup_prepare( setup_ctx ); + cvbr->softpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; return TRUE; @@ -191,14 +165,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct softpipe_context *softpipe = cvbr->softpipe; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = softpipe->setup; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -237,14 +206,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLES: - for (i = 2; i < nr; i += 3) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -254,14 +225,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), @@ -271,14 +244,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -288,8 +263,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -299,7 +274,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), @@ -314,8 +291,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -325,7 +302,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), @@ -355,11 +334,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in sp_setup.c: - */ - sp_draw_flush( setup ); } @@ -372,17 +346,12 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = softpipe->setup; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -421,14 +390,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLES: - for (i = 2; i < nr; i += 3) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), @@ -438,14 +409,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i++) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i++) { setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); } - else { + } + else { + for (i = 2; i < nr; i++) { setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), @@ -455,14 +428,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), @@ -472,8 +447,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), @@ -483,7 +458,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), @@ -497,8 +474,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), @@ -508,7 +485,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), @@ -546,40 +525,38 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - cvbr->softpipe->vbuf_render = NULL; + setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -sp_init_vbuf(struct softpipe_context *sp) +struct vbuf_render * +sp_create_vbuf_backend(struct softpipe_context *sp) { - assert(sp->draw); + struct softpipe_vbuf_render *cvbr = CALLOC_STRUCT(softpipe_vbuf_render); - sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + assert(sp->draw); - sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; - sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; - sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; - sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; - sp->vbuf_render->base.map_vertices = sp_vbuf_map_vertices; - sp->vbuf_render->base.unmap_vertices = sp_vbuf_unmap_vertices; - sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; - sp->vbuf_render->base.draw = sp_vbuf_draw; - sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; - sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; - sp->vbuf_render->base.destroy = sp_vbuf_destroy; + cvbr->base.max_indices = SP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; - sp->vbuf_render->softpipe = sp; + cvbr->base.get_vertex_info = sp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = sp_vbuf_allocate_vertices; + cvbr->base.map_vertices = sp_vbuf_map_vertices; + cvbr->base.unmap_vertices = sp_vbuf_unmap_vertices; + cvbr->base.set_primitive = sp_vbuf_set_primitive; + cvbr->base.draw = sp_vbuf_draw; + cvbr->base.draw_arrays = sp_vbuf_draw_arrays; + cvbr->base.release_vertices = sp_vbuf_release_vertices; + cvbr->base.destroy = sp_vbuf_destroy; - sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + cvbr->softpipe = sp; - draw_set_rasterize_stage(sp->draw, sp->vbuf); + cvbr->setup = setup_create_context(cvbr->softpipe); - draw_set_render(sp->draw, &sp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h index 1de9cc2a894..ad01cc2f289 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.h +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -31,8 +31,8 @@ struct softpipe_context; -extern void -sp_init_vbuf(struct softpipe_context *softpipe); +extern struct vbuf_render * +sp_create_vbuf_backend(struct softpipe_context *softpipe); #endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index bd6c6cb9123..a3236bd1169 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -97,10 +97,10 @@ struct quad_header { struct quad_header_inout inout; struct quad_header_output output; - const struct tgsi_interp_coef *coef; + /* Redundant/duplicated: + */ const struct tgsi_interp_coef *posCoef; - - unsigned nr_attrs; + const struct tgsi_interp_coef *coef; }; #endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c deleted file mode 100644 index 0845bae0e68..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ /dev/null @@ -1,108 +0,0 @@ - -/** - * quad alpha test - */ - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - - -static void -alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - const float ref = softpipe->depth_stencil->alpha.ref_value; - unsigned passMask = 0x0, j; - const uint cbuf = 0; /* only output[0].alpha is tested */ - const float *aaaa = quad->output.color[cbuf][3]; - - switch (softpipe->depth_stencil->alpha.func) { - case PIPE_FUNC_NEVER: - break; - case PIPE_FUNC_LESS: - /* - * If mask were an array [4] we could do this SIMD-style: - * passMask = (quad->outputs.color[0][3] <= vec4(ref)); - */ - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] < ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] == ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] <= ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] > ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] != ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] >= ref) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; - break; - default: - assert(0); - } - - quad->inout.mask &= passMask; - - if (quad->inout.mask) - qs->next->run(qs->next, quad); -} - - -static void alpha_test_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void alpha_test_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage * -sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = alpha_test_begin; - stage->run = alpha_test_quad; - stage->destroy = alpha_test_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index b1e18805c70..e243c63fa23 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -117,644 +117,865 @@ do { \ static void -logicop_quad(struct quad_stage *qs, struct quad_header *quad) +logicop_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) { struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + uint j; + + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float dest[4][QUAD_SIZE]; - ubyte src[4][4], dst[4][4], res[4][4]; - uint *src4 = (uint *) src; - uint *dst4 = (uint *) dst; - uint *res4 = (uint *) res; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } +} - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ - dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ - dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ - dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ - - src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ - src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ - src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ - src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ - } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); - } - for (j = 0; j < 4; j++) { - quadColor[j][0] = ubyte_to_float(res[j][0]); - quadColor[j][1] = ubyte_to_float(res[j][1]); - quadColor[j][2] = ubyte_to_float(res[j][2]); - quadColor[j][3] = ubyte_to_float(res[j][3]); - } +static void +blend_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) +{ + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + struct softpipe_context *softpipe = qs->softpipe; + float source[4][QUAD_SIZE]; + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + assert(0); } - /* pass quad to next stage */ - qs->next->run(qs->next, quad); -} + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } +} static void -blend_quad(struct quad_stage *qs, struct quad_header *quad) +colormask_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) { - static const float zero[4] = { 0, 0, 0, 0 }; - static const float one[4] = { 1, 1, 1, 1 }; + struct softpipe_context *softpipe = qs->softpipe; + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); + + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); + + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); + + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); +} + + +static void +blend_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; + const struct pipe_blend_state *blend = softpipe->blend; + unsigned cbuf; + + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) + { + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], + quads[0]->input.x0, + quads[0]->input.y0); + uint q, i, j; + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[cbuf]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); + + /* get/swizzle dest colors + */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; + + if (blend->logicop_enable) { + logicop_quad( qs, quadColor, dest ); + } + else if (blend->blend_enable) { + blend_quad( qs, quadColor, dest ); + } + + if (blend->colormask != 0xf) + colormask_quad( qs, quadColor, dest ); + + /* Output color values + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } + } } +} - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; +static void +blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + static const float one[4] = { 1, 1, 1, 1 }; + float one_minus_alpha[QUAD_SIZE]; + float dest[4][QUAD_SIZE]; + float source[4][QUAD_SIZE]; + uint i, j, q; + + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const float *alpha = quadColor[3]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); + /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + int x = itx + (j & 1); + int y = ity + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } } - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(temp, alpha, diff); - VEC4_MUL(source[0], quadColor[0], temp); /* R */ - VEC4_MUL(source[1], quadColor[1], temp); /* G */ - VEC4_MUL(source[2], quadColor[2], temp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float alpha[4]; - VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(source[0], quadColor[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(source[1], quadColor[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(source[2], quadColor[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_alpha[4]; - VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + + VEC4_SUB(one_minus_alpha, one, alpha); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */ + + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); } + } +} - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* multiply alpha by 1.0 */ - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(source[3], quadColor[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - /* A */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[3], quadColor[3], inv_comp); +static void +blend_single_add_one_one(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + float dest[4][QUAD_SIZE]; + uint i, j, q; + + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; } - break; - default: - assert(0); } + + VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ - - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); } + } +} - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + +static void +single_output_color(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + uint i, j, q; + + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } } - break; - default: - assert(0); } + } +} + +static void +blend_noop(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ +} - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); +static void +choose_blend_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + const struct pipe_blend_state *blend = softpipe->blend; + + qs->run = blend_fallback; + + if (softpipe->framebuffer.nr_cbufs == 0) { + qs->run = blend_noop; + } + else if (!softpipe->blend->logicop_enable && + softpipe->blend->colormask == 0xf) + { + if (!blend->blend_enable) { + qs->run = single_output_color; } + else if (blend->rgb_src_factor == blend->alpha_src_factor && + blend->rgb_dst_factor == blend->alpha_dst_factor && + blend->rgb_func == blend->alpha_func && + softpipe->framebuffer.nr_cbufs == 1) + { + if (blend->alpha_func == PIPE_BLEND_ADD) { + if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { + qs->run = blend_single_add_one_one; + } + else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + qs->run = blend_single_add_src_alpha_inv_src_alpha; - } /* cbuf loop */ + } + } + } - /* pass blended quad to next stage */ - qs->next->run(qs->next, quad); + qs->run(qs, quads, nr); } static void blend_begin(struct quad_stage *qs) { - qs->next->begin(qs->next); + qs->run = choose_blend_quad; } @@ -770,7 +991,7 @@ struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = blend_begin; - stage->run = blend_quad; + stage->run = choose_blend_quad; stage->destroy = blend_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c deleted file mode 100644 index 953d8516b90..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ /dev/null @@ -1,74 +0,0 @@ - -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" - - -/** - * Loop over colorbuffers, passing quad to next stage each time. - */ -static void -cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; - unsigned i; - - assert(sizeof(quad->outputs.color) == sizeof(tmp)); - assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); - - /* make copy of original colors since they can get modified - * by blending and masking. - * XXX we won't have to do this if the fragment program actually emits - * N separate colors and we're drawing to N color buffers (MRT). - * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is - * in effect, we need to save/restore colors like this. - */ - memcpy(tmp, quad->outputs.color, sizeof(tmp)); - - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - /* set current cbuffer */ -#if 0 /* obsolete & going away */ - softpipe->current_cbuf = i; -#endif - - /* pass blended quad to next stage */ - qs->next->run(qs->next, quad); - - /* restore quad's colors for next buffer */ - memcpy(quad->outputs.color, tmp, sizeof(tmp)); - } -} - - -static void cbuf_loop_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void cbuf_loop_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -/** - * Create the colorbuffer loop stage. - * This is used to implement multiple render targets and GL_FRONT_AND_BACK - * rendering. - */ -struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = cbuf_loop_begin; - stage->run = cbuf_loop_quad; - stage->destroy = cbuf_loop_destroy; - - return stage; -} - diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c deleted file mode 100644 index dc90e5d5e99..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ /dev/null @@ -1,116 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief quad colormask stage - * \author Brian Paul - */ - -#include "pipe/p_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" -#include "sp_tile_cache.h" - - - -/** - * XXX colormask could be rolled into blending... - */ -static void -colormask_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } - - /* R */ - if (!(softpipe->blend->colormask & PIPE_MASK_R)) - COPY_4V(quadColor[0], dest[0]); - - /* G */ - if (!(softpipe->blend->colormask & PIPE_MASK_G)) - COPY_4V(quadColor[1], dest[1]); - - /* B */ - if (!(softpipe->blend->colormask & PIPE_MASK_B)) - COPY_4V(quadColor[2], dest[2]); - - /* A */ - if (!(softpipe->blend->colormask & PIPE_MASK_A)) - COPY_4V(quadColor[3], dest[3]); - } - - /* pass quad to next stage */ - qs->next->run(qs->next, quad); -} - - -static void colormask_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void colormask_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = colormask_begin; - stage->run = colormask_quad; - stage->destroy = colormask_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c deleted file mode 100644 index 4aeee858705..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Apply AA coverage to quad alpha valus - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * Multiply quad's alpha values by the fragment coverage. - */ -static void -coverage_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - const uint prim = quad->input.prim; - - if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) || - (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) || - (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) { - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float (*quadColor)[4] = quad->output.color[cbuf]; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->input.coverage[j] >= 0.0); - assert(quad->input.coverage[j] <= 1.0); - quadColor[3][j] *= quad->input.coverage[j]; - } - } - } - - qs->next->run(qs->next, quad); -} - - -static void coverage_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void coverage_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = coverage_begin; - stage->run = coverage_quad; - stage->destroy = coverage_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index d463930bae1..0ca86c4e1cb 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -31,61 +31,109 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "tgsi/tgsi_scan.h" #include "sp_context.h" #include "sp_quad.h" #include "sp_surface.h" #include "sp_quad_pipe.h" #include "sp_tile_cache.h" +#include "sp_state.h" /* for sp_fragment_shader */ -/** - * Do depth testing for a quad. - * Not static since it's used by the stencil code. - */ +struct depth_data { + struct pipe_surface *ps; + enum pipe_format format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile; +}; -/* - * To increase efficiency, we should probably have multiple versions - * of this function that are specifically for Z16, Z32 and FP Z buffers. - * Try to effectively do that with codegen... - */ -void -sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) + +static void +get_depth_stencil_values( struct depth_data *data, + const struct quad_header *quad ) { - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - const enum pipe_format format = ps->format; - unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ - unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ - unsigned zmask = 0; unsigned j; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + const struct softpipe_cached_tile *tile = data->tile; + + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth16[y][x]; + } + break; + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] >> 8; + data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + default: + assert(0); + } +} - assert(ps); /* shouldn't get here if there's no zbuffer */ +/* If the shader has not been run, interpolate the depth values + * ourselves. + */ +static void +interpolate_quad_depth( struct quad_header *quad ) +{ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - /* - * Convert quad's float depth values to int depth values (qzzzz). + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; +} + + +static void +convert_quad_depth( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + /* Convert quad's float depth values to int depth values (qzzzz). * If the Z buffer stores integer values, we _have_ to do the depth * compares with integers (not floats). Otherwise, the float->int->float * conversion of Z values (which isn't an identity function) will cause * Z-fighting errors. - * - * Also, get the zbuffer values (bzzzz) from the cached tile. */ - switch (format) { + switch (data->format) { case PIPE_FORMAT_Z16_UNORM: { float scale = 65535.0; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth16[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; @@ -94,47 +142,247 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) double scale = (double) (uint) ~0UL; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ case PIPE_FORMAT_S8Z24_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_Z24X8_UNORM: - /* fall-through */ case PIPE_FORMAT_Z24S8_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } + } + break; + default: + assert(0); + } +} - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] >> 8; + + +static void +write_depth_stencil_values( struct depth_data *data, + struct quad_header *quad ) +{ + struct softpipe_cached_tile *tile = data->tile; + unsigned j; + + /* put updated Z values back into cached tile */ + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j] << 8; + } + break; + default: + assert(0); + } +} + + + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param data->stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(struct depth_data *data, + unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param data->stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(struct depth_data *data, + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = data->stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] < STENCIL_MAX) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] > 0) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~data->stencilVals[j]; } } break; @@ -142,6 +390,39 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = newstencil[j]; + } + } +} + + + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +static boolean +depth_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned zmask = 0; + unsigned j; + switch (softpipe->depth_stencil->depth.func) { case PIPE_FUNC_NEVER: /* zmask = 0 */ @@ -151,37 +432,37 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) * Like this: quad->mask &= (quad->outputs.depth < zzzz); */ for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] < bzzzz[j]) + if (data->qzzzz[j] < data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] == bzzzz[j]) + if (data->qzzzz[j] == data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] <= bzzzz[j]) + if (data->qzzzz[j] <= data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] > bzzzz[j]) + if (data->qzzzz[j] > data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] != bzzzz[j]) + if (data->qzzzz[j] != data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] >= bzzzz[j]) + if (data->qzzzz[j] >= data->bzzzz[j]) zmask |= (1 << j); } break; @@ -193,80 +474,480 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) } quad->inout.mask &= zmask; + if (quad->inout.mask == 0) + return FALSE; + /* Update our internal copy only if writemask set. Even if + * depth.writemask is FALSE, may still need to write out buffer + * data due to stencil changes. + */ if (softpipe->depth_stencil->depth.writemask) { - - /* This is also efficient with sse / spe instructions: - */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - bzzzz[j] = qzzzz[j]; - } + if (quad->inout.mask & (1 << j)) { + data->bzzzz[j] = data->qzzzz[j]; + } } + } - /* put updated Z values back into cached tile */ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth16[y][x] = (ushort) bzzzz[j]; + return TRUE; +} + + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static void +depth_stencil_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + uint face = quad->input.facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].writemask; + valMask = softpipe->depth_stencil->stencil[face].valuemask; + + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(data, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(data, failMask, failOp, ref, wrtMask); + } + } + + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; + + depth_test_quad(qs, data, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned zFailMask = origMask & ~quad->inout.mask; + apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask); } - break; - case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ - /* (yes, this falls through to a different case than above) */ - case PIPE_FORMAT_Z32_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j]; + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned zPassMask = origMask & quad->inout.mask; + apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask); } - break; - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; - tile->data.depth32[y][x] = s8z24; + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); + } + } +} + + +#define ALPHATEST( FUNC, COMP ) \ + static int \ + alpha_test_quads_##FUNC( struct quad_stage *qs, \ + struct quad_header *quads[], \ + unsigned nr ) \ + { \ + const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ + const uint cbuf = 0; /* only output[0].alpha is tested */ \ + unsigned pass_nr = 0; \ + unsigned i; \ + \ + for (i = 0; i < nr; i++) { \ + const float *aaaa = quads[i]->output.color[cbuf][3]; \ + unsigned passMask = 0; \ + \ + if (aaaa[0] COMP ref) passMask |= (1 << 0); \ + if (aaaa[1] COMP ref) passMask |= (1 << 1); \ + if (aaaa[2] COMP ref) passMask |= (1 << 2); \ + if (aaaa[3] COMP ref) passMask |= (1 << 3); \ + \ + quads[i]->inout.mask &= passMask; \ + \ + if (quads[i]->inout.mask) \ + quads[pass_nr++] = quads[i]; \ + } \ + \ + return pass_nr; \ + } + + +ALPHATEST( LESS, < ) +ALPHATEST( EQUAL, == ) +ALPHATEST( LEQUAL, <= ) +ALPHATEST( GREATER, > ) +ALPHATEST( NOTEQUAL, != ) +ALPHATEST( GEQUAL, >= ) + + +/* XXX: Incorporate into shader using KILP. + */ +static int +alpha_test_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + switch (qs->softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_LESS: + return alpha_test_quads_LESS( qs, quads, nr ); + case PIPE_FUNC_EQUAL: + return alpha_test_quads_EQUAL( qs, quads, nr ); + break; + case PIPE_FUNC_LEQUAL: + return alpha_test_quads_LEQUAL( qs, quads, nr ); + case PIPE_FUNC_GREATER: + return alpha_test_quads_GREATER( qs, quads, nr ); + case PIPE_FUNC_NOTEQUAL: + return alpha_test_quads_NOTEQUAL( qs, quads, nr ); + case PIPE_FUNC_GEQUAL: + return alpha_test_quads_GEQUAL( qs, quads, nr ); + case PIPE_FUNC_ALWAYS: + return nr; + case PIPE_FUNC_NEVER: + default: + return 0; + } +} + +static unsigned mask_count[16] = +{ + 0, /* 0x0 */ + 1, /* 0x1 */ + 1, /* 0x2 */ + 2, /* 0x3 */ + 1, /* 0x4 */ + 2, /* 0x5 */ + 2, /* 0x6 */ + 3, /* 0x7 */ + 1, /* 0x8 */ + 2, /* 0x9 */ + 2, /* 0xa */ + 3, /* 0xb */ + 2, /* 0xc */ + 3, /* 0xd */ + 3, /* 0xe */ + 4, /* 0xf */ +}; + + + +static void +depth_test_quads_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const struct sp_fragment_shader *fs = qs->softpipe->fs; + boolean interp_depth = !fs->info.writes_z; + struct depth_data data; + + + if (qs->softpipe->depth_stencil->alpha.enabled) { + nr = alpha_test_quads(qs, quads, nr); + } + + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { + + data.ps = qs->softpipe->framebuffer.zsbuf; + data.format = data.ps->format; + data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, + quads[0]->input.x0, + quads[0]->input.y0); + + for (i = 0; i < nr; i++) { + get_depth_stencil_values(&data, quads[i]); + + if (qs->softpipe->depth_stencil->depth.enabled) { + if (interp_depth) + interpolate_quad_depth(quads[i]); + + convert_quad_depth(&data, quads[i]); } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); - tile->data.depth32[y][x] = z24s8; + + if (qs->softpipe->depth_stencil->stencil[0].enabled) { + depth_stencil_test_quad(qs, &data, quads[i]); + write_depth_stencil_values(&data, quads[i]); } - break; - case PIPE_FORMAT_Z24X8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j] << 8; + else { + if (!depth_test_quad(qs, &data, quads[i])) + continue; + + if (qs->softpipe->depth_stencil->depth.writemask) + write_depth_stencil_values(&data, quads[i]); } - break; - default: - assert(0); + + + quads[pass++] = quads[i]; + } + + nr = pass; + } + + if (qs->softpipe->active_query_count) { + for (i = 0; i < nr; i++) + qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; + } + + if (nr) + qs->next->run(qs->next, quads, nr); +} + +/* XXX: this function assumes setup function actually emits linear + * spans of quads. It seems a lot more natural to do (early) + * depth-testing on spans rather than quads. + */ +static void +depth_interp_z16_less_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] < depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] < depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] < depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] < depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; } + + if (pass) + qs->next->run(qs->next, quads, pass); + } static void -depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +depth_interp_z16_lequal_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - sp_depth_test_quad(qs, quad); + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] <= depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] <= depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] <= depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] <= depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); + } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); - if (quad->inout.mask) - qs->next->run(qs->next, quad); } + + + +static void +depth_noop(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + qs->next->run(qs->next, quads, nr); +} + + + +static void +choose_depth_test(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + boolean interp_depth = !qs->softpipe->fs->info.writes_z; + + boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; + + boolean depth = (qs->softpipe->framebuffer.zsbuf && + qs->softpipe->depth_stencil->depth.enabled); + + unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; + + boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled; + + boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; + + boolean occlusion = qs->softpipe->active_query_count; + + + if (!alpha && + !depth && + !stencil) { + qs->run = depth_noop; + } + else if (!alpha && + interp_depth && + depth && + depthwrite && + !occlusion && + !stencil) + { + switch (depthfunc) { + case PIPE_FUNC_LESS: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_less_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } + break; + case PIPE_FUNC_LEQUAL: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_lequal_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } + break; + default: + qs->run = depth_test_quads_fallback; + } + } + else { + qs->run = depth_test_quads_fallback; + } + + + qs->run( qs, quads, nr ); +} + + + + + static void depth_test_begin(struct quad_stage *qs) { + qs->run = choose_depth_test; qs->next->begin(qs->next); } @@ -283,7 +964,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = depth_test_begin; - stage->run = depth_test_quad; + stage->run = choose_depth_test; stage->destroy = depth_test_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c deleted file mode 100644 index 496fd39ed1a..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ /dev/null @@ -1,88 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Quad early-z testing - */ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * All this stage does is compute the quad's Z values (which is normally - * done by the shading stage). - * The next stage will do the actual depth test. - */ -static void -earlyz_quad( - struct quad_stage *qs, - struct quad_header *quad ) -{ - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; - - qs->next->run( qs->next, quad ); -} - -static void -earlyz_begin( - struct quad_stage *qs ) -{ - qs->next->begin( qs->next ); -} - -static void -earlyz_destroy( - struct quad_stage *qs ) -{ - FREE( qs ); -} - -struct quad_stage * -sp_quad_earlyz_stage( - struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); - - stage->softpipe = softpipe; - stage->begin = earlyz_begin; - stage->run = earlyz_quad; - stage->destroy = earlyz_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 28f8d1a60ea..1e7533d0f9e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -68,72 +68,69 @@ quad_shade_stage(struct quad_stage *qs) /** * Execute fragment shader for the four fragments in the quad. */ -static void +static INLINE boolean shade_quad(struct quad_stage *qs, struct quad_header *quad) { struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = qss->machine; - boolean z_written; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; /* run shader */ - quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad ); - - /* store outputs */ - z_written = FALSE; - { - const ubyte *sem_name = softpipe->fs->info.output_semantic_name; - const ubyte *sem_index = softpipe->fs->info.output_semantic_index; - const uint n = qss->stage.softpipe->fs->info.num_outputs; - uint i; - for (i = 0; i < n; i++) { - switch (sem_name[i]) { - case TGSI_SEMANTIC_COLOR: - { - uint cbuf = sem_index[i]; - memcpy(quad->output.color[cbuf], - &machine->Outputs[i].xyzw[0].f[0], - sizeof(quad->output.color[0]) ); - } - break; - case TGSI_SEMANTIC_POSITION: - { - uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } - z_written = TRUE; - } - break; - } + return softpipe->fs->run( softpipe->fs, machine, quad ); +} + + + +static void +coverage_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + float (*quadColor)[4] = quad->output.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; } } +} + - if (!z_written) { - /* compute Z values now, as in the quad earlyz stage */ - /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; - } - /* shader may cull fragments */ - if (quad->inout.mask) { - qs->next->run( qs->next, quad ); +static void +shade_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = qss->machine; + + unsigned i, pass = 0; + + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + machine->InterpCoefs = quads[0]->coef; + + for (i = 0; i < nr; i++) { + if (!shade_quad(qs, quads[i])) + continue; + + if (/*do_coverage*/ 0) + coverage_quad( qs, quads[i] ); + + quads[pass++] = quads[i]; } + + if (pass) + qs->next->run(qs->next, quads, pass); } + + + /** @@ -174,7 +171,7 @@ sp_quad_shade_stage( struct softpipe_context *softpipe ) qss->stage.softpipe = softpipe; qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; + qss->stage.run = shade_quads; qss->stage.destroy = shade_destroy; qss->machine = tgsi_exec_machine_create(); diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c deleted file mode 100644 index dfa7ff3b1d1..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Quad occlusion counter stage - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" - -static unsigned count_bits( unsigned val ) -{ - unsigned i; - - for (i = 0; val ; val >>= 1) - i += (val & 1); - - return i; -} - -static void -occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - - softpipe->occlusion_count += count_bits(quad->inout.mask); - - qs->next->run(qs->next, quad); -} - - -static void occlusion_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void occlusion_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = occlusion_begin; - stage->run = occlusion_count_quad; - stage->destroy = occlusion_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c deleted file mode 100644 index 92d5f9f3c1a..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ /dev/null @@ -1,103 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" -#include "sp_tile_cache.h" - - -/** - * Last step of quad processing: write quad colors to the framebuffer, - * taking mask into account. - */ -static void -output_quad(struct quad_stage *qs, struct quad_header *quad) -{ - /* in-tile pos: */ - const int itx = quad->input.x0 % TILE_SIZE; - const int ity = quad->input.y0 % TILE_SIZE; - - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - int i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - if (0) { - debug_printf("sp write pixel %d,%d: %g, %g, %g\n", - quad->input.x0 + x, - quad->input.y0 + y, - quadColor[0][j], - quadColor[1][j], - quadColor[2][j]); - } - } - } - } -} - - -static void output_begin(struct quad_stage *qs) -{ - assert(qs->next == NULL); -} - - -static void output_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = output_begin; - stage->run = output_quad; - stage->destroy = output_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index b5f69b74264..1b5bab4eca6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -31,88 +31,33 @@ #include "pipe/p_shader_tokens.h" static void -sp_push_quad_first( - struct softpipe_context *sp, - struct quad_stage *quad, - uint i ) +sp_push_quad_first( struct softpipe_context *sp, + struct quad_stage *quad ) { - quad->next = sp->quad[i].first; - sp->quad[i].first = quad; + quad->next = sp->quad.first; + sp->quad.first = quad; } -static void -sp_build_depth_stencil( - struct softpipe_context *sp, - uint i ) -{ - if (sp->depth_stencil->stencil[0].enabled || - sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); - } - else if (sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad[i].depth_test, i ); - } -} void sp_build_quad_pipeline(struct softpipe_context *sp) { - uint i; - boolean early_depth_test = - sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf && - !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; - - /* build up the pipeline in reverse order... */ - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - sp->quad[i].first = sp->quad[i].output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad[i].colormask, i ); - } + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad[i].blend, i ); - } + sp->quad.first = sp->quad.blend; - if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad[i].occlusion, i ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad[i].coverage, i ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp, i ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); - } - - /* XXX always enable shader? */ - if (1) { - sp_push_quad_first( sp, sp->quad[i].shade, i ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp, i ); - sp_push_quad_first( sp, sp->quad[i].earlyz, i ); - } - -#if !USE_DRAW_STAGE_PSTIPPLE - if (sp->rasterizer->poly_stipple_enable) { - sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); - } -#endif + if (early_depth_test) { + sp_push_quad_first( sp, sp->quad.shade ); + sp_push_quad_first( sp, sp->quad.depth_test ); + } + else { + sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad.shade ); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h index 0e40586ffc8..c0aa1348319 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.h +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -49,7 +49,7 @@ struct quad_stage { void (*begin)(struct quad_stage *qs); /** the stage action */ - void (*run)(struct quad_stage *qs, struct quad_header *quad); + void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr); void (*destroy)(struct quad_stage *qs); }; @@ -69,6 +69,4 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); void sp_build_quad_pipeline(struct softpipe_context *sp); -void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); - #endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c deleted file mode 100644 index 5e9d447737d..00000000000 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ /dev/null @@ -1,352 +0,0 @@ - -/** - * \brief Quad stencil testing - */ - - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - - -/** Only 8-bit stencil supported */ -#define STENCIL_MAX 0xff - - -/** - * Do the basic stencil test (compare stencil buffer values against the - * reference value. - * - * \param stencilVals the stencil values from the stencil buffer - * \param func the stencil func (PIPE_FUNC_x) - * \param ref the stencil reference value - * \param valMask the stencil value mask indicating which bits of the stencil - * values and ref value are to be used. - * \return mask indicating which pixels passed the stencil test - */ -static unsigned -do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, - unsigned ref, unsigned valMask) -{ - unsigned passMask = 0x0; - unsigned j; - - ref &= valMask; - - switch (func) { - case PIPE_FUNC_NEVER: - /* passMask = 0x0 */ - break; - case PIPE_FUNC_LESS: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; - break; - default: - assert(0); - } - - return passMask; -} - - -/** - * Apply the stencil operator to stencil values. - * - * \param stencilVals the stencil buffer values (read and written) - * \param mask indicates which pixels to update - * \param op the stencil operator (PIPE_STENCIL_OP_x) - * \param ref the stencil reference value - * \param wrtMask writemask controlling which bits are changed in the - * stencil values - */ -static void -apply_stencil_op(ubyte stencilVals[QUAD_SIZE], - unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) -{ - unsigned j; - ubyte newstencil[QUAD_SIZE]; - - for (j = 0; j < QUAD_SIZE; j++) { - newstencil[j] = stencilVals[j]; - } - - switch (op) { - case PIPE_STENCIL_OP_KEEP: - /* no-op */ - break; - case PIPE_STENCIL_OP_ZERO: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = 0; - } - } - break; - case PIPE_STENCIL_OP_REPLACE: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ref; - } - } - break; - case PIPE_STENCIL_OP_INCR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] < STENCIL_MAX) { - newstencil[j] = stencilVals[j] + 1; - } - } - } - break; - case PIPE_STENCIL_OP_DECR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] > 0) { - newstencil[j] = stencilVals[j] - 1; - } - } - } - break; - case PIPE_STENCIL_OP_INCR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] + 1; - } - } - break; - case PIPE_STENCIL_OP_DECR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] - 1; - } - } - break; - case PIPE_STENCIL_OP_INVERT: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ~stencilVals[j]; - } - } - break; - default: - assert(0); - } - - /* - * update the stencil values - */ - if (wrtMask != STENCIL_MAX) { - /* apply bit-wise stencil buffer writemask */ - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); - } - } - else { - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = newstencil[j]; - } - } -} - - -/** - * Do stencil (and depth) testing. Stenciling depends on the outcome of - * depth testing. - */ -static void -stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - unsigned func, zFailOp, zPassOp, failOp; - ubyte ref, wrtMask, valMask; - ubyte stencilVals[QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); - uint j; - uint face = quad->input.facing; - - if (!softpipe->depth_stencil->stencil[1].enabled) { - /* single-sided stencil test, use front (face=0) state */ - face = 0; - } - - /* choose front or back face function, operator, etc */ - /* XXX we could do these initializations once per primitive */ - func = softpipe->depth_stencil->stencil[face].func; - failOp = softpipe->depth_stencil->stencil[face].fail_op; - zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; - zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; - ref = softpipe->depth_stencil->stencil[face].ref_value; - wrtMask = softpipe->depth_stencil->stencil[face].writemask; - valMask = softpipe->depth_stencil->stencil[face].valuemask; - - assert(ps); /* shouldn't get here if there's no stencil buffer */ - - /* get stencil values from cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] >> 24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] & 0xff; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.stencil8[y][x]; - } - break; - default: - assert(0); - } - - /* do the stencil test first */ - { - unsigned passMask, failMask; - passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->inout.mask & ~passMask; - quad->inout.mask &= passMask; - - if (failOp != PIPE_STENCIL_OP_KEEP) { - apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); - } - } - - if (quad->inout.mask) { - /* now the pixels that passed the stencil test are depth tested */ - if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->inout.mask; - - sp_depth_test_quad(qs, quad); /* quad->mask is updated */ - - /* update stencil buffer values according to z pass/fail result */ - if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); - } - - if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); - } - } - else { - /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); - } - - } - - /* put new stencil values into cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); - tile->data.depth32[y][x] = s8z24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; - tile->data.depth32[y][x] = z24s8; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.stencil8[y][x] = stencilVals[j]; - } - break; - default: - assert(0); - } - - if (quad->inout.mask) - qs->next->run(qs->next, quad); -} - - -static void stencil_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void stencil_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = stencil_begin; - stage->run = stencil_test_quad; - stage->destroy = stencil_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 07162db7b6e..a0527a596a6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -14,14 +14,20 @@ * Apply polygon stipple to quads produced by triangle rasterization */ static void -stipple_quad(struct quad_stage *qs, struct quad_header *quad) +stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; + unsigned pass = nr; + + struct softpipe_context *softpipe = qs->softpipe; + unsigned q; + + pass = 0; + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; - if (quad->input.prim == QUAD_PRIM_TRI) { - struct softpipe_context *softpipe = qs->softpipe; - /* need to invert Y to index into OpenGL's stipple pattern */ const int col0 = quad->input.x0 % 32; const int y0 = quad->input.y0; const int y1 = y0 + 1; @@ -41,13 +47,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if ((stipple1 & (bit30 >> col0)) == 0) quad->inout.mask &= ~MASK_BOTTOM_RIGHT; - if (!quad->inout.mask) { - /* all fragments failed stipple test, end of quad pipeline */ - return; - } + if (quad->inout.mask) + quads[pass++] = quad; } - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, pass); } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 6178c4ac7e3..81fb7aa20c6 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -40,7 +40,7 @@ static const char * softpipe_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } @@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -84,7 +82,7 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return 9; /* max 256x256x256 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_TGSI_CONT_SUPPORTED: @@ -137,10 +135,14 @@ softpipe_is_format_supported( struct pipe_screen *screen, target == PIPE_TEXTURE_CUBE); switch(format) { + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YCBCR: case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_Z32_FLOAT: return FALSE; default: return TRUE; diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index de3ae3c3696..ade125662ad 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -33,7 +33,6 @@ */ #include "sp_context.h" -#include "sp_prim_setup.h" #include "sp_quad.h" #include "sp_quad_pipe.h" #include "sp_setup.h" @@ -61,87 +60,9 @@ struct edge { int lines; /**< number of lines on this edge */ }; -#if SP_NUM_QUAD_THREADS > 1 -/* Set to 1 if you want other threads to be instantly - * notified of pending jobs. - */ -#define INSTANT_NOTEMPTY_NOTIFY 0 - -struct thread_info -{ - struct setup_context *setup; - uint id; - pipe_thread handle; -}; - -struct quad_job; - -typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); - -struct quad_job -{ - struct quad_header_input input; - struct quad_header_inout inout; - quad_job_routine routine; -}; - -#define NUM_QUAD_JOBS 64 - -struct quad_job_que -{ - struct quad_job jobs[NUM_QUAD_JOBS]; - uint first; - uint last; - pipe_mutex que_mutex; - pipe_condvar que_notfull_condvar; - pipe_condvar que_notempty_condvar; - uint jobs_added; - uint jobs_done; - pipe_condvar que_done_condvar; -}; - -static void -add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) -{ -#if INSTANT_NOTEMPTY_NOTIFY - boolean empty; -#endif - - /* Wait for empty slot, see if the que is empty. - */ - pipe_mutex_lock( que->que_mutex ); - while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { -#if !INSTANT_NOTEMPTY_NOTIFY - pipe_condvar_broadcast( que->que_notempty_condvar ); -#endif - pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); - } -#if INSTANT_NOTEMPTY_NOTIFY - empty = que->last == que->first; -#endif - que->jobs_added++; - pipe_mutex_unlock( que->que_mutex ); +#define MAX_QUADS 16 - /* Submit new job. - */ - que->jobs[que->last].input = quad->input; - que->jobs[que->last].inout = quad->inout; - que->jobs[que->last].routine = routine; - que->last = (que->last + 1) % NUM_QUAD_JOBS; - -#if INSTANT_NOTEMPTY_NOTIFY - /* If the que was empty, notify consumers there's a job to be done. - */ - if (empty) { - pipe_mutex_lock( que->que_mutex ); - pipe_condvar_broadcast( que->que_notempty_condvar ); - pipe_mutex_unlock( que->que_mutex ); - } -#endif -} - -#endif /** * Triangle setup info (derived from draw_stage). @@ -164,22 +85,19 @@ struct setup_context { struct edge emaj; float oneoverarea; + int facing; + + struct quad_header quad[MAX_QUADS]; + struct quad_header *quad_ptrs[MAX_QUADS]; + unsigned count; struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; struct tgsi_interp_coef posCoef; /* For Z, W */ - struct quad_header quad; - -#if SP_NUM_QUAD_THREADS > 1 - struct quad_job_que que; - struct thread_info threads[SP_NUM_QUAD_THREADS]; -#endif struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ } span; #if DEBUG_FRAGS @@ -190,67 +108,6 @@ struct setup_context { unsigned winding; /* which winding to cull */ }; -#if SP_NUM_QUAD_THREADS > 1 - -static PIPE_THREAD_ROUTINE( quad_thread, param ) -{ - struct thread_info *info = (struct thread_info *) param; - struct quad_job_que *que = &info->setup->que; - - for (;;) { - struct quad_job job; - boolean full; - - /* Wait for an available job. - */ - pipe_mutex_lock( que->que_mutex ); - while (que->last == que->first) - pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); - - /* See if the que is full. - */ - full = (que->last + 1) % NUM_QUAD_JOBS == que->first; - - /* Take a job and remove it from que. - */ - job = que->jobs[que->first]; - que->first = (que->first + 1) % NUM_QUAD_JOBS; - - /* Notify the producer if the que is not full. - */ - if (full) - pipe_condvar_signal( que->que_notfull_condvar ); - pipe_mutex_unlock( que->que_mutex ); - - job.routine( info->setup, info->id, &job ); - - /* Notify the producer if that's the last finished job. - */ - pipe_mutex_lock( que->que_mutex ); - que->jobs_done++; - if (que->jobs_added == que->jobs_done) - pipe_condvar_signal( que->que_done_condvar ); - pipe_mutex_unlock( que->que_mutex ); - } - - return NULL; -} - -#define WAIT_FOR_COMPLETION(setup) \ - do {\ - pipe_mutex_lock( setup->que.que_mutex );\ - if (!INSTANT_NOTEMPTY_NOTIFY)\ - pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ - while (setup->que.jobs_added != setup->que.jobs_done)\ - pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ - pipe_mutex_unlock( setup->que.que_mutex );\ - } while (0) - -#else - -#define WAIT_FOR_COMPLETION(setup) ((void) 0) - -#endif @@ -313,98 +170,18 @@ quad_clip( struct setup_context *setup, struct quad_header *quad ) * Emit a quad (pass to next stage) with clipping. */ static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) { quad_clip( setup, quad ); + if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad[thread].first->run( sp->quad[thread].first, quad ); + sp->quad.first->run( sp->quad.first, &quad, 1 ); } } -#if SP_NUM_QUAD_THREADS > 1 - -static void -clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) -{ - struct quad_header quad; - - quad.input = job->input; - quad.inout = job->inout; - quad.coef = setup->quad.coef; - quad.posCoef = setup->quad.posCoef; - quad.nr_attrs = setup->quad.nr_attrs; - clip_emit_quad( setup, &quad, thread ); -} - -#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) - -#else - -#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) - -#endif - -/** - * Emit a quad (pass to next stage). No clipping is done. - */ -static INLINE void -emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) -{ - struct softpipe_context *sp = setup->softpipe; -#if DEBUG_FRAGS - uint mask = quad->inout.mask; -#endif - -#if DEBUG_FRAGS - if (mask & 1) setup->numFragsEmitted++; - if (mask & 2) setup->numFragsEmitted++; - if (mask & 4) setup->numFragsEmitted++; - if (mask & 8) setup->numFragsEmitted++; -#endif - sp->quad[thread].first->run( sp->quad[thread].first, quad ); -#if DEBUG_FRAGS - mask = quad->inout.mask; - if (mask & 1) setup->numFragsWritten++; - if (mask & 2) setup->numFragsWritten++; - if (mask & 4) setup->numFragsWritten++; - if (mask & 8) setup->numFragsWritten++; -#endif -} - -#if SP_NUM_QUAD_THREADS > 1 -static void -emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) -{ - struct quad_header quad; - - quad.input = job->input; - quad.inout = job->inout; - quad.coef = setup->quad.coef; - quad.posCoef = setup->quad.posCoef; - quad.nr_attrs = setup->quad.nr_attrs; - emit_quad( setup, &quad, thread ); -} - -#define EMIT_QUAD(setup,x,y,mask) do {\ - setup->quad.input.x0 = x;\ - setup->quad.input.y0 = y;\ - setup->quad.inout.mask = mask;\ - add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ - } while (0) - -#else - -#define EMIT_QUAD(setup,x,y,mask) do {\ - setup->quad.input.x0 = x;\ - setup->quad.input.y0 = y;\ - setup->quad.inout.mask = mask;\ - emit_quad( setup, &setup->quad, 0 );\ - } while (0) - -#endif /** * Given an X or Y coordinate, return the block/quad coordinate that it @@ -412,7 +189,12 @@ emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) */ static INLINE int block( int x ) { - return x & ~1; + return x & ~(2-1); +} + +static INLINE int block_x( int x ) +{ + return x & ~(16-1); } @@ -421,72 +203,63 @@ static INLINE int block( int x ) */ static void flush_spans( struct setup_context *setup ) { + const int step = 16; const int xleft0 = setup->span.left[0]; const int xleft1 = setup->span.left[1]; const int xright0 = setup->span.right[0]; const int xright1 = setup->span.right[1]; - int minleft, maxright; + struct quad_stage *pipe = setup->softpipe->quad.first; + + + int minleft = block_x(MIN2(xleft0, xleft1)); + int maxright = MAX2(xright0, xright1); int x; - switch (setup->span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = block(MIN2(xleft0, xleft1)); - maxright = block(MAX2(xright0, xright1)); - for (x = minleft; x <= maxright; x += 2) { - /* determine which of the four pixels is inside the span bounds */ - uint mask = 0x0; - if (x >= xleft0 && x < xright0) - mask |= MASK_TOP_LEFT; - if (x >= xleft1 && x < xright1) - mask |= MASK_BOTTOM_LEFT; - if (x+1 >= xleft0 && x+1 < xright0) - mask |= MASK_TOP_RIGHT; - if (x+1 >= xleft1 && x+1 < xright1) - mask |= MASK_BOTTOM_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = block(xleft0); - maxright = block(xright0); - for (x = minleft; x <= maxright; x += 2) { - uint mask = 0x0; - if (x >= xleft0 && x < xright0) - mask |= MASK_TOP_LEFT; - if (x+1 >= xleft0 && x+1 < xright0) - mask |= MASK_TOP_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = block(xleft1); - maxright = block(xright1); - for (x = minleft; x <= maxright; x += 2) { - uint mask = 0x0; - if (x >= xleft1 && x < xright1) - mask |= MASK_BOTTOM_LEFT; - if (x+1 >= xleft1 && x+1 < xright1) - mask |= MASK_BOTTOM_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; + for (x = minleft; x < maxright; x += step) { + unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); + unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); + unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); + unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); + unsigned lx = x; + unsigned q = 0; - default: - return; + unsigned skipmask_left0 = (1U << skip_left0) - 1U; + unsigned skipmask_left1 = (1U << skip_left1) - 1U; + + /* These calculations fail when step == 32 and skip_right == 0. + */ + unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); + unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); + + unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; + unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; + + if (mask0 | mask1) { + do { + unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); + if (quadmask) { + setup->quad[q].input.x0 = lx; + setup->quad[q].input.y0 = setup->span.y; + setup->quad[q].input.facing = setup->facing; + setup->quad[q].inout.mask = quadmask; + setup->quad_ptrs[q] = &setup->quad[q]; + q++; + } + mask0 >>= 2; + mask1 >>= 2; + lx += 2; + } while (mask0 | mask1); + + pipe->run( pipe, setup->quad_ptrs, q ); + } } + setup->span.y = 0; - setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; + setup->span.left[0] = 1000000; /* greater than right[0] */ + setup->span.left[1] = 1000000; /* greater than right[1] */ } @@ -496,7 +269,7 @@ static void print_vertex(const struct setup_context *setup, { int i; debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { + for (i = 0; i < setup->quad[0].nr_attrs; i++) { debug_printf(" %d: %f %f %f %f\n", i, v[i][0], v[i][1], v[i][2], v[i][3]); if (util_is_inf_or_nan(v[i][0])) { @@ -601,7 +374,9 @@ static boolean setup_sort_vertices( struct setup_context *setup, * - the GLSL gl_FrontFacing fragment attribute (bool) * - two-sided stencil test */ - setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + setup->facing = + ((det > 0.0) ^ + (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW)); return TRUE; } @@ -788,7 +563,7 @@ static void setup_tri_coefficients( struct setup_context *setup ) } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } @@ -844,11 +619,10 @@ static void subtriangle( struct setup_context *setup, /* clip top/bottom */ start_y = sy; - finish_y = sy + lines; - if (start_y < miny) start_y = miny; + finish_y = sy + lines; if (finish_y > maxy) finish_y = maxy; @@ -885,7 +659,6 @@ static void subtriangle( struct setup_context *setup, setup->span.left[_y&1] = left; setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); } } @@ -958,10 +731,9 @@ void setup_tri( struct setup_context *setup, setup_tri_coefficients( setup ); setup_tri_edges( setup ); - setup->quad.input.prim = QUAD_PRIM_TRI; + assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES); setup->span.y = 0; - setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ @@ -983,8 +755,6 @@ void setup_tri( struct setup_context *setup, flush_spans( setup ); - WAIT_FOR_COMPLETION(setup); - #if DEBUG_FRAGS printf("Tri: %u frags emitted, %u written\n", setup->numFragsEmitted, @@ -1101,7 +871,7 @@ setup_line_coefficients(struct setup_context *setup, } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } @@ -1122,20 +892,20 @@ plot(struct setup_context *setup, int x, int y) const int quadY = y - iy; const int mask = (1 << ix) << (2 * iy); - if (quadX != setup->quad.input.x0 || - quadY != setup->quad.input.y0) + if (quadX != setup->quad[0].input.x0 || + quadY != setup->quad[0].input.y0) { /* flush prev quad, start new quad */ - if (setup->quad.input.x0 != -1) - CLIP_EMIT_QUAD(setup); + if (setup->quad[0].input.x0 != -1) + clip_emit_quad( setup, &setup->quad[0] ); - setup->quad.input.x0 = quadX; - setup->quad.input.y0 = quadY; - setup->quad.inout.mask = 0x0; + setup->quad[0].input.x0 = quadX; + setup->quad[0].input.y0 = quadY; + setup->quad[0].inout.mask = 0x0; } - setup->quad.inout.mask |= mask; + setup->quad[0].inout.mask |= mask; } @@ -1195,17 +965,18 @@ setup_line(struct setup_context *setup, assert(dx >= 0); assert(dy >= 0); + assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES); + + setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; + setup->quad[0].inout.mask = 0x0; - setup->quad.input.x0 = setup->quad.input.y0 = -1; - setup->quad.inout.mask = 0x0; - setup->quad.input.prim = QUAD_PRIM_LINE; /* XXX temporary: set coverage to 1.0 so the line appears * if AA mode happens to be enabled. */ - setup->quad.input.coverage[0] = - setup->quad.input.coverage[1] = - setup->quad.input.coverage[2] = - setup->quad.input.coverage[3] = 1.0; + setup->quad[0].input.coverage[0] = + setup->quad[0].input.coverage[1] = + setup->quad[0].input.coverage[2] = + setup->quad[0].input.coverage[3] = 1.0; if (dx > dy) { /*** X-major line ***/ @@ -1249,11 +1020,9 @@ setup_line(struct setup_context *setup, } /* draw final quad */ - if (setup->quad.inout.mask) { - CLIP_EMIT_QUAD(setup); + if (setup->quad[0].inout.mask) { + clip_emit_quad( setup, &setup->quad[0] ); } - - WAIT_FOR_COMPLETION(setup); } @@ -1300,6 +1069,8 @@ setup_point( struct setup_context *setup, if (softpipe->no_rast) return; + assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS); + /* For points, all interpolants are constant-valued. * However, for point sprites, we'll need to setup texcoords appropriately. * XXX: which coefficients are the texcoords??? @@ -1346,22 +1117,21 @@ setup_point( struct setup_context *setup, } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } } - setup->quad.input.prim = QUAD_PRIM_POINT; if (halfSize <= 0.5 && !round) { /* special case for 1-pixel points */ const int ix = ((int) x) & 1; const int iy = ((int) y) & 1; - setup->quad.input.x0 = (int) x - ix; - setup->quad.input.y0 = (int) y - iy; - setup->quad.inout.mask = (1 << ix) << (2 * iy); - CLIP_EMIT_QUAD(setup); + setup->quad[0].input.x0 = (int) x - ix; + setup->quad[0].input.y0 = (int) y - iy; + setup->quad[0].inout.mask = (1 << ix) << (2 * iy); + clip_emit_quad( setup, &setup->quad[0] ); } else { if (round) { @@ -1381,15 +1151,15 @@ setup_point( struct setup_context *setup, for (ix = ixmin; ix <= ixmax; ix += 2) { float dx, dy, dist2, cover; - setup->quad.inout.mask = 0x0; + setup->quad[0].inout.mask = 0x0; dx = (ix + 0.5f) - x; dy = (iy + 0.5f) - y; dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_TOP_LEFT; + setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_TOP_LEFT; } dx = (ix + 1.5f) - x; @@ -1397,8 +1167,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_TOP_RIGHT; + setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_TOP_RIGHT; } dx = (ix + 0.5f) - x; @@ -1406,8 +1176,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_BOTTOM_LEFT; + setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; } dx = (ix + 1.5f) - x; @@ -1415,14 +1185,14 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; + setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; } - if (setup->quad.inout.mask) { - setup->quad.input.x0 = ix; - setup->quad.input.y0 = iy; - CLIP_EMIT_QUAD(setup); + if (setup->quad[0].inout.mask) { + setup->quad[0].input.x0 = ix; + setup->quad[0].input.y0 = iy; + clip_emit_quad( setup, &setup->quad[0] ); } } } @@ -1466,33 +1236,25 @@ setup_point( struct setup_context *setup, mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); } - setup->quad.inout.mask = mask; - setup->quad.input.x0 = ix; - setup->quad.input.y0 = iy; - CLIP_EMIT_QUAD(setup); + setup->quad[0].inout.mask = mask; + setup->quad[0].input.x0 = ix; + setup->quad[0].input.y0 = iy; + clip_emit_quad( setup, &setup->quad[0] ); } } } } - - WAIT_FOR_COMPLETION(setup); } void setup_prepare( struct setup_context *setup ) { struct softpipe_context *sp = setup->softpipe; - unsigned i; if (sp->dirty) { softpipe_update_derived(sp); } - /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - sp->quad[i].first->begin( sp->quad[i].first ); - } + sp->quad.first->begin( sp->quad.first ); if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && @@ -1520,30 +1282,17 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); -#if SP_NUM_QUAD_THREADS > 1 - uint i; -#endif + unsigned i; setup->softpipe = softpipe; - setup->quad.coef = setup->coef; - setup->quad.posCoef = &setup->posCoef; - -#if SP_NUM_QUAD_THREADS > 1 - setup->que.first = 0; - setup->que.last = 0; - pipe_mutex_init( setup->que.que_mutex ); - pipe_condvar_init( setup->que.que_notfull_condvar ); - pipe_condvar_init( setup->que.que_notempty_condvar ); - setup->que.jobs_added = 0; - setup->que.jobs_done = 0; - pipe_condvar_init( setup->que.que_done_condvar ); - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - setup->threads[i].setup = setup; - setup->threads[i].id = i; - setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); + for (i = 0; i < MAX_QUADS; i++) { + setup->quad[i].coef = setup->coef; + setup->quad[i].posCoef = &setup->posCoef; } -#endif + + setup->span.left[0] = 1000000; /* greater than right[0] */ + setup->span.left[1] = 1000000; /* greater than right[1] */ return setup; } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 9776e978e3e..77ee3c1136b 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -87,6 +87,7 @@ struct sp_fragment_shader { struct sp_vertex_shader { struct pipe_shader_state shader; struct draw_vertex_shader *draw_data; + int max_sampler; /* -1 if no samplers */ }; diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 384fe559afd..efed082f823 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -45,7 +45,7 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->blend = (const struct pipe_blend_state *)blend; + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; } @@ -86,7 +86,7 @@ softpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + softpipe->depth_stencil = (struct pipe_depth_stencil_alpha_state *)depth_stencil; softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 75551000c9b..1faeca1c2a3 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -32,7 +32,10 @@ #include "draw/draw_vertex.h" #include "draw/draw_private.h" #include "sp_context.h" +#include "sp_screen.h" #include "sp_state.h" +#include "sp_texture.h" +#include "sp_tex_tile_cache.h" /** @@ -65,24 +68,19 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) const struct sp_fragment_shader *spfs = softpipe->fs; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(softpipe->draw); uint i; - if (softpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output @@ -91,6 +89,23 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) vinfo->num_attribs = 0; for (i = 0; i < spfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (spfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(softpipe->draw, @@ -106,7 +121,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -114,7 +129,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: @@ -164,11 +179,19 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) static void compute_cliprect(struct softpipe_context *sp) { + /* SP_NEW_FRAMEBUFFER + */ uint surfWidth = sp->framebuffer.width; uint surfHeight = sp->framebuffer.height; + /* SP_NEW_RASTERIZER + */ if (sp->rasterizer->scissor) { - /* clip to scissor rect */ + + /* SP_NEW_SCISSOR + * + * clip to scissor rect: + */ sp->cliprect.minx = MAX2(sp->scissor.minx, 0); sp->cliprect.miny = MAX2(sp->scissor.miny, 0); sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); @@ -184,27 +207,63 @@ compute_cliprect(struct softpipe_context *sp) } +static void +update_tgsi_samplers( struct softpipe_context *softpipe ) +{ + unsigned i; + + softpipe_reset_sampler_varients( softpipe ); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->tex_cache[i]; + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture( tc ); + /* + _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); + */ + tc->timestamp = spt->timestamp; + } + } + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ void softpipe_update_derived( struct softpipe_context *softpipe ) { + struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); + + /* Check for updated textures. + */ + if (softpipe->tex_timestamp != sp_screen->timestamp) { + softpipe->tex_timestamp = sp_screen->timestamp; + softpipe->dirty |= SP_NEW_TEXTURE; + } + + if (softpipe->dirty & (SP_NEW_SAMPLER | + SP_NEW_TEXTURE | + SP_NEW_FS | + SP_NEW_VS)) + update_tgsi_samplers( softpipe ); + if (softpipe->dirty & (SP_NEW_RASTERIZER | SP_NEW_FS | SP_NEW_VS)) invalidate_vertex_layout( softpipe ); if (softpipe->dirty & (SP_NEW_SCISSOR | - SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_RASTERIZER | SP_NEW_FRAMEBUFFER)) compute_cliprect(softpipe); if (softpipe->dirty & (SP_NEW_BLEND | SP_NEW_DEPTH_STENCIL_ALPHA | SP_NEW_FRAMEBUFFER | - SP_NEW_RASTERIZER | - SP_NEW_FS | - SP_NEW_QUERY)) + SP_NEW_FS)) sp_build_quad_pipeline(softpipe); softpipe->dirty = 0; diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 4330c203935..256faa94b84 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -31,9 +31,8 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" +#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_parse.h" @@ -51,12 +50,9 @@ softpipe_create_fs_state(struct pipe_context *pipe, tgsi_dump(templ->tokens, 0); /* codegen */ - state = softpipe_create_fs_llvm( softpipe, templ ); + state = softpipe_create_fs_sse( softpipe, templ ); if (!state) { - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } + state = softpipe_create_fs_exec( softpipe, templ ); } assert(state); @@ -111,6 +107,8 @@ softpipe_create_vs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + state->max_sampler = state->draw_data->info.file_max[TGSI_FILE_SAMPLER]; + return state; fail: @@ -128,7 +126,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->vs = (const struct sp_vertex_shader *)vs; + softpipe->vs = (struct sp_vertex_shader *) vs; draw_bind_vertex_shader(softpipe->draw, (softpipe->vs ? softpipe->vs->draw_data : NULL)); @@ -142,8 +140,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_vertex_shader *state = - (struct sp_vertex_shader *)vs; + struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs; draw_delete_vertex_shader(softpipe->draw, state->draw_data); FREE( state ); diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index cb517b02e44..db0b8ab76b1 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -32,21 +32,37 @@ #include "util/u_memory.h" #include "draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_context.h" -#include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" -#include "draw/draw_context.h" +#include "sp_tex_sample.h" +#include "sp_tex_tile_cache.h" +struct sp_sampler { + struct pipe_sampler_state base; + struct sp_sampler_varient *varients; + struct sp_sampler_varient *current; +}; + +static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) +{ + return (struct sp_sampler *)sampler; +} + void * softpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { - return mem_dup(sampler, sizeof(*sampler)); + struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler); + + sp_sampler->base = *sampler; + sp_sampler->varients = NULL; + + return (void *)sp_sampler; } @@ -97,7 +113,7 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference(&softpipe->texture[i], tex); - sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + sp_tex_tile_cache_set_texture(softpipe->tex_cache[i], tex); } softpipe->num_textures = num; @@ -106,10 +122,111 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } +/** + * Find/create an sp_sampler_varient object for sampling the given texture, + * sampler and tex unit. + * + * Note that the tex unit is significant. We can't re-use a sampler + * varient for multiple texture units because the sampler varient contains + * the texture object pointer. If the texture object pointer were stored + * somewhere outside the sampler varient, we could re-use samplers for + * multiple texture units. + */ +static struct sp_sampler_varient * +get_sampler_varient( unsigned unit, + struct sp_sampler *sampler, + struct pipe_texture *texture, + unsigned processor ) +{ + struct softpipe_texture *sp_texture = softpipe_texture(texture); + struct sp_sampler_varient *v = NULL; + union sp_sampler_key key; + + /* if this fails, widen the key.unit field and update this assertion */ + assert(PIPE_MAX_SAMPLERS <= 16); + + key.bits.target = sp_texture->base.target; + key.bits.is_pot = sp_texture->pot; + key.bits.processor = processor; + key.bits.unit = unit; + key.bits.pad = 0; + + if (sampler->current && + key.value == sampler->current->key.value) { + v = sampler->current; + } + + if (v == NULL) { + for (v = sampler->varients; v; v = v->next) + if (v->key.value == key.value) + break; + + if (v == NULL) { + v = sp_create_sampler_varient( &sampler->base, key ); + v->next = sampler->varients; + sampler->varients = v; + } + } + + sampler->current = v; + return v; +} + + + + +void +softpipe_reset_sampler_varients(struct softpipe_context *softpipe) +{ + int i; + + /* It's a bit hard to build these samplers ahead of time -- don't + * really know which samplers are going to be used for vertex and + * fragment programs. + */ + for (i = 0; i <= softpipe->vs->max_sampler; i++) { + if (softpipe->sampler[i]) { + softpipe->tgsi.vert_samplers_list[i] = + get_sampler_varient( i, + sp_sampler(softpipe->sampler[i]), + softpipe->texture[i], + TGSI_PROCESSOR_VERTEX ); + + sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], + softpipe->tex_cache[i], + softpipe->texture[i] ); + } + } + + for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + if (softpipe->sampler[i]) { + softpipe->tgsi.frag_samplers_list[i] = + get_sampler_varient( i, + sp_sampler(softpipe->sampler[i]), + softpipe->texture[i], + TGSI_PROCESSOR_FRAGMENT ); + + sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i], + softpipe->tex_cache[i], + softpipe->texture[i] ); + } + } +} + + + void softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { + struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler; + struct sp_sampler_varient *v, *tmp; + + for (v = sp_sampler->varients; v; v = tmp) { + tmp = v->next; + sp_sampler_varient_destroy(v); + } + FREE( sampler ); } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 7c06d864a75..c8f55c3cec4 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -53,7 +53,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { /* flush old */ - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp->cbuf_cache[i]); /* assign new */ sp->framebuffer.cbufs[i] = fb->cbufs[i]; @@ -68,58 +68,28 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* zbuf changing? */ if (sp->framebuffer.zsbuf != fb->zsbuf) { /* flush old */ - sp_flush_tile_cache(sp, sp->zsbuf_cache); + sp_flush_tile_cache(sp->zsbuf_cache); /* assign new */ sp->framebuffer.zsbuf = fb->zsbuf; /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); - } - -#if 0 - /* XXX combined depth/stencil here */ - - /* sbuf changing? */ - if (sp->framebuffer.sbuf != fb->sbuf) { - /* flush old */ - sp_flush_tile_cache(sp, sp->sbuf_cache_sep); - - /* assign new */ - sp->framebuffer.sbuf = fb->sbuf; - - /* update cache */ - if (fb->sbuf != fb->zbuf) { - /* separate stencil buf */ - sp->sbuf_cache = sp->sbuf_cache_sep; - sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); - } - else { - /* combined depth/stencil */ - sp->sbuf_cache = sp->zbuf_cache; - sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); - } - } -#endif - /* Tell draw module how deep the Z/depth buffer is */ - { - int depth_bits; - double mrd; + /* Tell draw module how deep the Z/depth buffer is */ if (sp->framebuffer.zsbuf) { + int depth_bits; + double mrd; depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, PIPE_FORMAT_COMP_Z); + if (depth_bits > 16) { + mrd = 0.0000001; + } + else { + mrd = 0.00002; + } + draw_set_mrd(sp->draw, mrd); } - else { - depth_bits = 0; - } - if (depth_bits > 16) { - mrd = 0.0000001; - } - else { - mrd = 0.00002; - } - draw_set_mrd(sp->draw, mrd); } sp->framebuffer.width = fb->width; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a1d3bade27a..c22ee86b66c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -31,29 +31,33 @@ * * Authors: * Brian Paul + * Keith Whitwell */ -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" -#include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "sp_quad.h" /* only for #define QUAD_* tokens */ +#include "sp_tex_sample.h" +#include "sp_tex_tile_cache.h" /* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. + * Return fractional part of 'f'. Used for computing interpolation weights. + * Need to be careful with negative values. + * Note, if this function isn't perfect you'll sometimes see 1-pixel bands + * of improperly weighted linear-filtered textures. * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). */ -#define FRAC(f) ((f) - util_ifloor(f)) +static INLINE float +frac(float f) +{ + return f - util_ifloor(f); +} + /** @@ -100,10 +104,16 @@ lerp_3d(float a, float b, float c, /** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. + * Compute coord % size for repeat wrap modes. + * Note that if coord is a signed integer, coord % size doesn't give + * the right value for coord < 0 (in terms of texture repeat). Just + * casting to unsigned fixes that. */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) +static INLINE int +repeat(int coord, unsigned size) +{ + return (int) ((unsigned) coord % size); +} /** @@ -115,133 +125,153 @@ lerp_3d(float a, float b, float c, * \param icoord returns the integer texcoords * \return integer texture index */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) +static void +wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch] * size); + icoord[ch] = repeat(i, size); + } +} + + +static void +wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4]) +{ + uint ch; + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= 0.0F) + icoord[ch] = 0; + else if (s[ch] >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + + +static void +wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] < min) + icoord[ch] = 0; + else if (s[ch] > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + + +static void +wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= min) + icoord[ch] = -1; + else if (s[ch] >= max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + + +static void +wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4]) +{ + uint ch; + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + + +static void +wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); + const float u = fabsf(s[ch]); + if (u <= 0.0F) + icoord[ch] = 0; + else if (u >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + + +static void +wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + + +static void +wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = -1; + else if (u > max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(u * size); } } @@ -256,125 +286,156 @@ nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, * \param w returns blend factor/weight between texture indexes * \param icoord returns the computed integer texture coords */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, +static void +wrap_linear_repeat(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = repeat(util_ifloor(u), size); + icoord1[ch] = repeat(icoord0[ch] + 1, size); + w[ch] = frac(u); + } +} + + +static void +wrap_linear_clamp(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) { uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = frac(u); + } +} - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); + +static void +wrap_linear_clamp_to_edge(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = frac(u); + } +} + + +static void +wrap_linear_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = frac(u); + } +} + + +static void +wrap_linear_mirror_repeat(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = frac(u); + } +} + + +static void +wrap_linear_mirror_clamp(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = frac(u); + } +} + + +static void +wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = frac(u); + } +} + + +static void +wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = frac(u); } } @@ -383,27 +444,27 @@ linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, * For RECT textures / unnormalized texcoords * Only a subset of wrap modes supported. */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) +static void +wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } +} + + +/** + * Handles clamp_to_edge and clamp_to_border: + */ +static void +wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); } } @@ -412,358 +473,971 @@ nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, * For RECT textures / unnormalized texcoords. * Only a subset of wrap modes supported. */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) +static void +wrap_linear_unorm_clamp(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); + for (ch = 0; ch < 4; ch++) { + /* Not exactly what the spec says, but it matches NVIDIA output */ + float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = frac(u); } } -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +static void +wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = frac(u); + } +} + + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + */ +static float +compute_lambda_1d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float rho = MAX2(dsdx, dsdy) * texture->width[0]; + float lambda; + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; +} + + +static float +compute_lambda_2d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); + float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); + float maxx = MAX2(dsdx, dsdy) * texture->width[0]; + float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float rho = MAX2(maxx, maxy); + float lambda; + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; +} + + +static float +compute_lambda_3d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); + float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); + float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); + float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); + float maxx = MAX2(dsdx, dsdy) * texture->width[0]; + float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float maxz = MAX2(dpdx, dpdy) * texture->depth[0]; + float rho, lambda; + + rho = MAX2(maxx, maxy); + rho = MAX2(rho, maxz); + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; +} + + +/** + * Compute lambda for a vertex texture sampler. + * Since there aren't derivatives to use, just return the LOD bias. + */ +static float +compute_lambda_vert(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + return lodbias; +} + + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param addr the template tex address containing cube, z, face info. + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param rgba the quad to put the texel/color into + * + * XXX maybe move this into sp_tex_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ + + + + +static INLINE const float * +get_texel_2d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y) +{ + const struct softpipe_tex_cached_tile *tile; + + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + y %= TILE_SIZE; + x %= TILE_SIZE; + + tile = sp_get_cached_tile_tex(samp->cache, addr); + + return &tile->data.color[y][x][0]; +} + + +static INLINE const float * +get_texel_2d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y) +{ + const struct pipe_texture *texture = samp->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level]) { + return samp->sampler->border_color; + } + else { + return get_texel_2d_no_border( samp, addr, x, y ); + } +} + + +/* Gather a quad of adjacent texels within a tile: + */ +static INLINE void +get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + unsigned x, unsigned y, + const float *out[4]) +{ + const struct softpipe_tex_cached_tile *tile; + + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + y %= TILE_SIZE; + x %= TILE_SIZE; + + tile = sp_get_cached_tile_tex(samp->cache, addr); + + out[0] = &tile->data.color[y ][x ][0]; + out[1] = &tile->data.color[y ][x+1][0]; + out[2] = &tile->data.color[y+1][x ][0]; + out[3] = &tile->data.color[y+1][x+1][0]; +} + + +/* Gather a quad of potentially non-adjacent texels: + */ +static INLINE void +get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + int x0, int y0, + int x1, int y1, + const float *out[4]) +{ + out[0] = get_texel_2d_no_border( samp, addr, x0, y0 ); + out[1] = get_texel_2d_no_border( samp, addr, x1, y0 ); + out[2] = get_texel_2d_no_border( samp, addr, x0, y1 ); + out[3] = get_texel_2d_no_border( samp, addr, x1, y1 ); +} + +/* Can involve a lot of unnecessary checks for border color: + */ +static INLINE void +get_texel_quad_2d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + int x0, int y0, + int x1, int y1, + const float *out[4]) +{ + out[0] = get_texel_2d( samp, addr, x0, y0 ); + out[1] = get_texel_2d( samp, addr, x1, y0 ); + out[3] = get_texel_2d( samp, addr, x1, y1 ); + out[2] = get_texel_2d( samp, addr, x0, y1 ); +} + + + +/* 3d varients: + */ +static INLINE const float * +get_texel_3d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y, int z) +{ + const struct softpipe_tex_cached_tile *tile; + + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + addr.bits.z = z; + y %= TILE_SIZE; + x %= TILE_SIZE; + + tile = sp_get_cached_tile_tex(samp->cache, addr); + + return &tile->data.color[y][x][0]; +} + + +static INLINE const float * +get_texel_3d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y, int z) +{ + const struct pipe_texture *texture = samp->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level] || + z < 0 || z >= (int) texture->depth[level]) { + return samp->sampler->border_color; + } + else { + return get_texel_3d_no_border( samp, addr, x, y, z ); + } +} + + +/** + * Given the logbase2 of a mipmap's base level size and a mipmap level, + * return the size (in texels) of that mipmap level. + * For example, if level[0].width = 256 then base_pot will be 8. + * If level = 2, then we'll return 64 (the width at level=2). + * Return 1 if level > base_pot. + */ +static INLINE unsigned +pot_level_size(unsigned base_pot, unsigned level) +{ + return (base_pot >= level) ? (1 << (base_pot - level)) : 1; +} + + +/* Some image-filter fastpaths: + */ +static INLINE void +img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); + unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */ + unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */ + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + float xw = u - (float)uflr; + float yw = v - (float)vflr; + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const float *tx[4]; + + /* Can we fetch all four at once: + */ + if (x0 < xmax && y0 < ymax) { + get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx); } else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; + unsigned x1 = (x0 + 1) & (xpot - 1); + unsigned y1 = (y0 + 1) & (ypot - 1); + get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); } } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; +} + + +static INLINE void +img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot; + float v = t[j] * ypot; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const float *out = get_texel_2d_no_border(samp, addr, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; + } +} + + +static INLINE void +img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot; + float v = t[j] * ypot; + + int x0, y0; + const float *out; + + x0 = util_ifloor(u); + if (x0 < 0) + x0 = 0; + else if (x0 > xpot - 1) + x0 = xpot - 1; + + y0 = util_ifloor(v); + if (y0 < 0) + y0 = 0; + else if (y0 > ypot - 1) + y0 = ypot - 1; + + out = get_texel_2d_no_border(samp, addr, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; } } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; +} + + +static void +img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width; + int x[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d(samp, addr, x[j], 0); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; + } +} + + +static void +img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height; + int x[4], y[4]; + union tex_tile_address addr; + + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d(samp, addr, x[j], y[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; } } +} - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - return face; +static INLINE union tex_tile_address +face(union tex_tile_address addr, unsigned face ) +{ + addr.bits.face = face; + return addr; } -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(const struct pipe_texture *tex, - const struct pipe_sampler_state *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) +static void +img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - float rho, lambda; + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const unsigned *faces = samp->faces; /* zero when not cube-mapping */ + unsigned level0, j; + int width, height; + int x[4], y[4]; + union tex_tile_address addr; - assert(sampler->normalized_coords); + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * tex->width[0]; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * tex->height[0]; - rho = MAX2(rho, max); + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * tex->depth[0]; - rho = MAX2(rho, max); +} + + +static void +img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height, depth; + int x[4], y[4], z[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + samp->nearest_texcoord_p(p, depth, z); + + addr.value = 0; + addr.bits.level = samp->level; + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } } +} - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - return lambda; +static void +img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width; + int x0[4], x1[4]; + float xw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d(samp, addr, x0[j], 0); + const float *tx1 = get_texel_2d(samp, addr, x1[j], 0); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]); + } + } } -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ static void -choose_mipmap_levels(const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler, +img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. - */ - float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]); + const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]); + const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]); + const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); } - else { - *imgFilter = sampler->mag_img_filter; + } +} + + +static void +img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const unsigned *faces = samp->faces; /* zero when not cube-mapping */ + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + union tex_tile_address addrj = face(addr, faces[j]); + const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]); + const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]); + const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]); + const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); } } - else { - float lambda; +} - if (computeLambda) - /* fragment shader */ - lambda = compute_lambda(texture, sampler, s, t, p, lodbias); - else - /* vertex shader */ - lambda = lodbias; /* not really a bias, but absolute LOD */ - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; +static void +img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height, depth; + int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + addr.value = 0; + addr.bits.level = level0; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + samp->linear_texcoord_p(p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]); + const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]); + const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]); + const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]); + + const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]); + const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]); + const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]); + const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx00[c], tx01[c], + tx02[c], tx03[c], + tx10[c], tx11[c], + tx12[c], tx13[c]); } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ + } +} + + +static void +mip_filter_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + int level0; + float lambda; + + lambda = samp->compute_lambda(samp, s, t, p, lodbias); + level0 = (int)lambda; + + if (lambda < 0.0) { + samp->level = 0; + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else if (level0 >= texture->last_level) { + samp->level = texture->last_level; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; + + samp->level = level0; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + + samp->level = level0+1; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + + for (j = 0; j < QUAD_SIZE; j++) { + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); } } } } -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into sp_tile_cache.c and merge with the - * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... - */ static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + float lambda; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; + lambda = samp->compute_lambda(samp, s, t, p, lodbias); + + if (lambda < 0.0) { + samp->level = 0; + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); } else { - const int tx = x % TILE_SIZE; - const int ty = y % TILE_SIZE; - const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(sp, samp->cache, - x, y, z, face, level); - rgba[0][j] = tile->data.color[ty][tx][0]; - rgba[1][j] = tile->data.color[ty][tx][1]; - rgba[2][j] = tile->data.color[ty][tx][2]; - rgba[3][j] = tile->data.color[ty][tx][3]; - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } + samp->level = (int)(lambda + 0.5) ; + samp->level = MIN2(samp->level, (int)texture->last_level); + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + +#if 0 + printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", + rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], + rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], + rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], + rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); +#endif +} + + +static void +mip_filter_none(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + + if (lambda < 0.0) { + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); } } + /** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. - * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] + * Specialized version of mip_filter_linear with hard-wired calls to + * 2d lambda calculation and 2d_linear_repeat_POT img filters. */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) +static void +mip_filter_linear_2d_linear_repeat_POT( + struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + int level0; + float lambda; + + lambda = compute_lambda_2d(samp, s, t, p, lodbias); + level0 = (int)lambda; + + /* Catches both negative and large values of level0: + */ + if ((unsigned)level0 >= texture->last_level) { + if (level0 < 0) + samp->level = 0; + else + samp->level = texture->last_level; + + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); } + else { + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; + samp->level = level0; + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + + samp->level = level0+1; + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + + for (j = 0; j < QUAD_SIZE; j++) { + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); + } + } + } } + /** - * As above, but do four z/texture comparisons. + * Do shadow/depth comparisons. */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) +static void +sample_compare(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_sampler_state *sampler = samp->sampler; int j, k0, k1, k2, k3; float val; + samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); + + /** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. + */ + /* compare four texcoords vs. four texture samples */ switch (sampler->compare_func) { case PIPE_FUNC_LESS: @@ -826,470 +1500,392 @@ shadow_compare4(const struct pipe_sampler_state *sampler, /** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... + * Compute which cube face is referenced by each texcoord and put that + * info into the sampler faces[] array. Then sample the cube faces */ static void -sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } +sample_cube(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + unsigned j; + float ssss[4], tttt[4]; - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + for (j = 0; j < QUAD_SIZE; j++) { + float rx = s[j]; + float ry = t[j]; + float rz = p[j]; + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx >= ary && arx >= arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; } } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: + else if (ary >= arx && ary >= arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } + const float ima = 1.0 / ma; + ssss[j] = ( sc * ima + 1.0F ) * 0.5F; + tttt[j] = ( tc * ima + 1.0F ) * 0.5F; + samp->faces[j] = face; + } + } - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } + /* In our little pipeline, the compare stage is next. If compare + * is not active, this will point somewhere deeper into the + * pipeline, eg. to mip_filter or even img_filter. + */ + samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); +} - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; + + +static wrap_nearest_func +get_nearest_unorm_wrap(unsigned mode) +{ + switch (mode) { + case PIPE_TEX_WRAP_CLAMP: + return wrap_nearest_unorm_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_nearest_unorm_clamp_to_border; default: assert(0); + return wrap_nearest_unorm_clamp; } } -static INLINE void -sp_get_samples_1d(const struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static wrap_nearest_func +get_nearest_wrap(unsigned mode) { - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, tzero, NULL, - computeLambda, lodbias, rgba, faces); + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + return wrap_nearest_repeat; + case PIPE_TEX_WRAP_CLAMP: + return wrap_nearest_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_nearest_clamp_to_edge; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_nearest_clamp_to_border; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return wrap_nearest_mirror_repeat; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return wrap_nearest_mirror_clamp; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return wrap_nearest_mirror_clamp_to_edge; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return wrap_nearest_mirror_clamp_to_border; + default: + assert(0); + return wrap_nearest_repeat; + } } -static INLINE void -sp_get_samples_2d(const struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static wrap_linear_func +get_linear_unorm_wrap(unsigned mode) { - static const unsigned faces[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, t, p, - computeLambda, lodbias, rgba, faces); + switch (mode) { + case PIPE_TEX_WRAP_CLAMP: + return wrap_linear_unorm_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_linear_unorm_clamp_to_border; + default: + assert(0); + return wrap_linear_unorm_clamp; + } } -static INLINE void -sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static wrap_linear_func +get_linear_wrap(unsigned mode) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + return wrap_linear_repeat; + case PIPE_TEX_WRAP_CLAMP: + return wrap_linear_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_linear_clamp_to_edge; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_linear_clamp_to_border; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return wrap_linear_mirror_repeat; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return wrap_linear_mirror_clamp; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return wrap_linear_mirror_clamp_to_edge; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return wrap_linear_mirror_clamp_to_border; default: assert(0); + return wrap_linear_repeat; } } -static void -sp_get_samples_cube(const struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static compute_lambda_func +get_lambda_func(const union sp_sampler_key key) { - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + if (key.bits.processor == TGSI_PROCESSOR_VERTEX) + return compute_lambda_vert; + + switch (key.bits.target) { + case PIPE_TEXTURE_1D: + return compute_lambda_1d; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return compute_lambda_2d; + case PIPE_TEXTURE_3D: + return compute_lambda_3d; + default: + assert(0); + return compute_lambda_1d; } - sp_get_samples_2d_common(sampler, ssss, tttt, NULL, - computeLambda, lodbias, rgba, faces); } -static void -sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = texture->width[level0]; - height = texture->height[level0]; - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } +static filter_func +get_img_filter(const union sp_sampler_key key, + unsigned filter, + const struct pipe_sampler_state *sampler) +{ + switch (key.bits.target) { + case PIPE_TEXTURE_1D: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_1d_nearest; + else + return img_filter_1d_linear; break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: + case PIPE_TEXTURE_2D: + /* Try for fast path: + */ + if (key.bits.is_pot && + sampler->wrap_s == sampler->wrap_t && + sampler->normalized_coords) { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); + switch (sampler->wrap_s) { + case PIPE_TEX_WRAP_REPEAT: + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return img_filter_2d_nearest_repeat_POT; + case PIPE_TEX_FILTER_LINEAR: + return img_filter_2d_linear_repeat_POT; + default: + break; } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + break; + case PIPE_TEX_WRAP_CLAMP: + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return img_filter_2d_nearest_clamp_POT; + default: + break; } } } + /* Otherwise use default versions: + */ + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_2d_nearest; + else + return img_filter_2d_linear; + break; + case PIPE_TEXTURE_CUBE: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_cube_nearest; + else + return img_filter_cube_linear; + break; + case PIPE_TEXTURE_3D: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_3d_nearest; + else + return img_filter_3d_linear; break; default: assert(0); + return img_filter_1d_nearest; } } /** - * Common code for vertex/fragment program texture sampling. + * Bind the given texture object and texture cache to the sampler varient. */ -static INLINE void -sp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - boolean computeLambda, - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +void +sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, + struct softpipe_tex_tile_cache *tex_cache, + const struct pipe_texture *texture ) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; - - if (!texture) - return; + const struct pipe_sampler_state *sampler = samp->sampler; - switch (texture->target) { - case PIPE_TEXTURE_1D: - assert(sampler->normalized_coords); - sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - break; - case PIPE_TEXTURE_2D: - if (sampler->normalized_coords) - sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - else - sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - break; - case PIPE_TEXTURE_3D: - assert(sampler->normalized_coords); - sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - break; - case PIPE_TEXTURE_CUBE: - assert(sampler->normalized_coords); - sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - break; - default: - assert(0); - } - -#if 0 /* DEBUG */ - { - int i; - printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); - for (i = 0; i < 4; i++) { - printf("Frag %d: %f %f %f %f\n", i, - rgba[0][i], - rgba[1][i], - rgba[2][i], - rgba[3][i]); - } - } -#endif + samp->texture = texture; + samp->cache = tex_cache; + samp->xpot = util_unsigned_logbase2( texture->width[0] ); + samp->ypot = util_unsigned_logbase2( texture->height[0] ); + samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); } -/** - * Called via tgsi_sampler::get_samples() when running a fragment shader. - * Get four filtered RGBA values from the sampler's texture. - */ void -sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +sp_sampler_varient_destroy( struct sp_sampler_varient *samp ) { - sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); + FREE(samp); } /** - * Called via tgsi_sampler::get_samples() when running a vertex shader. - * Get four filtered RGBA values from the sampler's texture. + * Create a sampler varient for a given set of non-orthogonal state. */ -void -sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +struct sp_sampler_varient * +sp_create_sampler_varient( const struct pipe_sampler_state *sampler, + const union sp_sampler_key key ) { - sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba); + struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient); + if (!samp) + return NULL; + + samp->sampler = sampler; + samp->key = key; + + /* Note that (for instance) linear_texcoord_s and + * nearest_texcoord_s may be active at the same time, if the + * sampler min_img_filter differs from its mag_img_filter. + */ + if (sampler->normalized_coords) { + samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s ); + samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t ); + samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r ); + + samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s ); + samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t ); + samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r ); + } + else { + samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s ); + samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t ); + samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r ); + + samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s ); + samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t ); + samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r ); + } + + samp->compute_lambda = get_lambda_func( key ); + + samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler); + samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler); + + switch (sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + if (sampler->min_img_filter == sampler->mag_img_filter) + samp->mip_filter = samp->min_img_filter; + else + samp->mip_filter = mip_filter_none; + break; + + case PIPE_TEX_MIPFILTER_NEAREST: + samp->mip_filter = mip_filter_nearest; + break; + + case PIPE_TEX_MIPFILTER_LINEAR: + if (key.bits.is_pot && + sampler->min_img_filter == sampler->mag_img_filter && + sampler->normalized_coords && + sampler->wrap_s == PIPE_TEX_WRAP_REPEAT && + sampler->wrap_t == PIPE_TEX_WRAP_REPEAT && + sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) + { + samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT; + } + else + { + samp->mip_filter = mip_filter_linear; + } + break; + } + + if (sampler->compare_mode != FALSE) { + samp->compare = sample_compare; + } + else { + /* Skip compare operation by promoting the mip_filter function + * pointer: + */ + samp->compare = samp->mip_filter; + } + + if (key.bits.target == PIPE_TEXTURE_CUBE) { + samp->base.get_samples = sample_cube; + } + else { + samp->faces[0] = 0; + samp->faces[1] = 0; + samp->faces[2] = 0; + samp->faces[3] = 0; + + /* Skip cube face determination by promoting the compare + * function pointer: + */ + samp->base.get_samples = samp->compare; + } + + return samp; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 40d8eb2c2a8..b0797711d37 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -31,43 +31,122 @@ #include "tgsi/tgsi_exec.h" +struct sp_sampler_varient; + +typedef void (*wrap_nearest_func)(const float s[4], + unsigned size, + int icoord[4]); + +typedef void (*wrap_linear_func)(const float s[4], + unsigned size, + int icoord0[4], + int icoord1[4], + float w[4]); + +typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias); + +typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +union sp_sampler_key { + struct { + unsigned target:3; + unsigned is_pot:1; + unsigned processor:2; + unsigned unit:4; + unsigned pad:22; + } bits; + unsigned value; +}; /** * Subclass of tgsi_sampler */ -struct sp_shader_sampler +struct sp_sampler_varient { struct tgsi_sampler base; /**< base class */ - uint unit; - struct softpipe_context *sp; - struct softpipe_tile_cache *cache; + union sp_sampler_key key; + + /* The owner of this struct: + */ + const struct pipe_sampler_state *sampler; + + + /* Currently bound texture: + */ + const struct pipe_texture *texture; + struct softpipe_tex_tile_cache *cache; + + unsigned processor; + + /* For sp_get_samples_2d_linear_POT: + */ + unsigned xpot; + unsigned ypot; + unsigned level; + + unsigned faces[4]; + + wrap_nearest_func nearest_texcoord_s; + wrap_nearest_func nearest_texcoord_t; + wrap_nearest_func nearest_texcoord_p; + + wrap_linear_func linear_texcoord_s; + wrap_linear_func linear_texcoord_t; + wrap_linear_func linear_texcoord_p; + + filter_func min_img_filter; + filter_func mag_img_filter; + + compute_lambda_func compute_lambda; + + filter_func mip_filter; + filter_func compare; + + /* Linked list: + */ + struct sp_sampler_varient *next; }; +struct sp_sampler; +/* Create a sampler varient for a given set of non-orthogonal state. Currently the + */ +struct sp_sampler_varient * +sp_create_sampler_varient( const struct pipe_sampler_state *sampler, + const union sp_sampler_key key ); -static INLINE const struct sp_shader_sampler * -sp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (const struct sp_shader_sampler *) sampler; -} +void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient, + struct softpipe_tex_tile_cache *tex_cache, + const struct pipe_texture *tex ); +void sp_sampler_varient_destroy( struct sp_sampler_varient * ); -extern void -sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +static INLINE struct sp_sampler_varient * +sp_sampler_varient(const struct tgsi_sampler *sampler) +{ + return (struct sp_sampler_varient *) sampler; +} extern void -sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); +sp_get_samples(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); #endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c new file mode 100644 index 00000000000..407a22a9f4b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -0,0 +1,273 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture tile caching. + * + * Author: + * Brian Paul + */ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_texture.h" +#include "sp_tex_tile_cache.h" + + + +struct softpipe_tex_tile_cache * +sp_create_tex_tile_cache( struct pipe_screen *screen ) +{ + struct softpipe_tex_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tex_tile_cache ); + if (tc) { + tc->screen = screen; + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].addr.bits.invalid = 1; + } + tc->last_tile = &tc->entries[0]; /* any tile */ + } + return tc; +} + + +void +sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc) +{ + struct pipe_screen *screen; + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->transfer) { + screen = tc->transfer->texture->screen; + screen->tex_transfer_destroy(tc->transfer); + } + if (tc->tex_trans) { + screen = tc->tex_trans->texture->screen; + screen->tex_transfer_destroy(tc->tex_trans); + } + + FREE( tc ); +} + + + + +void +sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc) +{ + if (tc->tex_trans && !tc->tex_trans_map) + tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); +} + + +void +sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc) +{ + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } +} + +/** + * Invalidate all cached tiles for the cached texture. + * Should be called when the texture is modified. + */ +void +sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc) +{ + unsigned i; + + assert(tc); + assert(tc->texture); + + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].addr.bits.invalid = 1; + } +} + +/** + * Specify the texture to cache. + */ +void +sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->transfer); + + if (tc->texture != texture) { + pipe_texture_reference(&tc->texture, texture); + + if (tc->tex_trans) { + struct pipe_screen *screen = tc->tex_trans->texture->screen; + + if (tc->tex_trans_map) { + screen->transfer_unmap(screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } + + screen->tex_transfer_destroy(tc->tex_trans); + tc->tex_trans = NULL; + } + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].addr.bits.invalid = 1; + } + + tc->tex_face = -1; /* any invalid value here */ + } +} + + + + +/** + * Flush the tile cache: write all dirty tiles back to the transfer. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc) +{ + int pos; + + if (tc->texture) { + /* caching a texture, mark all entries as empty */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].addr.bits.invalid = 1; + } + tc->tex_face = -1; + } + +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos( union tex_tile_address addr ) +{ + uint entry = (addr.bits.x + + addr.bits.y * 9 + + addr.bits.z * 3 + + addr.bits.face + + addr.bits.level * 7); + + return entry % NUM_ENTRIES; +} + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. + */ +const struct softpipe_tex_cached_tile * +sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ) +{ + struct pipe_screen *screen = tc->screen; + struct softpipe_tex_cached_tile *tile; + + tile = tc->entries + tex_cache_pos( addr ); + + if (addr.value != tile->addr.value) { + + /* cache miss. Most misses are because we've invaldiated the + * texture cache previously -- most commonly on binding a new + * texture. Currently we effectively flush the cache on texture + * bind. + */ +#if 0 + _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" + " tile %u: x=%d y=%d z=%d face=%d level=%d\n", + pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level, + pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); +#endif + + /* check if we need to get a new transfer */ + if (!tc->tex_trans || + tc->tex_face != addr.bits.face || + tc->tex_level != addr.bits.level || + tc->tex_z != addr.bits.z) { + /* get new transfer (view into texture) */ + + if (tc->tex_trans) { + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } + + screen->tex_transfer_destroy(tc->tex_trans); + tc->tex_trans = NULL; + } + + tc->tex_trans = + screen->get_tex_transfer(screen, tc->texture, + addr.bits.face, + addr.bits.level, + addr.bits.z, + PIPE_TRANSFER_READ, 0, 0, + tc->texture->width[addr.bits.level], + tc->texture->height[addr.bits.level]); + + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); + + tc->tex_face = addr.bits.face; + tc->tex_level = addr.bits.level; + tc->tex_z = addr.bits.z; + } + + /* get tile from the transfer (view into texture) */ + pipe_get_tile_rgba(tc->tex_trans, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->addr = addr; + } + + tc->last_tile = tile; + return tile; +} + + + diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h new file mode 100644 index 00000000000..ac6886a3df1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TEX_TILE_CACHE_H +#define SP_TEX_TILE_CACHE_H + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tex_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. + */ +#define TILE_SIZE 64 + + +/* If we need to support > 4096, just expand this to be a 64 bit + * union, or consider tiling in Z as well. + */ +union tex_tile_address { + struct { + unsigned x:6; /* 4096 / TILE_SIZE */ + unsigned y:6; /* 4096 / TILE_SIZE */ + unsigned z:12; /* 4096 -- z not tiled */ + unsigned face:3; + unsigned level:4; + unsigned invalid:1; + } bits; + unsigned value; +}; + + +struct softpipe_tex_cached_tile +{ + union tex_tile_address addr; + union { + float color[TILE_SIZE][TILE_SIZE][4]; + } data; +}; + +#define NUM_ENTRIES 50 + +struct softpipe_tex_tile_cache +{ + struct pipe_screen *screen; + struct pipe_transfer *transfer; + void *transfer_map; + + struct pipe_texture *texture; /**< if caching a texture */ + unsigned timestamp; + + struct softpipe_tex_cached_tile entries[NUM_ENTRIES]; + + struct pipe_transfer *tex_trans; + void *tex_trans_map; + int tex_face, tex_level, tex_z; + + struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + + +extern struct softpipe_tex_tile_cache * +sp_create_tex_tile_cache( struct pipe_screen *screen ); + +extern void +sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc); + + +extern void +sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc); + +extern void +sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc); + +extern void +sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, + struct pipe_texture *texture); + +void +sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc); + +extern void +sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc); + + + +extern const struct softpipe_tex_cached_tile * +sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ); + +static INLINE union tex_tile_address +tex_tile_address( unsigned x, + unsigned y, + unsigned z, + unsigned face, + unsigned level ) +{ + union tex_tile_address addr; + + addr.value = 0; + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + addr.bits.z = z; + addr.bits.face = face; + addr.bits.level = level; + + return addr; +} + +/* Quickly retrieve tile if it matches last lookup. + */ +static INLINE const struct softpipe_tex_cached_tile * +sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ) +{ + if (tc->last_tile->addr.value == addr.value) + return tc->last_tile; + + return sp_find_cached_tile_tex( tc, addr ); +} + + + + + +#endif /* SP_TEX_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 70f09324311..49b51afda7e 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -30,26 +30,21 @@ * Michel Dänzer <[email protected]> */ -#include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" #include "sp_screen.h" #include "sp_winsys.h" -/* Simple, maximally packed layout. - */ - - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Use a simple, maximally packed layout. */ static boolean softpipe_texture_layout(struct pipe_screen *screen, @@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } + +/** + * Texture layout for simple color buffers. + */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) @@ -112,21 +111,22 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } - - - static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, - const struct pipe_texture *templat) + const struct pipe_texture *template) { struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); if (!spt) return NULL; - spt->base = *templat; + spt->base = *template; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; + spt->pot = (util_is_power_of_two(template->width[0]) && + util_is_power_of_two(template->height[0]) && + util_is_power_of_two(template->depth[0])); + if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY)) { if (!softpipe_displaytarget_layout(screen, spt)) @@ -222,6 +222,13 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* Mark the surface as dirty. The tile cache will look for this. */ + spt->timestamp++; + softpipe_screen(screen)->timestamp++; + } + ps->face = face; ps->level = level; ps->zslice = zslice; @@ -342,14 +349,13 @@ softpipe_transfer_map( struct pipe_screen *screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) - { + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ softpipe_screen(screen)->timestamp++; } - + xfer_map = map + softpipe_transfer(transfer)->offset + transfer->y / transfer->block.height * transfer->stride + transfer->x / transfer->block.width * transfer->block.size; @@ -360,7 +366,7 @@ softpipe_transfer_map( struct pipe_screen *screen, static void softpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -371,18 +377,12 @@ softpipe_transfer_unmap(struct pipe_screen *screen, if (transfer->usage != PIPE_TRANSFER_READ) { /* Mark the texture as dirty to expire the tile caches. */ - spt->modified = TRUE; + spt->timestamp++; } } void -softpipe_init_texture_funcs(struct softpipe_context *sp) -{ -} - - -void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; @@ -404,7 +404,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, unsigned *stride ) { - struct softpipe_texture *tex = (struct softpipe_texture *)texture; + struct softpipe_texture *tex = (struct softpipe_texture *) texture; if (!tex) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 893aa7d11d8..2537ab6a40d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -48,7 +48,11 @@ struct softpipe_texture */ struct pipe_buffer *buffer; - boolean modified; + /* True if texture images are power-of-two in all dimensions: + */ + boolean pot; + + unsigned timestamp; }; struct softpipe_transfer @@ -74,9 +78,6 @@ softpipe_transfer(struct pipe_transfer *pt) extern void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - -extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1f9b8f1f4fb..de479709858 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -26,7 +26,7 @@ **************************************************************************/ /** - * Texture tile caching. + * Render target tile caching. * * Author: * Brian Paul @@ -35,38 +35,8 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" -#include "sp_context.h" -#include "sp_surface.h" -#include "sp_texture.h" #include "sp_tile_cache.h" -#define NUM_ENTRIES 50 - - -/** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 - - -struct softpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - struct pipe_texture *texture; /**< if caching a texture */ - struct softpipe_cached_tile entries[NUM_ENTRIES]; - uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; - float clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - boolean depth_stencil; /**< Is the surface a depth/stencil format? */ - - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - - struct softpipe_cached_tile tile; /**< scratch tile for clears */ -}; /** @@ -76,7 +46,7 @@ struct softpipe_tile_cache * a LRU replacement policy. */ #define CACHE_POS(x, y) \ - (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES) + (((x) + (y) * 5) % NUM_ENTRIES) @@ -84,12 +54,10 @@ struct softpipe_tile_cache * Is the tile at (x,y) in cleared state? */ static INLINE uint -is_clear_flag_set(const uint *bitvec, int x, int y) +is_clear_flag_set(const uint *bitvec, union tile_address addr) { int pos, bit; - x /= TILE_SIZE; - y /= TILE_SIZE; - pos = y * (MAX_WIDTH / TILE_SIZE) + x; + pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x; assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); bit = bitvec[pos / 32] & (1 << (pos & 31)); return bit; @@ -100,12 +68,10 @@ is_clear_flag_set(const uint *bitvec, int x, int y) * Mark the tile at (x,y) as not cleared. */ static INLINE void -clear_clear_flag(uint *bitvec, int x, int y) +clear_clear_flag(uint *bitvec, union tile_address addr) { int pos; - x /= TILE_SIZE; - y /= TILE_SIZE; - pos = y * (MAX_WIDTH / TILE_SIZE) + x; + pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x; assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); bitvec[pos / 32] &= ~(1 << (pos & 31)); } @@ -116,14 +82,20 @@ sp_create_tile_cache( struct pipe_screen *screen ) { struct softpipe_tile_cache *tc; uint pos; + int maxLevels, maxTexSize; + + /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ + maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxTexSize = 1 << (maxLevels - 1); + assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { tc->screen = screen; for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].x = - tc->entries[pos].y = -1; + tc->entries[pos].addr.bits.invalid = 1; } + tc->last_tile = &tc->entries[0]; /* any tile */ } return tc; } @@ -142,10 +114,6 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) screen = tc->transfer->texture->screen; screen->tex_transfer_destroy(tc->transfer); } - if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); - } FREE( tc ); } @@ -158,8 +126,6 @@ void sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, struct pipe_surface *ps) { - assert(!tc->texture); - if (tc->transfer) { struct pipe_screen *screen = tc->transfer->texture->screen; @@ -211,9 +177,6 @@ sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer && !tc->transfer_map) tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); - - if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); } @@ -224,47 +187,6 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) tc->screen->transfer_unmap(tc->screen, tc->transfer); tc->transfer_map = NULL; } - - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } -} - - -/** - * Specify the texture to cache. - */ -void -sp_tile_cache_set_texture(struct pipe_context *pipe, - struct softpipe_tile_cache *tc, - struct pipe_texture *texture) -{ - uint i; - - assert(!tc->transfer); - - pipe_texture_reference(&tc->texture, texture); - - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].x = -1; - } - - tc->tex_face = -1; /* any invalid value here */ } @@ -308,7 +230,7 @@ clear_tile(struct softpipe_cached_tile *tile, switch (pf_get_size(format)) { case 1: - memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE); break; case 2: if (clear_value == 0) { @@ -344,8 +266,7 @@ clear_tile(struct softpipe_cached_tile *tile, * Actually clear the tiles which were flagged as being in a clear state. */ static void -sp_tile_cache_flush_clear(struct pipe_context *pipe, - struct softpipe_tile_cache *tc) +sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) { struct pipe_transfer *pt = tc->transfer; const uint w = tc->transfer->width; @@ -359,13 +280,15 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { - if (is_clear_flag_set(tc->clear_flags, x, y)) { + union tile_address addr = tile_address(x, y); + + if (is_clear_flag_set(tc->clear_flags, addr)) { pipe_put_tile_raw(pt, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); /* do this? */ - clear_clear_flag(tc->clear_flags, x, y); + clear_clear_flag(tc->clear_flags, addr); numCleared++; } @@ -382,8 +305,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, * any tiles "flagged" as cleared will be "really" cleared. */ void -sp_flush_tile_cache(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc) +sp_flush_tile_cache(struct softpipe_tile_cache *tc) { struct pipe_transfer *pt = tc->transfer; int inuse = 0, pos; @@ -392,33 +314,30 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, /* caching a drawing transfer */ for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile->x >= 0) { + if (!tile->addr.bits.invalid) { if (tc->depth_stencil) { pipe_put_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } - tile->x = tile->y = -1; /* mark as empty */ + tile->addr.bits.invalid = 1; /* mark as empty */ inuse++; } } #if TILE_CLEAR_OPTIMIZATION - sp_tile_cache_flush_clear(&softpipe->pipe, tc); + sp_tile_cache_flush_clear(tc); #endif } - else if (tc->texture) { - /* caching a texture, mark all entries as empty */ - for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].x = -1; - } - tc->tex_face = -1; - } #if 0 debug_printf("flushed tiles in use: %d\n", inuse); @@ -431,40 +350,39 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, * \param x, y position of tile, in pixels */ struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y) +sp_find_cached_tile(struct softpipe_tile_cache *tc, + union tile_address addr ) { struct pipe_transfer *pt = tc->transfer; - - /* tile pos in framebuffer: */ - const int tile_x = x & ~(TILE_SIZE - 1); - const int tile_y = y & ~(TILE_SIZE - 1); - + /* cache pos/entry: */ - const int pos = CACHE_POS(x, y); + const int pos = CACHE_POS(addr.bits.x, + addr.bits.y); struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile_x != tile->x || - tile_y != tile->y) { + if (addr.value != tile->addr.value) { - if (tile->x != -1) { + if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } - tile->x = tile_x; - tile->y = tile_y; + tile->addr = addr; - if (is_clear_flag_set(tc->clear_flags, x, y)) { + if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ if (tc->depth_stencil) { clear_tile(tile, pt->format, tc->clear_val); @@ -472,125 +390,33 @@ sp_get_cached_tile(struct softpipe_context *softpipe, else { clear_tile_rgba(tile, pt->format, tc->clear_color); } - clear_clear_flag(tc->clear_flags, x, y); + clear_clear_flag(tc->clear_flags, addr); } else { /* get new tile data from transfer */ if (tc->depth_stencil) { pipe_get_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_get_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } } + tc->last_tile = tile; return tile; } -/** - * Given the texture face, level, zslice, x and y values, compute - * the cache entry position/index where we'd hope to find the - * cached texture tile. - * This is basically a direct-map cache. - * XXX There's probably lots of ways in which we can improve this. - */ -static INLINE uint -tex_cache_pos(int x, int y, int z, int face, int level) -{ - uint entry = x + y * 9 + z * 3 + face + level * 7; - return entry % NUM_ENTRIES; -} - - -/** - * Similar to sp_get_cached_tile() but for textures. - * Tiles are read-only and indexed with more params. - */ -const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_context *sp, - struct softpipe_tile_cache *tc, int x, int y, int z, - int face, int level) -{ - struct pipe_screen *screen = sp->pipe.screen; - /* tile pos in framebuffer: */ - const int tile_x = x & ~(TILE_SIZE - 1); - const int tile_y = y & ~(TILE_SIZE - 1); - /* cache pos/entry: */ - const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, - face, level); - struct softpipe_cached_tile *tile = tc->entries + pos; - - if (tc->texture) { - struct softpipe_texture *spt = softpipe_texture(tc->texture); - if (spt->modified) { - /* texture was modified, invalidate all cached tiles */ - uint p; - for (p = 0; p < NUM_ENTRIES; p++) { - tile = tc->entries + p; - tile->x = -1; - } - spt->modified = FALSE; - } - } - - if (tile_x != tile->x || - tile_y != tile->y || - z != tile->z || - face != tile->face || - level != tile->level) { - /* cache miss */ -#if 0 - printf("miss at %u x=%d y=%d z=%d face=%d level=%d\n", pos, - x/TILE_SIZE, y/TILE_SIZE, z, face, level); -#endif - /* check if we need to get a new transfer */ - if (!tc->tex_trans || - tc->tex_face != face || - tc->tex_level != level || - tc->tex_z != z) { - /* get new transfer (view into texture) */ - - if (tc->tex_trans) { - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - tc->tex_trans = screen->get_tex_transfer(screen, tc->texture, face, level, z, - PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[level], - tc->texture->height[level]); - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - - tc->tex_face = face; - tc->tex_level = level; - tc->tex_z = z; - } - - /* get tile from the transfer (view into texture) */ - pipe_get_tile_rgba(tc->tex_trans, - tile_x, tile_y, TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); - tile->x = tile_x; - tile->y = tile_y; - tile->z = z; - tile->face = face; - tile->level = level; - } - - return tile; -} /** @@ -621,6 +447,6 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; - tile->x = tile->y = -1; + tile->addr.bits.invalid = 1; } } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 8f247d0e580..a12092702a6 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -34,7 +34,6 @@ #include "pipe/p_compiler.h" -struct softpipe_context; struct softpipe_tile_cache; @@ -44,11 +43,23 @@ struct softpipe_tile_cache; #define TILE_SIZE 64 +/* If we need to support > 4096, just expand this to be a 64 bit + * union, or consider tiling in Z as well. + */ +union tile_address { + struct { + unsigned x:6; /* 4096 / TILE_SIZE */ + unsigned y:6; /* 4096 / TILE_SIZE */ + unsigned invalid:1; + unsigned pad:19; + } bits; + unsigned value; +}; + struct softpipe_cached_tile { - int x, y; /**< pos of tile in window coords */ - int z, face, level; /**< Extra texture indexes */ + union tile_address addr; union { float color[TILE_SIZE][TILE_SIZE][4]; uint color32[TILE_SIZE][TILE_SIZE]; @@ -59,6 +70,32 @@ struct softpipe_cached_tile } data; }; +#define NUM_ENTRIES 50 + + +/** XXX move these */ +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 + + +struct softpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + struct pipe_transfer *transfer; + void *transfer_map; + + struct softpipe_cached_tile entries[NUM_ENTRIES]; + uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; + float clear_color[4]; /**< for color bufs */ + uint clear_val; /**< for z+stencil, or packed color clear value */ + boolean depth_stencil; /**< Is the surface a depth/stencil format? */ + + struct softpipe_cached_tile tile; /**< scratch tile for clears */ + + struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + extern struct softpipe_tile_cache * sp_create_tile_cache( struct pipe_screen *screen ); @@ -80,26 +117,45 @@ extern void sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_set_texture(struct pipe_context *pipe, - struct softpipe_tile_cache *tc, - struct pipe_texture *texture); - -extern void -sp_flush_tile_cache(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc); +sp_flush_tile_cache(struct softpipe_tile_cache *tc); extern void sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, uint clearValue); extern struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y); +sp_find_cached_tile(struct softpipe_tile_cache *tc, + union tile_address addr ); + + +static INLINE union tile_address +tile_address( unsigned x, + unsigned y ) +{ + union tile_address addr; + + addr.value = 0; + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + + return addr; +} + +/* Quickly retrieve tile if it matches last lookup. + */ +static INLINE struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_tile_cache *tc, + int x, int y ) +{ + union tile_address addr = tile_address( x, y ); + + if (tc->last_tile->addr.value == addr.value) + return tc->last_tile; + + return sp_find_cached_tile( tc, addr ); +} + -extern const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y, int z, - int face, int level); #endif /* SP_TILE_CACHE_H */ diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index ae0af4d0557..bf470b46ae1 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -125,11 +125,11 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) } else if ((tr_ctx->draw_rule.blocker & flag) && (tr_ctx->draw_blocker & 4)) { boolean block = FALSE; - debug_printf("%s (%lu %lu) (%lu %lu) (%lu %u) (%lu %u)\n", __FUNCTION__, - tr_ctx->draw_rule.fs, tr_ctx->curr.fs, - tr_ctx->draw_rule.vs, tr_ctx->curr.vs, - tr_ctx->draw_rule.surf, 0, - tr_ctx->draw_rule.tex, 0); + debug_printf("%s (%p %p) (%p %p) (%p %u) (%p %u)\n", __FUNCTION__, + (void *) tr_ctx->draw_rule.fs, (void *) tr_ctx->curr.fs, + (void *) tr_ctx->draw_rule.vs, (void *) tr_ctx->curr.vs, + (void *) tr_ctx->draw_rule.surf, 0, + (void *) tr_ctx->draw_rule.tex, 0); if (tr_ctx->draw_rule.fs && tr_ctx->draw_rule.fs == tr_ctx->curr.fs) block = TRUE; |