diff options
author | Keith Whitwell <[email protected]> | 2010-10-17 19:03:42 -0700 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2010-10-17 19:09:42 -0700 |
commit | 0072acd447dc6be652e63752e50215c3105322c8 (patch) | |
tree | 847d1763b54772d336a04e606f8248291c3092b7 /src/gallium/drivers | |
parent | 543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 (diff) | |
parent | ca2b2ac131933b4171b519813df1aaa3a81621cd (diff) |
Merge remote branch 'origin/master' into lp-setup-llvm
Conflicts:
src/gallium/drivers/llvmpipe/lp_setup_coef.c
src/gallium/drivers/llvmpipe/lp_setup_coef.h
src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_setup_tri.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.h
Diffstat (limited to 'src/gallium/drivers')
142 files changed, 9060 insertions, 9687 deletions
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 7bb7893d939..bd059d5716c 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -30,7 +30,6 @@ #include "i915_context.h" #include "i915_batch.h" #include "i915_debug.h" -#include "i915_reg.h" #include "i915_resource.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index bd8b9174a89..36c04a31655 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_memory.h" #include "util/u_string.h" #include "intel_decode.h" @@ -116,8 +117,7 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) }; - for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { unsigned int len = 1, i; @@ -275,8 +275,7 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; } - for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_2d); opcode++) { if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { unsigned int i; @@ -1037,9 +1036,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); - opcode++) - { + for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { if (opcodes_3d_1d[opcode].i830_only && !i830) continue; @@ -1291,8 +1288,7 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; @@ -1637,8 +1633,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { unsigned int i; len = 1; @@ -1705,8 +1700,7 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); - opcode++) { + for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { unsigned int len = 1, i; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 379f14b43d2..726a19db2b5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -64,12 +64,12 @@ PROGS := lp_test_format \ # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o +include ../../Makefile.template + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a -include ../../Makefile.template - lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 8b5539d2c51..ec30d4d7084 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -131,6 +131,44 @@ replacing the native ICD driver, but it's quite an advanced usage, so if you need to ask, don't even try it. +Profiling +========= + +To profile llvmpipe you should pass the options + + scons debug=no profile=yes <same-as-before> + +This will ensure that frame pointers are used both in C and JIT functions, and +that no tail call optimizations are done by gcc. + + +To better profile JIT code you'll need to build LLVM with oprofile integration. + + source_dir=$PWD/llvm-2.6 + build_dir=$source_dir/build/profile + install_dir=$source_dir-profile + + mkdir -p "$build_dir" + cd "$build_dir" && \ + $source_dir/configure \ + --prefix=$install_dir \ + --enable-optimized \ + --disable-profiling \ + --enable-targets=host-only \ + --with-oprofile + + make -C "$build_dir" + make -C "$build_dir" install + + find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug + +The you should define + + export LLVM=/path/to/llvm-2.6-profile + +and rebuild. + + Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f893878daab..d63bdd72a72 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [ ]) -# Only enable SSSE3 for lp_tile_soa_sse3.c -ssse3_env = env.Clone() -if env['gcc'] \ - and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ - and env['machine'] in ('x86', 'x86_64') : - ssse3_env.Append(CCFLAGS = ['-mssse3']) -lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') +lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') llvmpipe = env.ConvenienceLibrary( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f9..e50643790c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c0..27ca8aad4d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index b5924cbb7dc..d1c9b88f9bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -320,9 +320,6 @@ lp_build_blend_aos(LLVMBuilderRef builder, if(!blend->rt[rt].blend_enable) return src; - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b9c7a6ceed6..30d261e979f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -195,6 +195,13 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, } +static boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + /** * Generate blend code in SOA mode. * \param rt render target index (to index the blend / colormask state) @@ -243,8 +250,41 @@ lp_build_blend_soa(LLVMBuilderRef builder, unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); + if (func == PIPE_BLEND_ADD && + lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { + /* + * Special case linear interpolation, (i.e., complementary factors). + */ + + LLVMValueRef weight; + if (src_factor < dst_factor) { + weight = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); + } else { + weight = lp_build_blend_soa_factor(&bld, dst_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); + } + continue; + } + + if ((func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) && + src_factor == dst_factor && + type.floating) { + /* + * Special common factor. + * + * XXX: Only for floating points for now, since saturation will + * cause different results. + */ + + LLVMValueRef factor; + factor = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); + res[i] = lp_build_mul(&bld.base, res[i], factor); + continue; + } /* * Compute src/dst factors. diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7561899a74e..7eb76d4fb31 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,15 +53,8 @@ * ... ... ... ... ... ... ... ... ... * * - * Stencil test: - * Two-sided stencil test is supported but probably not as efficient as - * it could be. Currently, we use if/then/else constructs to do the - * operations for front vs. back-facing polygons. We could probably do - * both the front and back arithmetic then use a Select() instruction to - * choose the result depending on polyon orientation. We'd have to - * measure performance both ways and see which is better. - * * @author Jose Fonseca <[email protected]> + * @author Brian Paul <[email protected]> */ #include "pipe/p_state.h" @@ -71,6 +64,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_intr.h" @@ -128,57 +122,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld, /** * Do the one or two-sided stencil test comparison. * \sa lp_build_stencil_test_single - * \param face an integer indicating front (+) or back (-) facing polygon. - * If NULL, assume front-facing. + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. */ static LLVMValueRef lp_build_stencil_test(struct lp_build_context *bld, const struct pipe_stencil_state stencil[2], LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, - LLVMValueRef face) + LLVMValueRef front_facing) { LLVMValueRef res; assert(stencil[0].enabled); - if (stencil[1].enabled && face) { - /* do two-sided test */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); - lp_build_flow_scope_declare(flow_ctx, &result); + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_test_single(bld, &stencil[1], - stencilRefs[1], stencilVals); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - - res = result; - } - else { - /* do single-side test */ - res = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); + res = lp_build_select(bld, front_facing, res, back_res); } return res; @@ -195,14 +164,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld, const struct pipe_stencil_state *stencil, enum stencil_op op, LLVMValueRef stencilRef, - LLVMValueRef stencilVals, - LLVMValueRef mask) + LLVMValueRef stencilVals) { - const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + LLVMValueRef max = lp_build_const_int_vec(type, 0xff); unsigned stencil_op; assert(type.sign); @@ -255,19 +222,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; default: assert(0 && "bad stencil op mode"); - res = NULL; - } - - if (stencil->writemask != stencilMax) { - /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); - /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); - } - else { - /* res = mask ? res : stencilVals */ - res = lp_build_select(bld, mask, res, stencilVals); + res = bld->undef; } return res; @@ -284,49 +239,40 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, LLVMValueRef mask, - LLVMValueRef face) + LLVMValueRef front_facing) { - assert(stencil[0].enabled); - - if (stencil[1].enabled && face) { - /* do two-sided op */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + LLVMValueRef res; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + assert(stencil[0].enabled); - lp_build_flow_scope_declare(flow_ctx, &result); + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_op_single(bld, &stencil[1], op, - stencilRefs[1], stencilVals, mask); - } - lp_build_endif(&if_ctx); + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + res = lp_build_select(bld, front_facing, res, back_res); + } - return result; + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); } else { - /* do single-sided op */ - return lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } + + return res; } @@ -358,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc, } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -381,7 +332,7 @@ lp_depth_type(const struct util_format_description *format_desc, */ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, - unsigned *shift, unsigned *mask) + unsigned *shift, unsigned *width, unsigned *mask) { const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; @@ -397,12 +348,14 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + *width = format_desc->channel[z_swizzle].size; + padding_right = 0; for (chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; padding_left = - total_bits - (padding_right + format_desc->channel[z_swizzle].size); + total_bits - (padding_right + *width); if (padding_left || padding_right) { unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -413,7 +366,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, *mask = 0xffffffff; } - *shift = padding_left; + *shift = padding_right; return TRUE; } @@ -457,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -494,31 +447,57 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains float value indicating front/back facing polygon + * \param facing contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef *zs_value, + boolean do_branch) { - struct lp_build_context bld; - struct lp_build_context sbld; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; struct lp_type s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef orig_mask = mask->value; + LLVMValueRef orig_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; + + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); /* Sanity checking */ { @@ -540,8 +519,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if (type.floating) { + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); @@ -552,54 +531,56 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + assert(!z_type.fixed); } } /* Setup build context for Z vals */ - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&z_bld, builder, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(type.width); - lp_build_context_init(&sbld, builder, s_type); + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - lp_build_name(zs_dst, "zsbufval"); + lp_build_name(zs_dst, "zs_dst"); /* Compute and apply the Z/stencil bitmasks and shifts. */ { - unsigned z_shift, z_mask; unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); - z_src = LLVMBuildLShr(builder, z_src, shift, ""); - } - + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); - z_src = LLVMBuildAnd(builder, z_src, mask, ""); - z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); - z_bitmask = mask; /* used below */ + z_bitmask = lp_build_const_int_vec(z_type, z_mask); } - else { + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); } - - lp_build_name(z_dst, "zsbuf.z"); } if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -608,35 +589,85 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } - lp_build_name(stencil_vals, "stencil"); + lp_build_name(stencil_vals, "s_dst"); } } - if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntType(s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&sbld, stencil, - stencil_refs, stencil_vals, face); + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); /* apply stencil-fail operator */ { - LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); - stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, - s_fail_mask, face); + s_fail_mask, front_facing); } } if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only @@ -644,28 +675,28 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { - LLVMValueRef zselectmask = mask->value; + LLVMValueRef zselectmask; /* mask off bits that failed Z test */ - zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } - /* if combined Z/stencil format, mask off the stencil bits */ - if (z_bitmask) { - zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); - } - /* Mix the old and new Z buffer values. - * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ - z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -673,33 +704,35 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, - z_fail_mask, face); + z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - z_pass_mask, face); + z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ - s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - s_pass_mask, face); + s_pass_mask, front_facing); } - /* The Z bits are already in the right place but we may need to shift the - * stencil bits before ORing Z with Stencil to make the final pixel value. - */ + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -707,13 +740,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -722,6 +755,47 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); + if (do_branch) + lp_build_mask_check(mask); + +} + + +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type z_type; + struct lp_build_context z_bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, builder, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d0..a54ef3a711e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef *zs_value, + boolean do_branch); +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2a374f8c390..c9da8900d0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* - * a = a0 + x * dadx + y * dady + * a = a0 + (x * dadx + y * dady) */ if (attrib == 0 && chan == 0) { @@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef tmp; - tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); - tmp = LLVMBuildFMul(builder, bld->y, dady, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); + LLVMValueRef ax, ay, axy; + ax = LLVMBuildFMul(builder, bld->x, dadx, ""); + ay = LLVMBuildFMul(builder, bld->y, dady, ""); + axy = LLVMBuildFAdd(builder, ax, ay, ""); + a = LLVMBuildFAdd(builder, a, axy, ""); } } @@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + int quad_index, + int start, + int end) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); @@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) assert(quad_index < 4); - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -350,6 +353,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) } #endif + if (attrib == 0 && chan == 2) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + attrib_name(a, attrib, chan, ""); } bld->attribs[attrib][chan] = a; @@ -434,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - attribs_update(bld, 0); } @@ -443,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + attribs_update(bld, quad_index, 1, bld->num_attribs); +} + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index); + attribs_update(bld, quad_index, 0, 1); } + diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 37479fca9dc..a7ebdd1bfa2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -113,7 +113,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index); + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, int quad_index); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 8e6dfb293d0..e09ec504ab7 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -64,6 +64,11 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) elem_types[LP_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), LP_MAX_TEXTURE_LEVELS); + elem_types[LP_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); + elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatType(), 4); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -88,6 +93,19 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, screen->target, texture_type, LP_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, min_lod, + screen->target, texture_type, + LP_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, max_lod, + screen->target, texture_type, + LP_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, lod_bias, + screen->target, texture_type, + LP_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, border_color, + screen->target, texture_type, + LP_JIT_TEXTURE_BORDER_COLOR); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, screen->target, texture_type); @@ -144,9 +162,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->engine) - LLVMDisposeExecutionEngine(screen->engine); - if(screen->pass) LLVMDisposePassManager(screen->pass); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index c94189413ab..114f21f2d16 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -54,6 +54,11 @@ struct lp_jit_texture uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; uint32_t img_stride[LP_MAX_TEXTURE_LEVELS]; const void *data[LP_MAX_TEXTURE_LEVELS]; + /* sampler state, actually */ + float min_lod; + float max_lod; + float lod_bias; + float border_color[4]; }; @@ -65,6 +70,10 @@ enum { LP_JIT_TEXTURE_ROW_STRIDE, LP_JIT_TEXTURE_IMG_STRIDE, LP_JIT_TEXTURE_DATA, + LP_JIT_TEXTURE_MIN_LOD, + LP_JIT_TEXTURE_MAX_LOD, + LP_JIT_TEXTURE_LOD_BIAS, + LP_JIT_TEXTURE_BORDER_COLOR, LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; @@ -135,7 +144,7 @@ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, - float facing, + uint32_t facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index ff0e207a54b..84c66dd36e8 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -92,8 +92,9 @@ llvmpipe_get_query_result(struct pipe_context *pipe, int i; if (!pq->fence) { - assert(0); /* query not in issued state */ - return FALSE; + /* no fence because there was no scene, so results is zero */ + *result = 0; + return TRUE; } if (!lp_fence_signalled(pq->fence)) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d7e6415e139..d358a983943 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -211,8 +211,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - unsigned clear_value = arg.clear_zstencil.value; - unsigned clear_mask = arg.clear_zstencil.mask; + uint32_t clear_value = arg.clear_zstencil.value; + uint32_t clear_mask = arg.clear_zstencil.mask; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; const unsigned block_size = scene->zsbuf.blocksize; @@ -220,7 +220,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint8_t *dst; unsigned i, j; - LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask); + LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", + __FUNCTION__, clear_value, clear_mask); /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in @@ -232,16 +233,31 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; + clear_value &= clear_mask; + switch (block_size) { case 1: + assert(clear_mask == 0xff); memset(dst, (uint8_t) clear_value, height * width); break; case 2: - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; - dst += dst_stride; + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } } break; case 4: @@ -258,7 +274,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, uint32_t *row = (uint32_t *)dst; for (j = 0; j < width; j++) { uint32_t tmp = ~clear_mask & *row; - *row++ = (clear_value & clear_mask) | tmp; + *row++ = clear_value | tmp; } dst += dst_stride; } @@ -318,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -349,10 +365,10 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -398,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned mask) { - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -430,10 +446,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, mask, @@ -474,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task, } +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->state = arg.state; +} + + /** * Set top row and left column of the tile's pixels to white. For debugging. @@ -581,10 +605,12 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_8, lp_rast_triangle_3_4, lp_rast_triangle_3_16, + lp_rast_triangle_4_16, lp_rast_shade_tile, lp_rast_shade_tile_opaque, lp_rast_begin_query, lp_rast_end_query, + lp_rast_set_state, }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c55b97a9d13..a64c152cf83 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,30 +78,28 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - float facing; /** Positive for front-facing, negative for back-facing */ - unsigned disable:1; /** Partially binned, disable this command */ - unsigned opaque:1; /** Is opaque */ - - float (*a0)[4]; - float (*dadx)[4]; - float (*dady)[4]; - - const struct lp_rast_state *state; + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned pad2; /* wasted space */ + unsigned pad3; /* wasted space */ + /* followed by a0, dadx, dady and planes[] */ }; - +/* Note: the order of these values is important as they are loaded by + * sse code in rasterization: + */ struct lp_rast_plane { - /* one-pixel sized trivial accept offsets for each plane */ - int ei; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo; - /* edge function values at minx,miny ?? */ int c; int dcdx; int dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; }; /** @@ -111,17 +109,24 @@ struct lp_rast_plane { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { - /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; - #ifdef DEBUG float v[3][2]; + float pad0; + float pad1; #endif - struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ }; +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + struct lp_rasterizer * lp_rast_create( unsigned num_threads ); @@ -149,9 +154,10 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; uint8_t clear_color[4]; struct { - unsigned value; - unsigned mask; + uint32_t value; + uint32_t mask; } clear_zstencil; + const struct lp_rast_state *state; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -238,12 +244,14 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_TRIANGLE_8 0x9 #define LP_RAST_OP_TRIANGLE_3_4 0xa #define LP_RAST_OP_TRIANGLE_3_16 0xb -#define LP_RAST_OP_SHADE_TILE 0xc -#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd -#define LP_RAST_OP_BEGIN_QUERY 0xe -#define LP_RAST_OP_END_QUERY 0xf - -#define LP_RAST_OP_MAX 0x10 +#define LP_RAST_OP_TRIANGLE_4_16 0xc +#define LP_RAST_OP_SHADE_TILE 0xd +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe +#define LP_RAST_OP_BEGIN_QUERY 0xf +#define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 + +#define LP_RAST_OP_MAX 0x12 #define LP_RAST_OP_MASK 0xff void diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 9fc78645a3a..e2783aa5683 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask) struct tile { int coverage; int overdraw; + const struct lp_rast_state *state; char data[TILE_SIZE][TILE_SIZE]; }; @@ -42,10 +43,12 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "triangle_8", "triangle_3_4", "triangle_3_16", + "triangle_4_16", "shade_tile", "shade_tile_opaque", "begin_query", "end_query", + "set_state", }; static const char *cmd_name(unsigned cmd) @@ -55,31 +58,31 @@ static const char *cmd_name(unsigned cmd) } static const struct lp_fragment_shader_variant * -get_variant( const struct cmd_block *block, - int k ) +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) { if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || - block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) - return block->arg[k].shade_tile->state->variant; - - if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || block->cmd[k] == LP_RAST_OP_TRIANGLE_7) - return block->arg[k].triangle.tri->inputs.state->variant; + return state->variant; return NULL; } static boolean -is_blend( const struct cmd_block *block, +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, int k ) { - const struct lp_fragment_shader_variant *variant = get_variant(block, k); + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); if (variant) return variant->key.blend.rt[0].blend_enable; @@ -92,6 +95,7 @@ is_blend( const struct cmd_block *block, static void debug_bin( const struct cmd_bin *bin ) { + const struct lp_rast_state *state; const struct cmd_block *head = bin->head; int i, j = 0; @@ -99,9 +103,12 @@ debug_bin( const struct cmd_bin *bin ) while (head) { for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + debug_printf("%d: %s %s\n", j, cmd_name(head->cmd[i]), - is_blend(head, i) ? "blended" : ""); + is_blend(state, head, i) ? "blended" : ""); } head = head->next; } @@ -133,7 +140,7 @@ debug_shade_tile(int x, int y, char val) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; unsigned i,j; if (inputs->disable) @@ -171,11 +178,12 @@ debug_triangle(int tilex, int tiley, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; unsigned i, nr_planes = 0; - boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ @@ -183,7 +191,7 @@ debug_triangle(int tilex, int tiley, } while (plane_mask) { - plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + plane[nr_planes].dcdy * tiley - plane[nr_planes].dcdx * tilex); @@ -235,12 +243,15 @@ do_debug_bin( struct tile *tile, for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { - boolean blend = is_blend(block, k); + boolean blend = is_blend(tile->state, block, k); char val = get_label(j); int count = 0; if (print_cmds) debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7370119e966..b30408f097b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -77,6 +77,7 @@ struct cmd_bin; struct lp_rasterizer_task { const struct cmd_bin *bin; + const struct lp_rast_state *state; struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ @@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_scene *scene = task->scene; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -260,10 +261,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -293,6 +294,14 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *, void lp_rast_triangle_3_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); + +void lp_rast_triangle_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a1f309d4b01..042c315635e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, } void +lp_rast_triangle_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_3(task, arg2); +} + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { @@ -230,144 +240,207 @@ sign_bits4(const __m128i *cstep, int cdiff) } -/* Special case for 3 plane triangle which is contained entirely - * within a 16x16 block. - */ +#define NR_PLANES 3 + + + + + + + void lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned outmask, inmask, partmask, partial_mask; - unsigned j; - __m128i cstep4[3][4]; - - outmask = 0; /* outside one or more trivial reject planes */ - partmask = 0; /* outside one or more trivial accept planes */ - - for (j = 0; j < 3; j++) { - const int dcdx = -plane[j].dcdx * 4; - const int dcdy = plane[j].dcdy * 4; - __m128i xdcdy = _mm_set1_epi32(dcdy); - - cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); - cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); - cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); - cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); - - { - const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; - - outmask |= sign_bits4(cstep4[j], c + cox); - partmask |= sign_bits4(cstep4[j], c + cio); - } - } + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); - if (outmask == 0xffff) - return; + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); - /* Mask of sub-blocks which are inside all trivial accept planes: - */ - inmask = ~partmask & 0xffff; + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); - /* Mask of sub-blocks which are inside all trivial reject planes, - * but outside at least one trivial accept plane: - */ - partial_mask = partmask & ~outmask; + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); - assert((partial_mask & inmask) == 0); + for (i = 0; i < 4; i++) { + __m128i cx = c; - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; - unsigned mask = 0xffff; - - partial_mask &= ~(1 << i); - - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * px - + plane[j].dcdy * py) * 4; - - mask &= ~sign_bits4(cstep4[j], cx); - } + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); - } + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int ix = (i & 3) * 4; - int iy = (i >> 2) * 4; - int px = x + ix; - int py = y + iy; + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - inmask &= ~(1 << i); + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - block_full_4(task, tri, px, py); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); } + + + void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; - unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xff); - const int y = task->y + (mask >> 8); - unsigned j; - - /* Iterate over partials: + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + { - unsigned mask = 0xffff; + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); - for (j = 0; j < 3; j++) { - const int cx = (plane[j].c - - plane[j].dcdx * x - + plane[j].dcdy * y); + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); - const int dcdx = -plane[j].dcdx; - const int dcdy = plane[j].dcdy; - __m128i xdcdy = _mm_set1_epi32(dcdy); + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); - __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); - __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); - __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); - __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); - __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); - __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - __m128i result = _mm_packs_epi16(cstep01, cstep23); + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); - /* Extract the sign bits - */ - mask &= ~_mm_movemask_epi8(result); - } + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); - if (mask) - lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); } } - +#undef NR_PLANES #endif @@ -383,10 +456,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define TAG(x) x##_3 #define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_4 #define NR_PLANES 4 +#define TRI_16 lp_rast_triangle_4_16 #include "lp_rast_tri_tmp.h" #define TAG(x) x##_5 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 9830a43ba55..4825d651c04 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 4 - 1; build_masks(c[j] + cox, cio - cox, @@ -156,6 +157,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); const int x = task->x, y = task->y; struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; @@ -172,7 +174,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[j] = tri->plane[i]; + plane[j] = tri_plane[i]; plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; @@ -180,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; const int cox = plane[j].eo * 16; - const int cio = plane[j].ei * 16 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 16 - 1; build_masks(c[j] + cox, cio - cox, @@ -245,6 +248,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } } +#if defined(PIPE_ARCH_SSE) && defined(TRI_16) +/* XXX: special case this when intersection is not required. + * - tile completely within bbox, + * - bbox completely within tile. + */ +void +TRI_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + unsigned outmask, partial_mask; + unsigned j; + __m128i cstep4[NR_PLANES][4]; + + int x = (mask & 0xff); + int y = (mask >> 8); + + outmask = 0; /* outside one or more trivial reject planes */ + + x += task->x; + y += task->y; + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); + + { + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int cox = plane[j].eo * 4; + + outmask |= sign_bits4(cstep4[j], c + cox); + } + } + + if (outmask == 0xffff) + return; + + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = 0xffff & ~outmask; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + unsigned mask = 0xffff; + + partial_mask &= ~(1 << i); + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c - 1 + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); + } +} +#endif + +#if defined(PIPE_ARCH_SSE) && defined(TRI_4) +void +TRI_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} +#endif + + + #undef TAG +#undef TRI_4 +#undef TRI_16 #undef NR_PLANES diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 8b504f23a33..a4fdf7cff36 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene ) for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = bin->tail = NULL; + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index dbef7692e42..622c522f11a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -41,6 +41,7 @@ #include "lp_debug.h" struct lp_scene_queue; +struct lp_rast_state; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -94,6 +95,7 @@ struct data_block { struct cmd_bin { ushort x; ushort y; + const struct lp_rast_state *last_state; /* most recent state set in bin */ struct cmd_block *head; struct cmd_block *tail; }; @@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene, assert(x < scene->tiles_x); assert(y < scene->tiles_y); - assert(cmd <= LP_RAST_OP_END_QUERY); + assert(cmd < LP_RAST_OP_MAX); if (tail == NULL || tail->count == CMD_BLOCK_MAX) { tail = lp_scene_new_cmd_block( scene, bin ); @@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene, } +static INLINE boolean +lp_scene_bin_cmd_with_state( struct lp_scene *scene, + unsigned x, unsigned y, + const struct lp_rast_state *state, + unsigned cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + + if (state != bin->last_state) { + bin->last_state = state; + if (!lp_scene_bin_command(scene, x, y, + LP_RAST_OP_SET_STATE, + lp_rast_arg_state(state))) + return FALSE; + } + + if (!lp_scene_bin_command( scene, x, y, cmd, arg )) + return FALSE; + + return TRUE; +} + + /* Add a command to all active bins. */ static INLINE boolean diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3854fd70af7..6118434d3d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -56,7 +56,7 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, enum setup_state, +static boolean set_scene_state( struct lp_setup_context *, enum setup_state, const char *reason); static boolean try_update_scene_state( struct lp_setup_context *setup ); @@ -167,7 +167,7 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) -static void +static boolean begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = setup->scene; @@ -181,6 +181,8 @@ begin_binning( struct lp_setup_context *setup ) /* Always create a fence: */ scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + if (!scene->fence) + return FALSE; /* Initialize the bin flags and x/y coords: */ @@ -192,7 +194,8 @@ begin_binning( struct lp_setup_context *setup ) } ok = try_update_scene_state(setup); - assert(ok); + if (!ok) + return FALSE; if (setup->fb.zsbuf && ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && @@ -208,7 +211,8 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_COLOR, setup->clear.color ); - assert(ok); + if (!ok) + return FALSE; } } @@ -216,12 +220,14 @@ begin_binning( struct lp_setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { if (!need_zsload) scene->has_depthstencil_clear = TRUE; + ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_CLEAR_ZSTENCIL, lp_rast_arg_clearzs( setup->clear.zsvalue, setup->clear.zsmask)); - assert(ok); + if (!ok) + return FALSE; } } @@ -229,15 +235,16 @@ begin_binning( struct lp_setup_context *setup ) ok = lp_scene_bin_everywhere( scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(setup->active_query) ); - assert(ok); + if (!ok) + return FALSE; } - setup->clear.flags = 0; setup->clear.zsmask = 0; setup->clear.zsvalue = 0; LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); + return TRUE; } @@ -246,12 +253,12 @@ begin_binning( struct lp_setup_context *setup ) * * TODO: fast path for fullscreen clears and no triangles. */ -static void +static boolean execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - begin_binning( setup ); + return begin_binning( setup ); } const char *states[] = { @@ -262,7 +269,7 @@ const char *states[] = { }; -static void +static boolean set_scene_state( struct lp_setup_context *setup, enum setup_state new_state, const char *reason) @@ -270,7 +277,7 @@ set_scene_state( struct lp_setup_context *setup, unsigned old_state = setup->state; if (old_state == new_state) - return; + return TRUE; if (LP_DEBUG & DEBUG_SCENE) { debug_printf("%s old %s new %s%s%s\n", @@ -294,12 +301,14 @@ set_scene_state( struct lp_setup_context *setup, break; case SETUP_ACTIVE: - begin_binning( setup ); + if (!begin_binning( setup )) + goto fail; break; case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) - execute_clears( setup ); + if (!execute_clears( setup )) + goto fail; lp_setup_rasterize_scene( setup ); assert(setup->scene == NULL); @@ -307,9 +316,21 @@ set_scene_state( struct lp_setup_context *setup, default: assert(0 && "invalid setup state mode"); + goto fail; } setup->state = new_state; + return TRUE; + +fail: + if (setup->scene) { + lp_scene_end_rasterization(setup->scene); + setup->scene = NULL; + } + + setup->state = SETUP_FLUSHED; + lp_setup_reset( setup ); + return FALSE; } @@ -377,16 +398,19 @@ lp_setup_try_clear( struct lp_setup_context *setup, } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, depth, stencil); - zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format, + + zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, zmask, smask); + + zsvalue &= zsmask; } if (setup->state == SETUP_ACTIVE) { @@ -431,7 +455,7 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_COLOR) { memcpy(setup->clear.color.clear_color, &color_arg, - sizeof color_arg); + sizeof setup->clear.color.clear_color); } } @@ -490,12 +514,14 @@ void lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, boolean point_size_per_vertex, - uint sprite) + uint sprite_coord_enable, + uint sprite_coord_origin) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->point_size = point_size; - setup->sprite = sprite; + setup->sprite_coord_enable = sprite_coord_enable; + setup->sprite_coord_origin = sprite_coord_origin; setup->point_size_per_vertex = point_size_per_vertex; } @@ -624,7 +650,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; - if(view) { + if (view) { struct pipe_resource *tex = view->texture; struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); struct lp_jit_texture *jit_tex; @@ -683,6 +709,38 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, /** + * Called during state validation when LP_NEW_SAMPLER is set. + */ +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + const struct pipe_sampler_state **samplers) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; + + if (sampler) { + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; + + jit_tex->min_lod = sampler->min_lod; + jit_tex->max_lod = sampler->max_lod; + jit_tex->lod_bias = sampler->lod_bias; + COPY_4V(jit_tex->border_color, sampler->border_color); + } + } + + setup->dirty |= LP_SETUP_NEW_FS; +} + + +/** * Is the given texture referenced by any scene? * Note: we have to check all scenes including any scenes currently * being rendered and the current scene being built. @@ -839,7 +897,7 @@ try_update_scene_state( struct lp_setup_context *setup ) return TRUE; } -void +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene ) { @@ -870,20 +928,38 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->setup.variant->key.size) == 0); } - if (update_scene) - set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + if (update_scene) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } /* Only call into update_scene_state() if we already have a * scene: */ if (update_scene && setup->scene) { assert(setup->state == SETUP_ACTIVE); - if (!try_update_scene_state(setup)) { - lp_setup_flush_and_restart(setup); - if (!try_update_scene_state(setup)) - assert(0); - } + + if (try_update_scene_state(setup)) + return TRUE; + + /* Update failed, try to restart the scene. + * + * Cannot call lp_setup_flush_and_restart() directly here + * because of potential recursion. + */ + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__)) + return FALSE; + + if (!setup->scene) + return FALSE; + + return try_update_scene_state(setup); } + + return TRUE; } @@ -987,12 +1063,12 @@ lp_setup_begin_query(struct lp_setup_context *setup, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_BEGIN_QUERY, lp_rast_arg_query(pq))) { - assert(0); return; } } @@ -1036,14 +1112,20 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) } -void +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup) { if (0) debug_printf("%s\n", __FUNCTION__); assert(setup->state == SETUP_ACTIVE); - set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__); - lp_setup_update_state(setup, TRUE); + + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!lp_setup_update_state(setup, TRUE)) + return FALSE; + + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 19078ebbcb4..ebb18f81344 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -85,7 +85,8 @@ void lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, boolean point_size_per_vertex, - uint sprite); + uint sprite_coord_enable, + uint sprite_coord_origin); void lp_setup_set_setup_variant( struct lp_setup_context *setup, @@ -121,6 +122,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, unsigned num, struct pipe_sampler_view **views); +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + const struct pipe_sampler_state **samplers); + unsigned lp_setup_is_resource_referenced( const struct lp_setup_context *setup, const struct pipe_resource *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index ff3b69afa83..dc2533bedc5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -76,7 +76,7 @@ struct lp_setup_context uint prim; uint vertex_size; uint nr_vertices; - uint sprite; + uint sprite_coord_enable, sprite_coord_origin; uint vertex_buffer_size; void *vertex_buffer; @@ -164,12 +164,12 @@ void lp_setup_choose_point( struct lp_setup_context *setup ); void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct lp_setup_context *setup, +boolean lp_setup_update_state( struct lp_setup_context *setup, boolean update_scene); void lp_setup_destroy( struct lp_setup_context *setup ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); void lp_setup_print_triangle(struct lp_setup_context *setup, @@ -195,6 +195,4 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, const struct u_rect *bbox, int nr_planes ); -void lp_setup_flush_and_restart(struct lp_setup_context *setup); - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 928ffdc5cb5..827413bb33e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -47,6 +47,10 @@ struct lp_line_info { const float (*v1)[4]; const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -54,14 +58,14 @@ struct lp_line_info { * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, + struct lp_line_info *info, unsigned slot, const float value, unsigned i ) { - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -70,7 +74,6 @@ static void constant_coef( struct lp_setup_context *setup, * for a triangle. */ static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -83,10 +86,10 @@ static void linear_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - + info->a0[slot][i] = (a1 - (dadx * (info->v1[0][0] - setup->pixel_offset) + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -101,7 +104,6 @@ static void linear_coef( struct lp_setup_context *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -116,43 +118,42 @@ static void perspective_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (info->v1[0][0] - setup->pixel_offset) + - dady * (info->v1[0][1] - setup->pixel_offset))); + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, info, slot, 0, 2); + linear_coef(setup, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, info, slot, 0, 3); + linear_coef(setup, info, slot, 0, 3); } } @@ -160,7 +161,6 @@ setup_fragcoord_coef( struct lp_setup_context *setup, * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { const struct lp_setup_variant_key *key = &setup->setup.variant->key; @@ -179,25 +179,25 @@ static void setup_line_coefficients( struct lp_setup_context *setup, if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, info, slot+1, vert_attr, i); + linear_coef(setup, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, info, slot+1, vert_attr, i); + perspective_coef(setup, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -210,6 +210,12 @@ static void setup_line_coefficients( struct lp_setup_context *setup, fragcoord_usage_mask |= usage_mask; break; + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, 1.0, i); + break; + default: assert(0); } @@ -217,7 +223,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, info, 0, + setup_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -274,6 +280,7 @@ try_setup_line( struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; @@ -296,6 +303,7 @@ try_setup_line( struct lp_setup_context *setup, float x2diff; float y2diff; float dx, dy; + float area; boolean draw_start; boolean draw_end; @@ -315,6 +323,18 @@ try_setup_line( struct lp_setup_context *setup, dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; + area = (dx * dx + dy * dy); + if (area == 0) { + LP_COUNT(nr_culled_tris); + return TRUE; + } + + info.oneoverarea = 1.0f / area; + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; + /* X-MAJOR LINE */ if (fabsf(dx) >= fabsf(dy)) { @@ -460,7 +480,7 @@ try_setup_line( struct lp_setup_context *setup, else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; - draw_end = (xintersect < 1.0 && xintersect > 0.0); + draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? @@ -498,7 +518,7 @@ try_setup_line( struct lp_setup_context *setup, x_offset_end = y_offset_end * dxdy; } } - + /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; @@ -566,39 +586,35 @@ try_setup_line( struct lp_setup_context *setup, #endif /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; - info.oneoverarea = 1.0f / (dx * dx + dy * dy); - info.dx = dx; - info.dy = dy; - info.v1 = v1; - info.v2 = v2; - /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, &info); + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + setup_line_coefficients(setup, &info); - line->inputs.facing = 1.0F; - line->inputs.state = setup->fs.stored; + line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { - struct lp_rast_plane *plane = &line->plane[i]; /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. @@ -614,38 +630,34 @@ try_setup_line( struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } @@ -668,29 +680,25 @@ try_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].dcdx = -1; - line->plane[4].dcdy = 0; - line->plane[4].c = 1-bbox.x0; - line->plane[4].ei = 0; - line->plane[4].eo = 1; - - line->plane[5].dcdx = 1; - line->plane[5].dcdy = 0; - line->plane[5].c = bbox.x1+1; - line->plane[5].ei = -1; - line->plane[5].eo = 0; - - line->plane[6].dcdx = 0; - line->plane[6].dcdy = 1; - line->plane[6].c = 1-bbox.y0; - line->plane[6].ei = 0; - line->plane[6].eo = 1; - - line->plane[7].dcdx = 0; - line->plane[7].dcdy = -1; - line->plane[7].c = bbox.y1+1; - line->plane[7].ei = -1; - line->plane[7].eo = 0; + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-bbox.x0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = bbox.x1+1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-bbox.y0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = bbox.y1+1; + plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); @@ -703,10 +711,11 @@ static void lp_setup_line( struct lp_setup_context *setup, { if (!try_setup_line( setup, v0, v1 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_line( setup, v0, v1 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index c98966022ee..146f1bd07ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -33,7 +33,6 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "lp_perf.h" -#include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" #include "lp_state_setup.h" @@ -47,63 +46,118 @@ struct point_info { int dx01, dx12; const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - unsigned slot, - const float value, - unsigned i ) +static void +constant_coef(struct lp_setup_context *setup, + struct point_info *info, + unsigned slot, + const float value, + unsigned i) { - point->inputs.a0[slot][i] = value; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } -static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) + +static void +point_persp_coeff(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i) { - if (i == 0) { - float dadx = FIXED_ONE / (float)info->dx12; + /* + * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A + * better stratergy would be to take the primitive in consideration when + * generating the fragment shader key, and therefore avoid the per-fragment + * perspective divide. + */ + + float w0 = info->v0[0][3]; + + assert(i < 4); + + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +/** + * Setup automatic texcoord coefficients (for sprite rendering). + * \param slot the vertex attribute slot to setup + * \param i the attribute channel in [0,3] + * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x + * \param perspective does the shader expects pre-multiplied w, i.e., + * LP_INTERP_PERSPECTIVE is specified in the shader key + */ +static void +texcoord_coef(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i, + unsigned sprite_coord_origin, + boolean perspective) +{ + float w0 = info->v0[0][3]; + + assert(i < 4); + + if (i == 0) { + float dadx = FIXED_ONE / (float)info->dx12; float dady = 0.0f; - point->inputs.dadx[slot][i] = dadx; - point->inputs.dady[slot][i] = dady; - point->inputs.a0[slot][i] = (0.5 - - (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + - dady * ((float)info->v0[0][1] - setup->pixel_offset))); - } + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; - else if (i == 1) { - float dadx = 0.0f; - float dady = FIXED_ONE / (float)info->dx12; - - point->inputs.dadx[slot][i] = dadx; - point->inputs.dady[slot][i] = dady; - point->inputs.a0[slot][i] = (0.5 - - (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + - dady * ((float)info->v0[0][1] - setup->pixel_offset))); + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; + } } + else if (i == 1) { + float dadx = 0.0f; + float dady = FIXED_ONE / (float)info->dx12; + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; + if (sprite_coord_origin == PIPE_SPRITE_COORD_LOWER_LEFT) { + dady = -dady; + } + + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; + } + } else if (i == 2) { - point->inputs.a0[slot][i] = 0.0f; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; } - - else if (i == 3) { - point->inputs.a0[slot][i] = 1.0f; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + else { + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; } - } @@ -115,45 +169,45 @@ static void perspective_coef( struct lp_setup_context *setup, */ static void setup_point_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, + struct point_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - point->inputs.a0[slot][0] = 0.0; - point->inputs.dadx[slot][0] = 1.0; - point->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - point->inputs.a0[slot][1] = 0.0; - point->inputs.dadx[slot][1] = 0.0; - point->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - constant_coef(setup, point, slot, info->v0[0][2], 2); + constant_coef(setup, info, slot, info->v0[0][2], 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - constant_coef(setup, point, slot, info->v0[0][3], 3); + constant_coef(setup, info, slot, info->v0[0][3], 3); } } + /** * Compute the point->coef[] array dadx, dady, a0 values. */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { const struct lp_setup_variant_key *key = &setup->setup.variant->key; + const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; @@ -162,9 +216,15 @@ setup_point_coefficients( struct lp_setup_context *setup, for (slot = 0; slot < key->num_inputs; slot++) { unsigned vert_attr = key->inputs[slot].src_index; unsigned usage_mask = key->inputs[slot].usage_mask; + enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } - switch (key->inputs[slot].interp) { + switch (interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from @@ -174,37 +234,63 @@ setup_point_coefficients( struct lp_setup_context *setup, fragcoord_usage_mask |= usage_mask; break; + case LP_INTERP_LINEAR: + /* Sprite tex coords may use linear interpolation someday */ + /* fall-through */ case LP_INTERP_PERSPECTIVE: - /* For point sprite textures */ - if (setup->fs.current.variant->shader->info.input_semantic_name[slot] - == TGSI_SEMANTIC_GENERIC) - { - int index = setup->fs.current.variant->shader->info.input_semantic_index[slot]; - - if (setup->sprite & (1 << index)) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(setup, point, info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; + /* check if the sprite coord flag is set for this attribute. + * If so, set it up so it up so x and y vary from 0 to 1. + */ + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; + /* Note that sprite_coord enable is a bitfield of + * PIPE_MAX_SHADER_OUTPUTS bits. + */ + if (semantic_index < PIPE_MAX_SHADER_OUTPUTS && + (setup->sprite_coord_enable & (1 << semantic_index))) { + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + texcoord_coef(setup, info, slot + 1, i, + setup->sprite_coord_origin, + perspective); + } + } + break; } } - - /* Otherwise fallthrough */ - default: + /* fall-through */ + case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + if (usage_mask & (1 << i)) { + if (perspective) { + point_persp_coeff(setup, info, slot+1, i); + } + else { + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); + } + } } + break; + + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, 1.0, i); + break; + + default: + assert(0); + break; } } /* The internal position input is in slot zero: */ - setup_point_fragcoord_coef(setup, point, info, 0, + setup_point_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } + static INLINE int subpixel_snap(float a) { @@ -285,55 +371,57 @@ try_setup_point( struct lp_setup_context *setup, info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ - setup_point_coefficients(setup, point, &info); + setup_point_coefficients(setup, &info); - point->inputs.facing = 1.0F; - point->inputs.state = setup->fs.stored; + point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { - point->plane[0].dcdx = -1; - point->plane[0].dcdy = 0; - point->plane[0].c = 1-bbox.x0; - point->plane[0].ei = 0; - point->plane[0].eo = 1; - - point->plane[1].dcdx = 1; - point->plane[1].dcdy = 0; - point->plane[1].c = bbox.x1+1; - point->plane[1].ei = -1; - point->plane[1].eo = 0; - - point->plane[2].dcdx = 0; - point->plane[2].dcdy = 1; - point->plane[2].c = 1-bbox.y0; - point->plane[2].ei = 0; - point->plane[2].eo = 1; - - point->plane[3].dcdx = 0; - point->plane[3].dcdy = -1; - point->plane[3].c = bbox.y1+1; - point->plane[3].ei = -1; - point->plane[3].eo = 0; + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); } -static void lp_setup_point( struct lp_setup_context *setup, - const float (*v0)[4] ) +static void +lp_setup_point(struct lp_setup_context *setup, + const float (*v0)[4]) { if (!try_setup_point( setup, v0 )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; if (!try_setup_point( setup, v0 )) - assert(0); + return; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 43617a6b672..03036cd75d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -32,6 +32,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_rast.h" @@ -40,7 +41,9 @@ #define NUM_CHANNELS 4 - +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> +#endif static INLINE int subpixel_snap(float a) @@ -70,27 +73,28 @@ fixed_to_float(int a) */ struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned num_inputs, + unsigned nr_inputs, unsigned nr_planes, unsigned *tri_size) { - unsigned input_array_sz = NUM_CHANNELS * (num_inputs + 1) * sizeof(float); + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; - unsigned tri_bytes, bytes; - char *inputs; - tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); - bytes = tri_bytes + (3 * input_array_sz); + *tri_size = (sizeof(struct lp_rast_triangle) + + 3 * input_array_sz + + plane_sz); - tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); + if (tri == NULL) + return NULL; - if (tri) { - inputs = ((char *)tri) + tri_bytes; - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + tri->inputs.stride = input_array_sz; - *tri_size = bytes; + { + char *a = (char *)tri; + char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } return tri; @@ -161,8 +165,9 @@ lp_setup_print_triangle(struct lp_setup_context *setup, } +#define MAX_PLANES 8 static unsigned -lp_rast_tri_tab[9] = { +lp_rast_tri_tab[MAX_PLANES+1] = { 0, /* should be impossible */ LP_RAST_OP_TRIANGLE_1, LP_RAST_OP_TRIANGLE_2, @@ -200,14 +205,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); } else { LP_COUNT(nr_shade_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -227,12 +234,11 @@ do_triangle_ccw(struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; - int x[3]; - int y[3]; - int area; + struct lp_rast_plane *plane; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@ -249,10 +255,12 @@ do_triangle_ccw(struct lp_setup_context *setup, x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = 0; y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - + y[3] = 0; + /* Bounding rectangle (in pixels) */ { @@ -296,7 +304,7 @@ do_triangle_ccw(struct lp_setup_context *setup, if (!tri) return FALSE; -#ifdef DEBUG +#if 0 tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; @@ -305,104 +313,173 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; - - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; - - area = (tri->plane[0].dcdy * tri->plane[2].dcdx - - tri->plane[2].dcdy * tri->plane[0].dcdx); - LP_COUNT(nr_tris); - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return TRUE; - } - /* Setup parameter interpolants: */ setup->setup.variant->jit_function( v0, v1, v2, frontfacing, - tri->inputs.a0, - tri->inputs.dadx, - tri->inputs.dady, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs), &setup->setup.variant->key ); - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; if (0) lp_dump_setup_coef(&setup->setup.variant->key, - (const float (*)[4])tri->inputs.a0, - (const float (*)[4])tri->inputs.dadx, - (const float (*)[4])tri->inputs.dady); - - for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); + + plane = GET_PLANES(tri); + +#if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); + + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); - /* half-edge constants, will be interated over the whole render - * target. + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); + } +#else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } +#endif - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } @@ -425,29 +502,25 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].dcdx = -1; - tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-bbox.x0; - tri->plane[3].ei = 0; - tri->plane[3].eo = 1; - - tri->plane[4].dcdx = 1; - tri->plane[4].dcdy = 0; - tri->plane[4].c = bbox.x1+1; - tri->plane[4].ei = -1; - tri->plane[4].eo = 0; - - tri->plane[5].dcdx = 0; - tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-bbox.y0; - tri->plane[5].ei = 0; - tri->plane[5].eo = 1; - - tri->plane[6].dcdx = 0; - tri->plane[6].dcdy = -1; - tri->plane[6].c = bbox.y1+1; - tri->plane[6].ei = -1; - tri->plane[6].eo = 0; + plane[3].dcdx = -1; + plane[3].dcdy = 0; + plane[3].c = 1-bbox.x0; + plane[3].eo = 1; + + plane[4].dcdx = 1; + plane[4].dcdy = 0; + plane[4].c = bbox.x1+1; + plane[4].eo = 0; + + plane[5].dcdx = 0; + plane[5].dcdy = 1; + plane[5].c = 1-bbox.y0; + plane[5].eo = 1; + + plane[6].dcdx = 0; + plane[6].dcdy = -1; + plane[6].c = bbox.y1+1; + plane[6].eo = 0; } return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); @@ -500,56 +573,63 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | (bbox->y1 - (bbox->y0 & ~3))); - if (nr_planes == 3) { - if (sz < 4 && dx < 64) - { - /* Triangle is contained in a single 4x4 stamp: - */ - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_4, - lp_rast_arg_triangle(tri, mask) ); - } - - if (sz < 16 && dx < 64) - { - int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); - - /* Triangle is contained in a single 16x16 block: - */ - return lp_scene_bin_command( scene, - bbox->x0/64, bbox->y0/64, - LP_RAST_OP_TRIANGLE_3_16, - lp_rast_arg_triangle(tri, mask) ); - } - } - - /* Determine which tile(s) intersect the triangle's bounding box */ if (dx < TILE_SIZE) { int ix0 = bbox->x0 / TILE_SIZE; int iy0 = bbox->y0 / TILE_SIZE; + int px = bbox->x0 & 63 & ~3; + int py = bbox->y0 & 63 & ~3; + int mask = px | (py << 8); assert(iy0 == bbox->y1 / TILE_SIZE && ix0 == bbox->x1 / TILE_SIZE); + if (nr_planes == 3) { + if (sz < 4) + { + /* Triangle is contained in a single 4x4 stamp: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16) + { + /* Triangle is contained in a single 16x16 block: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); + } + } + else if (nr_planes == 4 && sz < 16) + { + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); + } + + /* Triangle is contained in a single tile: */ - return lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } else { - int c[7]; - int ei[7]; - int eo[7]; - int xstep[7]; - int ystep[7]; + struct lp_rast_plane *plane = GET_PLANES(tri); + int c[MAX_PLANES]; + int ei[MAX_PLANES]; + int eo[MAX_PLANES]; + int xstep[MAX_PLANES]; + int ystep[MAX_PLANES]; int x, y; int ix0 = bbox->x0 / TILE_SIZE; @@ -558,14 +638,17 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { - c[i] = (tri->plane[i].c + - tri->plane[i].dcdy * iy0 * TILE_SIZE - - tri->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = tri->plane[i].ei << TILE_ORDER; - eo[i] = tri->plane[i].eo << TILE_ORDER; - xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); - ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + c[i] = (plane[i].c + + plane[i].dcdy * iy0 * TILE_SIZE - + plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = (plane[i].dcdy - + plane[i].dcdx - + plane[i].eo) << TILE_ORDER; + + eo[i] = plane[i].eo << TILE_ORDER; + xstep[i] = -(plane[i].dcdx << TILE_ORDER); + ystep[i] = plane[i].dcdy << TILE_ORDER; } @@ -578,7 +661,7 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, for (y = iy0; y <= iy1; y++) { boolean in = FALSE; /* are we inside the triangle? */ - int cx[7]; + int cx[MAX_PLANES]; for (i = 0; i < nr_planes; i++) cx[i] = c[i]; @@ -607,9 +690,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - if (!lp_scene_bin_command( scene, x, y, - lp_rast_tri_tab[count], - lp_rast_arg_triangle(tri, partial) )) + + if (!lp_scene_bin_cmd_with_state( scene, x, y, + setup->fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) goto fail; LP_COUNT(nr_partially_covered_64); @@ -648,40 +733,62 @@ fail: /** - * Draw triangle if it's CW, cull otherwise. + * Try to draw the triangle, restart the scene on failure. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void retry_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) { - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + if (!do_triangle_ccw( setup, v0, v1, v2, front )) { - lp_setup_flush_and_restart(setup); + if (!lp_setup_flush_and_restart(setup)) + return; - if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) - assert(0); + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + return; } } +static INLINE float +calc_area(const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + return dx01 * dy20 - dx20 * dy01; +} + /** - * Draw triangle if it's CCW, cull otherwise. + * Draw triangle if it's CW, cull otherwise. */ -static void triangle_ccw( struct lp_setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - { - lp_setup_flush_and_restart(setup); - if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) - assert(0); - } + float area = calc_area(v0, v1, v2); + + if (area < 0.0f) + retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface); } +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface); +} /** * Draw triangle whether it's CW or CCW. @@ -691,18 +798,12 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - const float det = ex * fy - ey * fx; - if (det < 0.0f) - triangle_ccw( setup, v0, v1, v2 ); - else if (det > 0.0f) - triangle_cw( setup, v0, v1, v2 ); + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + else if (area < 0.0f) + retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 6308561f242..9c1f0fe7939 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -141,7 +141,8 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: @@ -338,7 +339,8 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup, TRUE); + if (!lp_setup_update_state(setup, TRUE)) + return; switch (setup->prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index de242aa93ca..0f5f7369e04 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,14 +67,14 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); - for (i = 0; i < lpfs->info.num_inputs; i++) { + for (i = 0; i < lpfs->info.base.num_inputs; i++) { /* * Search for each input in current vs output: */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); /* * Emit the requested fs attribute for all but position. @@ -94,7 +94,6 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_compute_vertex_size(vinfo); lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - } @@ -153,11 +152,16 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) lp_setup_set_fs_constants(llvmpipe->setup, llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]); - if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW) + if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW)) lp_setup_set_fragment_sampler_views(llvmpipe->setup, llvmpipe->num_fragment_sampler_views, llvmpipe->fragment_sampler_views); + if (llvmpipe->dirty & (LP_NEW_SAMPLER)) + lp_setup_set_fragment_sampler_state(llvmpipe->setup, + llvmpipe->num_samplers, + llvmpipe->sampler); + llvmpipe->dirty = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ad058e384ad..9fbedac165f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -99,74 +99,12 @@ #include <llvm-c/Analysis.h> +#include <llvm-c/BitWriter.h> static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter) -{ - const struct util_format_description *format_desc; - struct lp_type dst_type; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - /* - * Depths are expected to be between 0 and 1, even if they are stored in - * floats. Setting these bits here will ensure that the lp_build_conv() call - * below won't try to unnecessarily clamp the incoming values. - */ - if(src_type.floating) { - src_type.sign = FALSE; - src_type.norm = TRUE; - } - else { - assert(!src_type.sign); - assert(src_type.norm); - } - - /* Pick the depth type. */ - dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); - - /* FIXME: Cope with a depth test type with a different bit width. */ - assert(dst_type.width == src_type.width); - assert(dst_type.length == src_type.length); - - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); - - dst_ptr = LLVMBuildBitCast(builder, - dst_ptr, - LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - dst_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -248,6 +186,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. @@ -261,7 +219,7 @@ generate_fs(struct lp_fragment_shader *shader, struct lp_type type, LLVMValueRef context_ptr, unsigned i, - const struct lp_build_interp_soa_context *interp, + struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef (*color)[4], @@ -271,18 +229,52 @@ generate_fs(struct lp_fragment_shader *shader, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - LLVMValueRef z = interp->pos[2]; + LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -293,20 +285,14 @@ generate_fs(struct lp_fragment_shader *shader, consts_ptr = lp_jit_context_constants(builder, context_ptr); - flow = lp_build_flow_create(builder); - memset(outputs, 0, sizeof outputs); - lp_build_flow_scope_begin(flow); - /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); } } - lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ if (partial_mask) { @@ -318,74 +304,126 @@ generate_fs(struct lp_fragment_shader *shader, } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + lp_build_mask_begin(&mask, builder, type, *pmask); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + lp_build_interp_soa_update_inputs(interp, i); + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler, &shader->info); + outputs, sampler, &shader->info.base); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); - } - - color[cbuf][chan] = out; - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } - } + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } - lp_build_mask_end(&mask); + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); + } + } + } + } - *pmask = mask.value; + if (counter) + lp_build_occlusion_count(builder, type, + lp_build_mask_value(&mask), counter); + *pmask = lp_build_mask_end(&mask); } @@ -406,10 +444,10 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMValueRef const_ptr; @@ -420,10 +458,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); - flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ - lp_build_mask_begin(&mask_ctx, flow, type, mask); + lp_build_mask_begin(&mask_ctx, builder, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -456,7 +493,6 @@ generate_blend(const struct pipe_blend_state *blend, } lp_build_mask_end(&mask_ctx); - lp_build_flow_destroy(flow); } @@ -501,6 +537,7 @@ generate_fragment(struct llvmpipe_screen *screen, LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; + const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; @@ -508,8 +545,8 @@ generate_fragment(struct llvmpipe_screen *screen, /* Adjust color input interpolation according to flatshade state: */ - memcpy(inputs, shader->inputs, shader->info.num_inputs * sizeof inputs[0]); - for (i = 0; i < shader->info.num_inputs; i++) { + memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.base.num_inputs; i++) { if (inputs[i].interp == LP_INTERP_COLOR) { if (key->flatshade) inputs[i].interp = LP_INTERP_CONSTANT; @@ -553,12 +590,12 @@ generate_fragment(struct llvmpipe_screen *screen, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[3] = LLVMInt32Type(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ @@ -616,7 +653,7 @@ generate_fragment(struct llvmpipe_screen *screen, * already included in the shader key. */ lp_build_interp_soa_init(&interp, - shader->info.num_inputs, + shader->info.base.num_inputs, inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, @@ -626,15 +663,16 @@ generate_fragment(struct llvmpipe_screen *screen, sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ + zs_format_desc = util_format_description(key->zsbuf_format); + for(i = 0; i < num_fs; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + i*fs_type.length*zs_format_desc->block.bits/8, + 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); - - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); generate_fs(shader, key, builder, @@ -670,9 +708,18 @@ generate_fragment(struct llvmpipe_screen *screen, * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; + + for (i = 0; i < num_fs; i++) { + fs_color_vals[i] = + LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); + } + lp_build_conv(builder, fs_type, blend_type, - fs_out_color[cbuf][chan], num_fs, + fs_color_vals, + num_fs, &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } @@ -695,14 +742,23 @@ generate_fragment(struct llvmpipe_screen *screen, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.base.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 @@ -727,12 +783,17 @@ generate_fragment(struct llvmpipe_screen *screen, /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); - if (gallivm_debug & GALLIVM_DEBUG_IR) { + if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); } + /* Dump byte code to a file */ + if (0) { + LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + } + /* * Translate the LLVM IR into machine code. */ @@ -741,7 +802,7 @@ generate_fragment(struct llvmpipe_screen *screen, variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { + if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { lp_disassemble(f); } lp_func_delete_body(function); @@ -756,6 +817,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("fs variant %p:\n", (void *) key); + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); } @@ -780,6 +844,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); } + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + if (key->blend.logicop_enable) { debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); } @@ -792,31 +860,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); - for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) { - if (key->sampler[i].format) { - debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", - util_format_name(key->sampler[i].format)); - debug_printf(" .target = %s\n", - util_dump_tex_target(key->sampler[i].target, TRUE)); - debug_printf(" .pot = %u %u %u\n", - key->sampler[i].pot_width, - key->sampler[i].pot_height, - key->sampler[i].pot_depth); - debug_printf(" .wrap = %s %s %s\n", - util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), - util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); - debug_printf(" .min_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); - debug_printf(" .min_mip_filter = %s\n", - util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); - debug_printf(" .mag_img_filter = %s\n", - util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) - debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); - debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); - } + for (i = 0; i < key->nr_samplers; ++i) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", key->sampler[i].apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", key->sampler[i].apply_max_lod); } } @@ -871,7 +941,7 @@ generate_variant(struct llvmpipe_screen *screen, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !shader->info.uses_kill + !shader->info.base.uses_kill ? TRUE : FALSE; @@ -896,6 +966,7 @@ static void * llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; int nr_samplers; int i; @@ -908,20 +979,27 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, make_empty_list(&shader->variants); /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &shader->info); + lp_build_tgsi_info(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); - nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ); + if (shader->draw_data == NULL) { + FREE((void *) shader->base.tokens); + FREE(shader); + return NULL; + } + + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); - for (i = 0; i < shader->info.num_inputs; i++) { - shader->inputs[i].usage_mask = shader->info.input_usage_mask[i]; + for (i = 0; i < shader->info.base.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; - switch (shader->info.input_interpolate[i]) { + switch (shader->info.base.input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: shader->inputs[i].interp = LP_INTERP_CONSTANT; break; @@ -936,7 +1014,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, break; } - switch (shader->info.input_semantic_name[i]) { + switch (shader->info.base.input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: /* Colors may be either linearly or constant interpolated in * the fragment shader, but that information isn't available @@ -963,8 +1041,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); - for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { - unsigned usage_mask = shader->info.input_usage_mask[attrib]; + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; debug_printf(" IN[%u].%s%s%s%s\n", attrib, usage_mask & TGSI_WRITEMASK_X ? "x" : "", @@ -989,6 +1067,9 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) draw_flush(llvmpipe->draw); + draw_bind_fragment_shader(llvmpipe->draw, + (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; @@ -1046,6 +1127,8 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) li = next; } + draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); @@ -1087,7 +1170,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, * Return the blend factor equivalent to a destination alpha of one. */ static INLINE unsigned -force_dst_alpha_one(unsigned factor, boolean alpha) +force_dst_alpha_one(unsigned factor) { switch(factor) { case PIPE_BLENDFACTOR_DST_ALPHA: @@ -1098,15 +1181,6 @@ force_dst_alpha_one(unsigned factor, boolean alpha) return PIPE_BLENDFACTOR_ZERO; } - if (alpha) { - switch(factor) { - case PIPE_BLENDFACTOR_DST_COLOR: - return PIPE_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return PIPE_BLENDFACTOR_ZERO; - } - } - return factor; } @@ -1183,21 +1257,24 @@ make_variant_key(struct llvmpipe_context *lp, * * TODO: This should be generalized to all channels for better * performance, but only alpha causes correctness issues. + * + * Also, force rgb/alpha func/factors match, to make AoS blending easier. */ if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) { - blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor, FALSE); - blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor, FALSE); - blend_rt->alpha_src_factor = force_dst_alpha_one(blend_rt->alpha_src_factor, TRUE); - blend_rt->alpha_dst_factor = force_dst_alpha_one(blend_rt->alpha_dst_factor, TRUE); + blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor); + blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor); + blend_rt->alpha_func = blend_rt->rgb_func; + blend_rt->alpha_src_factor = blend_rt->rgb_src_factor; + blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor; } } /* This value will be the same for all the variants of a given shader: */ - key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; for(i = 0; i < key->nr_samplers; ++i) { - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index f73c7801c00..7d58c4936c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ #include "lp_bld_interp.h" /* for struct lp_shader_input */ @@ -97,10 +98,12 @@ struct lp_fragment_shader { struct pipe_shader_state base; - struct tgsi_shader_info info; + struct lp_tgsi_info info; struct lp_fs_variant_list_item variants; + struct draw_fragment_shader *draw_data; + /* For debugging/profiling purposes */ unsigned variant_key_size; unsigned no; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 0bad7320f3e..dbd73812e45 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -78,8 +78,9 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) lp_setup_set_point_state( llvmpipe->setup, llvmpipe->rasterizer->point_size, llvmpipe->rasterizer->point_size_per_vertex, - llvmpipe->rasterizer->sprite_coord_enable); - } + llvmpipe->rasterizer->sprite_coord_enable, + llvmpipe->rasterizer->sprite_coord_mode); + } llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 17a4a0ed02d..1dd866195d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, struct pipe_sampler_view **views) { unsigned i; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; assert(num <= PIPE_MAX_VERTEX_SAMPLERS); if (!num) diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index ee4991bf8d2..a8dee280dd5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -603,7 +603,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, assert(sizeof key->inputs[0] == sizeof(ushort)); - key->num_inputs = fs->info.num_inputs; + key->num_inputs = fs->info.base.num_inputs; key->flatshade_first = lp->rasterizer->flatshade_first; key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; key->size = Offset(struct lp_setup_variant_key, diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index d0389f0cb0b..8b6b5e1298f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -243,19 +243,6 @@ add_blend_test(LLVMModuleRef module, } -/** Add and limit result to ceiling of 1.0 */ -#define ADD_SAT(R, A, B) \ -do { \ - R = (A) + (B); if (R > 1.0f) R = 1.0f; \ -} while (0) - -/** Subtract and limit result to floor of 0.0 */ -#define SUB_SAT(R, A, B) \ -do { \ - R = (A) - (B); if (R < 0.0f) R = 0.0f; \ -} while (0) - - static void compute_blend_ref_term(unsigned rgb_factor, unsigned alpha_factor, @@ -423,19 +410,19 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].rgb_func) { case PIPE_BLEND_ADD: - ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ - ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ - ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] + dst_term[0]; /* R */ + res[1] = src_term[1] + dst_term[1]; /* G */ + res[2] = src_term[2] + dst_term[2]; /* B */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ - SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ - SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] - dst_term[0]; /* R */ + res[1] = src_term[1] - dst_term[1]; /* G */ + res[2] = src_term[2] - dst_term[2]; /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ - SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ - SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ + res[0] = dst_term[0] - src_term[0]; /* R */ + res[1] = dst_term[1] - src_term[1]; /* G */ + res[2] = dst_term[2] - src_term[2]; /* B */ break; case PIPE_BLEND_MIN: res[0] = MIN2(src_term[0], dst_term[0]); /* R */ @@ -456,13 +443,13 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].alpha_func) { case PIPE_BLEND_ADD: - ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] + dst_term[3]; /* A */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] - dst_term[3]; /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ + res[3] = dst_term[3] - src_term[3]; /* A */ break; case PIPE_BLEND_MIN: res[3] = MIN2(src_term[3], dst_term[3]); /* A */ @@ -676,6 +663,8 @@ test_one(unsigned verbose, fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); + + fprintf(stderr, "\n"); } } } @@ -773,7 +762,7 @@ blend_funcs[] = { const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4 */ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 7bbbc61d4c2..7a0d06ae2c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -205,16 +205,19 @@ random_elem(struct lp_type type, void *dst, unsigned index) assert(index < type.length); value = (double)rand()/(double)RAND_MAX; if(!type.norm) { - unsigned long long mask; - if (type.floating) - mask = ~(unsigned long long)0; - else if (type.fixed) - mask = ((unsigned long long)1 << (type.width / 2)) - 1; - else if (type.sign) - mask = ((unsigned long long)1 << (type.width - 1)) - 1; - else - mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & rand()); + if (type.floating) { + value *= 2.0; + } + else { + unsigned long long mask; + if (type.fixed) + mask = ((unsigned long long)1 << (type.width / 2)) - 1; + else if (type.sign) + mask = ((unsigned long long)1 << (type.width - 1)) - 1; + else + mask = ((unsigned long long)1 << type.width) - 1; + value += (double)(mask & rand()); + } } if(!type.sign) if(rand() & 1) @@ -261,12 +264,18 @@ boolean compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; + eps *= type.floating ? 8.0 : 2.0; for (i = 0; i < type.length; ++i) { double res_elem = read_elem(type, res, i); double ref_elem = read_elem(type, ref, i); - double delta = fabs(res_elem - ref_elem); - if(delta >= 2.0*eps) + double delta = res_elem - ref_elem; + if (ref_elem < -1.0 || ref_elem > 1.0) { + delta /= ref_elem; + } + delta = fabs(delta); + if (delta >= eps) { return FALSE; + } } return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 57b0ee57767..0ca27915923 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); @@ -100,9 +97,10 @@ printv(char* string, v4sf value) f[0], f[1], f[2], f[3]); } -static void +static boolean compare(v4sf x, v4sf y) { + boolean success = TRUE; float *xp = (float *) &x; float *yp = (float *) &y; if (xp[0] != yp[0] || @@ -110,7 +108,9 @@ compare(v4sf x, v4sf y) xp[2] != yp[2] || xp[3] != yp[3]) { printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + success = FALSE; } + return success; } @@ -191,7 +191,7 @@ test_round(unsigned verbose, FILE *fp) y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); @@ -200,7 +200,7 @@ test_round(unsigned verbose, FILE *fp) y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); @@ -209,7 +209,7 @@ test_round(unsigned verbose, FILE *fp) y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); @@ -218,7 +218,7 @@ test_round(unsigned verbose, FILE *fp) y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); - compare(ref, y); + success = success && compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); @@ -247,11 +247,7 @@ test_round(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - boolean success = TRUE; - - test_round(verbose, fp); - - return success; + return test_round(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 7ab357f162e..79939b1a393 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 4e026cc8ffb..f417fc8a9ea 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -151,6 +151,10 @@ LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE) LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE) +LP_LLVM_TEXTURE_MEMBER(min_lod, LP_JIT_TEXTURE_MIN_LOD, TRUE) +LP_LLVM_TEXTURE_MEMBER(max_lod, LP_JIT_TEXTURE_MAX_LOD, TRUE) +LP_LLVM_TEXTURE_MEMBER(lod_bias, LP_JIT_TEXTURE_LOD_BIAS, TRUE) +LP_LLVM_TEXTURE_MEMBER(border_color, LP_JIT_TEXTURE_BORDER_COLOR, FALSE) static void @@ -217,6 +221,11 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride; sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.base.min_lod = lp_llvm_texture_min_lod; + sampler->dynamic_state.base.max_lod = lp_llvm_texture_max_lod; + sampler->dynamic_state.base.lod_bias = lp_llvm_texture_lod_bias; + sampler->dynamic_state.base.border_color = lp_llvm_texture_border_color; + sampler->dynamic_state.static_state = static_state; sampler->dynamic_state.context_ptr = context_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 2ba39052aba..e49f9c62fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -289,172 +289,141 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print -def generate_ssse3(): +def generate_sse2(): print ''' #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" -static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, - const uint8_t *src, unsigned src_stride, - unsigned x0, unsigned y0) +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) { + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; unsigned x, y; - __m128i *pdst = (__m128i*) dst; - const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); - unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; - unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; - - const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i line0 = *(__m128i*)ysrc0; - const uint8_t *ysrc = ysrc0 + src_stride; - ysrc0 += tile_stridey; - - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - __m128i r, g, b, a, line1; - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_shuffle_epi8(line0, shuffle00); - g = _mm_shuffle_epi8(line0, shuffle01); - b = _mm_shuffle_epi8(line0, shuffle02); - a = _mm_shuffle_epi8(line0, shuffle03); - - line0 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); - - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc -= tile_stridex; - r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); - - if (x + 1 < TILE_SIZE) { - line0 = *(__m128i*)ysrc; - ysrc += src_stride; - } - - PIPE_READ_WRITE_BARRIER(); - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); - - *pdst++ = r; - *pdst++ = g; - *pdst++ = b; - *pdst++ = a; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); } - } + src += 4 * src_stride; + } } static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, - uint8_t *dst, unsigned dst_stride, +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0) { unsigned int x, y; - const __m128i *psrc = (__m128i*) src; - const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); - uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); - __m128i c0 = *psrc++; - __m128i c1; - - const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i *tile = (__m128i*) pdst; - pdst += dst_stride * TILE_VECTOR_HEIGHT; - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - uint8_t *linep = (uint8_t*) (tile++); - __m128i line0, line1, line2, line3; - - c1 = *psrc++; /* r */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_shuffle_epi8(c0, shuffle00); - line1 = _mm_shuffle_epi8(c0, shuffle01); - line2 = _mm_shuffle_epi8(c0, shuffle02); - line3 = _mm_shuffle_epi8(c0, shuffle03); - - c0 = *psrc++; /* g */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); - - c1 = *psrc++; /* b */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); - - if (psrc != end) - c0 = *psrc++; /* a */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); - - *(__m128i*) (linep) = line0; - *(__m128i*) (((char*)linep) + dst_stride) = line1; - *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; - *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; } + + dst += 4 * dst_stride; } } -#endif /* PIPE_ARCH_SSSE3 */ +#endif /* PIPE_ARCH_SSE */ ''' @@ -479,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -517,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -577,7 +546,7 @@ def main(): print '};' print - generate_ssse3() + generate_sse2() channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index ebb21a6e5a3..a9426df686f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) int ret; ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202, - &screen->channel); + 512*1024, &screen->channel); if (ret) return ret; screen->device = dev; diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index c9003c97f5d..ab480cabd09 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -12,6 +12,10 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_pushbuf.h" +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + static INLINE uint32_t nouveau_screen_transfer_flags(unsigned pipe) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 6ec9095a741..bf6a577188b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -26,6 +26,9 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define nouveau_bo_tile_layout(nvbo) \ + ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) + /* Constant buffer assignment */ #define NV50_CB_PMISC 0 #define NV50_CB_PVP 1 @@ -157,6 +160,7 @@ struct nv50_context { unsigned sampler_view_nr[3]; unsigned vbo_fifo; + unsigned req_lmem; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 3be39d5337a..42828094547 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -53,6 +53,8 @@ #define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16 #define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32 +/* NOTE: using NV50_2D_DST_FORMAT for substitute formats used with 2D engine */ + const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = { /* COMMON FORMATS */ @@ -81,7 +83,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1), SAMPLER_VIEW | RENDER_TARGET | SCANOUT }, - [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0, + [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, @@ -122,15 +124,15 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* LUMINANCE, ALPHA, INTENSITY */ - [PIPE_FORMAT_L8_UNORM] = { 0, + [PIPE_FORMAT_L8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, - [PIPE_FORMAT_L8_SRGB] = { 0, + [PIPE_FORMAT_L8_SRGB] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, - [PIPE_FORMAT_I8_UNORM] = { 0, + [PIPE_FORMAT_I8_UNORM] = { NV50_2D_DST_FORMAT_R8_UNORM, A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW }, @@ -138,7 +140,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_L8A8_UNORM] = { 0, + [PIPE_FORMAT_L8A8_UNORM] = { NV50_2D_DST_FORMAT_R16_UNORM, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -276,9 +278,9 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_SNORM] = { 0, + [PIPE_FORMAT_R16_SNORM] = { NV50TCL_RT_FORMAT_R16_SNORM, A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* UNORM 16 */ @@ -294,9 +296,9 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_R16_UNORM] = { 0, + [PIPE_FORMAT_R16_UNORM] = { NV50TCL_RT_FORMAT_R16_UNORM, A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0), - VERTEX_BUFFER | SAMPLER_VIEW }, + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, /* SNORM 8 */ diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 676540538e5..c88e7ba742e 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -328,7 +328,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) static void nv_do_print_function(void *priv, struct nv_basic_block *b) { - struct nv_instruction *i = b->phi; + struct nv_instruction *i; debug_printf("=== BB %i ", b->id); if (b->out[0]) @@ -547,6 +547,8 @@ nv50_generate_code(struct nv50_translation_info *ti) ti->p->fixups = pc->fixups; ti->p->num_fixups = pc->num_fixups; + ti->p->uses_lmem = ti->store_to_memory; + NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); out: diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 921ed156919..27eb3817bf1 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -452,7 +452,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD) + if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) continue; if (mi->flags_def || mi->def[0]->refc > 1) continue; diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index b9d5ba5ef67..39ae36681c0 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -767,7 +767,7 @@ nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set) static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { - struct nv_value *elem = list->prev; + struct nv_value *elem; for (elem = list->prev; elem != list && elem->livei->bgn > nval->livei->bgn; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 37b02bbec7c..33c4c8ca6df 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -45,6 +45,7 @@ struct nv50_program { ubyte type; boolean translated; + boolean uses_lmem; struct nouveau_bo *bo; struct nouveau_stateobj *so; diff --git a/src/gallium/drivers/nv50/nv50_reg.h b/src/gallium/drivers/nv50/nv50_reg.h index 365576fdd07..949838b33f5 100644 --- a/src/gallium/drivers/nv50/nv50_reg.h +++ b/src/gallium/drivers/nv50/nv50_reg.h @@ -1018,6 +1018,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_FP_START_ID 0x00001414 #define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420 #define NV50TCL_VB_ELEMENT_BASE 0x00001434 +#define NV50TCL_CLEAR_FLAGS 0x0000143c +#define NV50TCL_CLEAR_FLAGS_OGL (1 << 0) +#define NV50TCL_CLEAR_FLAGS_D3D (1 << 4) #define NV50TCL_INSTANCE_BASE 0x00001438 #define NV50TCL_CODE_CB_FLUSH 0x00001440 #define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8)) diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 49af9b59beb..c6bd62df1db 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -84,6 +84,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: + case PIPE_CAP_SM3: return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; @@ -95,6 +96,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_TIMER_QUERY: return 0; + case PIPE_CAP_STREAM_OUTPUT: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: @@ -137,8 +140,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, switch(shader) { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: break; + case PIPE_SHADER_GEOMETRY: default: return 0; } @@ -158,6 +161,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 64 / 4; case PIPE_SHADER_CAP_MAX_CONSTS: return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: /* 16 - 1, but not implemented */ + return 1; case PIPE_SHADER_CAP_MAX_ADDRS: /* no spilling atm */ return 1; case PIPE_SHADER_CAP_MAX_PREDS: /* not yet handled */ @@ -222,6 +227,36 @@ nv50_screen_destroy(struct pipe_screen *pscreen) OUT_RELOC(ch, bo, (n << 18) | (gr->subc << 13) | m, fl, 0, 0) void +nv50_screen_reloc_constbuf(struct nv50_screen *screen, unsigned cbi) +{ + struct nouveau_bo *bo; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + unsigned size; + const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + + switch (cbi) { + case NV50_CB_PMISC: + bo = screen->constbuf_misc[0]; + size = 0x200; + break; + case NV50_CB_PVP: + case NV50_CB_PFP: + case NV50_CB_PGP: + bo = screen->constbuf_parm[cbi - NV50_CB_PVP]; + size = 0; + break; + default: + return; + } + + BGN_RELOC (chan, bo, tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); + OUT_RELOCh(chan, bo, 0, rl); + OUT_RELOCl(chan, bo, 0, rl); + OUT_RELOC (chan, bo, (cbi << 16) | size, rl, 0, 0); +} + +void nv50_screen_relocs(struct nv50_screen *screen) { struct nouveau_channel *chan = screen->base.channel; @@ -243,12 +278,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->tsc, 0, rl); OUT_RELOCl(chan, screen->tsc, 0, rl); - BGN_RELOC (chan, screen->constbuf_misc[0], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl); - OUT_RELOC (chan, screen->constbuf_misc[0], - (NV50_CB_PMISC << 16) | 0x0200, rl, 0, 0); + nv50_screen_reloc_constbuf(screen, NV50_CB_PMISC); BGN_RELOC (chan, screen->constbuf_misc[0], tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); @@ -257,14 +287,21 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOC (chan, screen->constbuf_misc[0], (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0); - for (i = 0; i < 3; ++i) { - BGN_RELOC (chan, screen->constbuf_parm[i], - tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl); - OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); - } + for (i = 0; i < 3; ++i) + nv50_screen_reloc_constbuf(screen, NV50_CB_PVP + i); + + BGN_RELOC (chan, screen->stack_bo, + tesla, NV50TCL_STACK_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->stack_bo, 0, rl); + OUT_RELOCl(chan, screen->stack_bo, 0, rl); + + if (!screen->cur_ctx->req_lmem) + return; + + BGN_RELOC (chan, screen->local_bo, + tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 2, rl); + OUT_RELOCh(chan, screen->local_bo, 0, rl); + OUT_RELOCl(chan, screen->local_bo, 0, rl); } #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS @@ -425,6 +462,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); OUT_RING (chan, 8); + BEGIN_RING(chan, screen->tesla, NV50TCL_CLEAR_FLAGS, 1); + OUT_RING (chan, NV50TCL_CLEAR_FLAGS_D3D); + /* constant buffers for immediates and VP/FP parameters */ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, &screen->constbuf_misc[0]); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index ad6bdeb27c8..6e15230b486 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -39,6 +39,8 @@ nv50_screen(struct pipe_screen *screen) extern void nv50_screen_relocs(struct nv50_screen *); +extern void nv50_screen_reloc_constbuf(struct nv50_screen *, unsigned cbi); + struct nv50_format { uint32_t rt; uint32_t tic; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 8c1a5999cfe..6c41e8f4561 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -47,10 +47,17 @@ nv50_transfer_constbuf(struct nv50_context *nv50, start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, cbi); - /* FIXME: emit relocs for unsuiTed MM */ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | cbi); BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr); @@ -77,8 +84,16 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) unsigned start = 0; while (count) { - unsigned nr = count; - nr = MIN2(nr, 2047); + unsigned nr = AVAIL_RING(chan); + + if (nr < 8) { + FIRE_RING(chan); + continue; + } + nr = MIN2(count, nr - 7); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + nv50_screen_reloc_constbuf(nv50->screen, NV50_CB_PMISC); BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | NV50_CB_PMISC); @@ -111,8 +126,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) break; default: assert(0); - cbi = 0; - break; + return; } nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi); @@ -281,6 +295,17 @@ nv50_program_validate(struct nv50_program *p) return p->translated; } +static INLINE void +nv50_program_validate_common(struct nv50_context *nv50, struct nv50_program *p) +{ + nv50_program_validate_code(nv50, p); + + if (p->uses_lmem) + nv50->req_lmem |= 1 << p->type; + else + nv50->req_lmem &= ~(1 << p->type); +} + struct nouveau_stateobj * nv50_vertprog_validate(struct nv50_context *nv50) { @@ -300,7 +325,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_VERTPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -325,7 +350,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_FRAGPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; @@ -337,6 +362,10 @@ nv50_geomprog_validate(struct nv50_context *nv50) struct nv50_program *p = nv50->geomprog; struct nouveau_stateobj *so = NULL; + /* GP may be NULL, but VP and FP may not */ + if (!p) + return NULL; /* GP is deactivated in linkage validation */ + if (!p->translated) { if (nv50_program_validate(p)) nv50_gp_update_stateobj(nv50, p); @@ -350,7 +379,7 @@ nv50_geomprog_validate(struct nv50_context *nv50) if (!(nv50->dirty & NV50_NEW_GEOMPROG)) return NULL; - nv50_program_validate_code(nv50, p); + nv50_program_validate_common(nv50, p); so_ref(p->so, &so); return so; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 3afce06557a..f42fa2d4d2b 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -663,7 +663,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv50_context *nv50 = nv50_context(pipe); - nv50->fragprog = hwcso; + nv50->geomprog = hwcso; nv50->dirty |= NV50_NEW_GEOMPROG; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f1d8202dffa..16c2dab9af6 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -333,7 +333,7 @@ struct state_validate { { validate_vtxbuf , NV50_NEW_ARRAYS }, { validate_vtxattr , NV50_NEW_ARRAYS }, { validate_clip , NV50_NEW_CLIP }, - {} + { NULL , 0 } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3e61203adff..f70c138fe1a 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -52,48 +52,26 @@ nv50_2d_format_faithful(enum pipe_format format) } } -static INLINE int -nv50_format(enum pipe_format format) +static INLINE uint8_t +nv50_2d_format(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - case PIPE_FORMAT_B8G8R8X8_UNORM: - return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return NV50_2D_DST_FORMAT_A8R8G8B8_SRGB; - case PIPE_FORMAT_B8G8R8X8_SRGB: - return NV50_2D_DST_FORMAT_X8R8G8B8_SRGB; - case PIPE_FORMAT_B5G6R5_UNORM: - return NV50_2D_DST_FORMAT_R5G6B5_UNORM; - case PIPE_FORMAT_B5G5R5A1_UNORM: - return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM; - case PIPE_FORMAT_B10G10R10A2_UNORM: - return NV50_2D_DST_FORMAT_A2R10G10B10_UNORM; - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: + uint8_t id = nv50_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + return id; + + switch (util_format_get_blocksize(format)) { + case 1: return NV50_2D_DST_FORMAT_R8_UNORM; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return NV50_2D_DST_FORMAT_R32G32B32X32_FLOAT; - case PIPE_FORMAT_Z32_FLOAT: - return NV50_2D_DST_FORMAT_R32_FLOAT; - - /* only because we require src format == dst format: */ - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: + case 2: return NV50_2D_DST_FORMAT_R16_UNORM; - + case 4: + return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; default: - return -1; + return 0; } } @@ -107,14 +85,14 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); - format = nv50_format(ps->format); - if (format < 0) { + format = nv50_2d_format(ps->format); + if (!format) { NOUVEAU_ERR("invalid/unsupported surface format: %s\n", util_format_name(ps->format)); return 1; } - if (!bo->tile_flags) { + if (!nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -221,7 +199,6 @@ nv50_surface_copy(struct pipe_context *pipe, nv50_miptree_surface_del(ps_dst); } -/* XXX this should probably look more along the lines of nv50_clear */ static void nv50_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, @@ -231,34 +208,99 @@ nv50_clear_render_target(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - struct nouveau_channel *chan = screen->eng2d->channel; - struct nouveau_grobj *eng2d = screen->eng2d; - int format, ret; - union util_color uc; - util_pack_color(rgba, dst->format, &uc); + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nouveau_bo *bo = mt->base.bo; - format = nv50_format(dst->format); - if (format < 0) - return; + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4); + OUT_RINGf (chan, rgba[0]); + OUT_RINGf (chan, rgba[1]); + OUT_RINGf (chan, rgba[2]); + OUT_RINGf (chan, rgba[3]); - ret = MARK_RING (chan, 16 + 32, 2); - if (ret) + if (MARK_RING(chan, 18, 2)) return; - ret = nv50_surface_set(screen, dst, 1); - if (ret) + BEGIN_RING(chan, tesla, NV50TCL_RT_CONTROL, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5); + OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); + OUT_RING (chan, dst->width); + OUT_RING (chan, dst->height); + BEGIN_RING(chan, tesla, NV50TCL_RT_ARRAY_MODE, 1); + OUT_RING (chan, 1); + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); + OUT_RING (chan, 0x3c); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *tesla = screen->tesla; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1); + OUT_RINGf (chan, depth); + mode |= NV50TCL_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1); + OUT_RING (chan, stencil & 0xff); + mode |= NV50TCL_CLEAR_BUFFERS_S; + } + + if (MARK_RING(chan, 17, 2)) return; - BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); - OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); - OUT_RING (chan, format); - OUT_RING (chan, uc.ui); - BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); - OUT_RING (chan, dstx); - OUT_RING (chan, dsty); - OUT_RING (chan, width); - OUT_RING (chan, height); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); + OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, nv50_format_table[dst->format].rt); + OUT_RING (chan, mt->level[dst->level].tile_mode << 4); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, NV50TCL_ZETA_HORIZ, 3); + OUT_RING (chan, dst->width); + OUT_RING (chan, dst->height); + OUT_RING (chan, (1 << 16) | 1); + + BEGIN_RING(chan, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); + OUT_RING (chan, (width << 16) | dstx); + OUT_RING (chan, (height << 16) | dsty); + + BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1); + OUT_RING (chan, mode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER; } void @@ -266,6 +308,7 @@ nv50_init_surface_functions(struct nv50_context *nv50) { nv50->pipe.resource_copy_region = nv50_surface_copy; nv50->pipe.clear_render_target = nv50_clear_render_target; + nv50->pipe.clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f973cf24b98..0cc2f4a837f 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, WAIT_RING (chan, 14); - if (!src_bo->tile_flags) { + if (!nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); @@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sz); /* copying only 1 zslice per call */ } - if (!dst_bo->tile_flags) { + if (!nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); @@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (src_bo->tile_flags) { + if (nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } - if (dst_bo->tile_flags) { + if (nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); OUT_RING (chan, (dy << 16) | (dx * cpp)); @@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50, MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - if (bo->tile_flags) { + if (nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); OUT_RING (chan, dst_format); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index d97cab8db19..13e8beed479 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1235,10 +1235,9 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; struct nvfx_vertex_program* vp; - /* Gallium always puts the point coord in GENERIC[0] - * TODO: this is wrong, Gallium needs to be fixed - */ - unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * (nvfx->rasterizer->pipe.sprite_coord_enable | 1); + + // TODO: the multiplication by point_quad_rasterization is probably superfluous + unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable; boolean emulate_sprite_flipping = sprite_coord_enable && nvfx->rasterizer->pipe.sprite_coord_mode; unsigned key = emulate_sprite_flipping; @@ -1297,7 +1296,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) unsigned used_texcoords = 0; for(unsigned i = 0; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if(!((1 << generic) & sprite_coord_enable)) + if((generic < 32) && !((1 << generic) & sprite_coord_enable)) { unsigned char slot_mask = vp->generic_to_fp_input[generic]; if(slot_mask >= 0xf0) @@ -1320,7 +1319,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) for(i = 0; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if((1 << generic) & sprite_coord_enable) + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) { if(fp->slot_to_fp_input[i] != sprite_reloc_input) goto update_slots; @@ -1346,7 +1345,7 @@ update_slots: for(; i < fp->num_slots; ++i) { unsigned generic = fp->slot_to_generic[i]; - if((1 << generic) & sprite_coord_enable) + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) fp->slot_to_fp_input[i] = sprite_reloc_input; else fp->slot_to_fp_input[i] = vp->generic_to_fp_input[generic] & 0xf; @@ -1559,7 +1558,7 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program_bo* next = fpbo->next; nouveau_bo_unmap(fpbo->bo); nouveau_bo_ref(0, &fpbo->bo); - free(fpbo); + os_free_aligned(fpbo); fpbo = next; } while(fpbo != fp->fpbo); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 3f177b7ed07..8024800bd09 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -194,6 +194,8 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_B5G6R5_UNORM: break; case PIPE_FORMAT_R16G16B16A16_FLOAT: diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index b767846a99e..54619037d82 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -305,7 +305,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nvfx_context *nvfx = nvfx_context(pipe); nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->width0 / (4 * sizeof(float)); + nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { nvfx->dirty |= NVFX_NEW_VERTCONST; diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 4ffc4de4520..30e48c80735 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -140,6 +140,12 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) case 0: rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_X8B8G8R8; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8B8G8R8; + break; case PIPE_FORMAT_B5G6R5_UNORM: rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; break; diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 23f045ecf6c..e543fda50ef 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -377,6 +377,8 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; if(fsrc->Register.Indirect) { if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && @@ -973,7 +975,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) static struct nvfx_vertex_program* -nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, const struct tgsi_shader_info* info) +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, struct tgsi_shader_info* info) { struct tgsi_parse_context parse; struct nvfx_vertex_program* vp = NULL; diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 728bc40a5bb..66d900ebb5f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -39,5 +39,6 @@ EXTRA_OBJECTS = \ include ../../Makefile.template +.PHONY: $(COMPILER_ARCHIVE) $(COMPILER_ARCHIVE): $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7f655dbfd21..b59bc002610 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -125,6 +125,8 @@ struct r300_gpu_flush { uint32_t cb_flush_clean[6]; }; +#define RS_STATE_MAIN_SIZE 23 + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; @@ -132,7 +134,7 @@ struct r300_rs_state { struct pipe_rasterizer_state rs_draw; /* Command buffers. */ - uint32_t cb_main[25]; + uint32_t cb_main[RS_STATE_MAIN_SIZE]; uint32_t cb_poly_offset_zb16[5]; uint32_t cb_poly_offset_zb24[5]; @@ -150,6 +152,7 @@ struct r300_rs_block { uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t gb_enable; uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ @@ -162,7 +165,6 @@ struct r300_sampler_state { uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ - uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ /* Min/max LOD must be clamped to [0, last_level], thus * it's dependent on a currently bound texture */ @@ -334,6 +336,13 @@ struct r300_texture_desc { /* Parent class. */ struct u_resource b; + /* Width, height, and depth. + * Most of the time, these are equal to pipe_texture::width0, height0, + * and depth0. However, NPOT 3D textures must have dimensions aligned + * to POT, and this is the only case when these variables differ from + * pipe_texture. */ + unsigned width0, height0, depth0; + /* Buffer tiling. * Macrotiling is specified per-level because small mipmaps cannot * be macrotiled. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index b2b34c3efcb..3a1085d2dc5 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -89,7 +89,7 @@ static const float * get_rc_constant_state( { struct r300_textures_state* texstate = r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; - struct pipe_resource *tex; + struct r300_texture *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -97,9 +97,17 @@ static const float * get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = texstate->sampler_views[constant->u.State[1]]->base.texture; - vec[0] = 1.0 / tex->width0; - vec[1] = 1.0 / tex->height0; + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->desc.width0; + vec[1] = 1.0 / tex->desc.height0; + break; + + case RC_STATE_R300_TEXSCALE_FACTOR: + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + /* Add a small number to the texture size to work around rounding errors in hw. */ + vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); + vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); + vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); break; case RC_STATE_R300_VIEWPORT_SCALE: @@ -667,7 +675,7 @@ void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); - OUT_CS_TABLE(rs->cb_main, 25); + OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE); if (rs->polygon_offset_enable) { if (r300->zbuffer_bpp == 16) { OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); @@ -709,6 +717,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(rs->vap_out_vtx_fmt[0]); OUT_CS(rs->vap_out_vtx_fmt[1]); + OUT_CS_REG_SEQ(R300_GB_ENABLE, 1); + OUT_CS(rs->gb_enable); if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b8dab88ef09..d9d4a9304df 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -150,12 +150,16 @@ static void get_external_state( unsigned char *swizzle; for (i = 0; i < texstate->sampler_state_count; i++) { - struct r300_sampler_state* s = texstate->sampler_states[i]; + struct r300_sampler_state *s = texstate->sampler_states[i]; + struct r300_sampler_view *v = texstate->sampler_views[i]; + struct r300_texture *t; - if (!s) { + if (!s || !v) { continue; } + t = r300_texture(texstate->sampler_views[i]->base.texture); + if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -176,35 +180,29 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; - if (texstate->sampler_views[i]) { - struct r300_texture *t; - t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; - - /* XXX this should probably take into account STR, not just S. */ - if (t->desc.is_npot) { - switch (s->state.wrap_s) { - case PIPE_TEX_WRAP_REPEAT: - state->unit[i].wrap_mode = RC_WRAP_REPEAT; - state->unit[i].fake_npot = TRUE; - break; - - case PIPE_TEX_WRAP_MIRROR_REPEAT: - state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT; - state->unit[i].fake_npot = TRUE; - break; - - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP; - state->unit[i].fake_npot = TRUE; - break; - - default: - state->unit[i].wrap_mode = RC_WRAP_NONE; - break; - } + /* XXX this should probably take into account STR, not just S. */ + if (t->desc.is_npot) { + switch (s->state.wrap_s) { + case PIPE_TEX_WRAP_REPEAT: + state->unit[i].wrap_mode = RC_WRAP_REPEAT; + break; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT; + break; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP; + break; + + default: + state->unit[i].wrap_mode = RC_WRAP_NONE; } + + if (t->desc.b.b.target == PIPE_TEXTURE_3D) + state->unit[i].clamp_and_scale_before_fetch = TRUE; } } } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5b0121ce9e1..5f34fcb2744 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -158,7 +158,7 @@ static void r300_render_condition(struct pipe_context *pipe, uint mode) { struct r300_context *r300 = r300_context(pipe); - uint64_t result; + uint64_t result = 0; boolean wait; if (query) { @@ -167,9 +167,9 @@ static void r300_render_condition(struct pipe_context *pipe, if (!r300_get_query_result(pipe, query, wait, &result)) { r300->skip_rendering = FALSE; + } else { + r300->skip_rendering = result == 0; } - - r300->skip_rendering = result == 0; } else { r300->skip_rendering = FALSE; } diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 0ea11e5bfc2..9247064508f 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -29,6 +29,7 @@ #include "r300_context.h" #include "translate/translate.h" +#include "util/u_index_modify.h" void r300_begin_vertex_translate(struct r300_context *r300) { @@ -188,111 +189,6 @@ void r300_end_vertex_translate(struct r300_context *r300) NULL); } -static void r300_shorten_ubyte_elts(struct r300_context* r300, - struct pipe_resource** elts, - int index_bias, - unsigned start, - unsigned count) -{ - struct pipe_context* context = &r300->context; - struct pipe_screen* screen = r300->context.screen; - struct pipe_resource* new_elts; - unsigned char *in_map; - unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; - - new_elts = pipe_buffer_create(screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); - - in_map += start; - - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_ushort_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned short *in_map; - unsigned short *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_uint_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned int *in_map; - unsigned int *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -300,21 +196,21 @@ void r300_translate_index_buffer(struct r300_context *r300, { switch (*index_size) { case 1: - r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count); + util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); *index_size = 2; *start = 0; break; case 2: if (*start % 2 != 0 || index_offset) { - r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; case 4: if (index_offset) { - r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count); + util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); *start = 0; } break; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7f41ff0e2ec..b448924f85e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; /* Texturing. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8ccb63964e7..f2479a994c8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -922,7 +922,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, const struct pipe_rasterizer_state* state) { struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); - int i; float psiz; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ @@ -935,10 +934,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ - /* Specifies top of Raster pipe specific enable controls, - * i.e. texture coordinates stuffing for points, lines, triangles */ - uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ - /* Point sprites texture coordinates, 0: lower left, 1: upper right */ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ @@ -950,10 +945,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->rs = *state; rs->rs_draw = *state; - /* Generate point sprite texture coordinates in GENERIC0 - * if point_quad_rasterization is TRUE. */ rs->rs.sprite_coord_enable = state->point_quad_rasterization * - (state->sprite_coord_enable | 1); + state->sprite_coord_enable; /* Override some states for Draw. */ rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ @@ -1054,16 +1047,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, clip_rule = state->scissor ? 0xAAAA : 0xFFFF; - /* Point sprites */ - stuffing_enable = 0; + /* Point sprites coord mode */ if (rs->rs.sprite_coord_enable) { - stuffing_enable = R300_GB_POINT_STUFF_ENABLE; - for (i = 0; i < 8; i++) { - if (rs->rs.sprite_coord_enable & (1 << i)) - stuffing_enable |= - R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); - } - switch (state->sprite_coord_mode) { case PIPE_SPRITE_COORD_UPPER_LEFT: point_texcoord_top = 0.0f; @@ -1077,7 +1062,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } /* Build the main command buffer. */ - BEGIN_CB(rs->cb_main, 25); + BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE); OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); OUT_CB_REG(R300_GA_POINT_SIZE, point_size); OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); @@ -1091,7 +1076,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); - OUT_CB_REG(R300_GB_ENABLE, stuffing_enable); OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CB_32F(point_texcoord_left); OUT_CB_32F(point_texcoord_bottom); @@ -1149,7 +1133,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0); + r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -1171,7 +1155,6 @@ static void* struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); boolean is_r500 = r300->screen->caps.is_r500; int lod_bias; - union util_color uc; sampler->state = *state; @@ -1228,9 +1211,6 @@ static void* sampler->filter1 |= r500_anisotropy(state->max_anisotropy); } - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - sampler->border_color = uc.ui; - /* R500-specific fixups and optimizations */ if (r300->screen->caps.is_r500) { sampler->filter1 |= R500_BORDER_FIX; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f9a516825df..904736ef06d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -324,6 +324,7 @@ static void r300_update_rs_block(struct r300_context *r300) boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; int *stream_loc_notcl = r300->stream_loc_notcl; + uint32_t stuffing_enable = 0; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -436,7 +437,11 @@ static void r300_update_rs_block(struct r300_context *r300) /* Rasterize texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { - bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); + bool sprite_coord = false; + + if (fs_inputs->generic[i] != ATTR_UNUSED) { + sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); + } if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { if (!sprite_coord) { @@ -444,7 +449,9 @@ static void r300_update_rs_block(struct r300_context *r300) rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); stream_loc_notcl[loc++] = 6 + tex_count; - } + } else + stuffing_enable |= + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (tex_count*2)); /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_ptr, @@ -456,8 +463,8 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; DBG(r300, DBG_RS, - "r300: Rasterized generic %i written to FS%s.\n", - i, sprite_coord ? " (sprite coord)" : ""); + "r300: Rasterized generic %i written to FS%s in texcoord %d.\n", + i, sprite_coord ? " (sprite coord)" : "", tex_count); } else { DBG(r300, DBG_RS, "r300: Rasterized generic %i unused%s.\n", @@ -560,13 +567,72 @@ static void r300_update_rs_block(struct r300_context *r300) count = MAX3(col_count, tex_count, 1); rs.inst_count = count - 1; + /* set the GB enable flags */ + if (r300->sprite_coord_enable) + stuffing_enable |= R300_GB_POINT_STUFF_ENABLE; + + rs.gb_enable = stuffing_enable; + /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 11 + count*2; + r300->rs_block_state.size = 13 + count*2; } } +static uint32_t r300_get_border_color(enum pipe_format format, + const float border[4]) +{ + const struct util_format_description *desc; + float border_swizzled[4] = { + border[2], + border[1], + border[0], + border[3] + }; + uint32_t r; + + desc = util_format_description(format); + + /* We don't use util_pack_format because it does not handle the formats + * we want, e.g. R4G4B4A4 is non-existent in Gallium. */ + switch (desc->channel[0].size) { + case 4: + r = ((float_to_ubyte(border_swizzled[0]) & 0xf0) >> 4) | + ((float_to_ubyte(border_swizzled[1]) & 0xf0) << 0) | + ((float_to_ubyte(border_swizzled[2]) & 0xf0) << 4) | + ((float_to_ubyte(border_swizzled[3]) & 0xf0) << 8); + break; + + case 5: + if (desc->channel[1].size == 5) { + r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) | + ((float_to_ubyte(border_swizzled[1]) & 0xf8) << 2) | + ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 7) | + ((float_to_ubyte(border_swizzled[3]) & 0x80) << 8); + } else if (desc->channel[1].size == 6) { + r = ((float_to_ubyte(border_swizzled[0]) & 0xf8) >> 3) | + ((float_to_ubyte(border_swizzled[1]) & 0xfc) << 3) | + ((float_to_ubyte(border_swizzled[2]) & 0xf8) << 8); + } else { + assert(0); + r = 0; + } + break; + + default: + /* I think the fat formats (16, 32) are specified + * as the 8-bit ones. I am not sure how compressed formats + * work here. */ + r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | + ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | + ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | + ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + } + + return r; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -599,7 +665,11 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->format = view->format; texstate->filter0 = sampler->filter0; texstate->filter1 = sampler->filter1; - texstate->border_color = sampler->border_color; + + /* Set the border color. */ + texstate->border_color = + r300_get_border_color(view->base.format, + sampler->state.border_color); /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.first_level, diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 03ec127ff79..7e501221b1f 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -364,6 +364,7 @@ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; const struct util_format_description *desc; + unsigned i; desc = util_format_description(format); @@ -371,10 +372,17 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - switch (desc->channel[0].type) { + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { /* Half-floats, floats, doubles */ case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: /* Supported only on RV350 and later. */ if (desc->nr_channels > 2) { @@ -394,7 +402,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { case UTIL_FORMAT_TYPE_UNSIGNED: /* Signed ints */ case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 8: result = R300_DATA_TYPE_BYTE; break; @@ -413,10 +421,10 @@ r300_translate_vertex_data_type(enum pipe_format format) { return R300_INVALID_FORMAT; } - if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { result |= R300_SIGNED; } - if (desc->channel[0].normalized) { + if (desc->channel[i].normalized) { result |= R300_NORMALIZE; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 34105aa4bcd..cee56bccdcd 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -260,16 +260,26 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; /* Unsupported/unknown. */ } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* And finally, uniform formats. */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { return ~0; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: @@ -303,7 +313,7 @@ uint32_t r300_translate_texformat(enum pipe_format format, return ~0; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: @@ -359,6 +369,11 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return R300_COLOR_FORMAT_UV88; + case PIPE_FORMAT_B5G6R5_UNORM: return R300_COLOR_FORMAT_RGB565; @@ -443,15 +458,25 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) desc = util_format_description(format); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return ~0; /* Unsupported/unknown. */ + /* Specifies how the shader output is written to the fog unit. */ - if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { - if (desc->channel[0].size == 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + if (desc->channel[i].size == 32) { modifier |= R300_US_OUT_FMT_C4_32_FP; } else { modifier |= R300_US_OUT_FMT_C4_16_FP; } } else { - if (desc->channel[0].size == 16) { + if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; } else { /* C4_8 seems to be used for the formats whose pixel size @@ -468,7 +493,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* Add swizzles and return. */ switch (format) { - /* 8-bit outputs. + /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: return modifier | R300_C2_SEL_A; @@ -478,6 +503,14 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; + /* 16-bit outputs, two channels. + * COLORFORMAT_UV88 stores C2 and C0. */ + case PIPE_FORMAT_L8A8_UNORM: + return modifier | R300_C0_SEL_A | R300_C2_SEL_R; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + return modifier | R300_C0_SEL_G | R300_C2_SEL_R; + /* BGRA outputs. */ case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: @@ -556,18 +589,15 @@ void r300_texture_setup_format_state(struct r300_screen *screen, out->tile_config = 0; /* Set sampler state. */ - out->format0 = R300_TX_WIDTH((u_minify(pt->width0, level) - 1) & 0x7ff) | - R300_TX_HEIGHT((u_minify(pt->height0, level) - 1) & 0x7ff); + out->format0 = + R300_TX_WIDTH((u_minify(desc->width0, level) - 1) & 0x7ff) | + R300_TX_HEIGHT((u_minify(desc->height0, level) - 1) & 0x7ff) | + R300_TX_DEPTH(util_logbase2(u_minify(desc->depth0, level)) & 0xf); if (desc->uses_stride_addressing) { /* rectangles love this */ out->format0 |= R300_TX_PITCH_EN; out->format2 = (desc->stride_in_pixels[level] - 1) & 0x1fff; - } else { - /* Power of two textures (3D, mipmaps, and no pitch), - * also NPOT textures with a width being POT. */ - out->format0 |= - R300_TX_DEPTH(util_logbase2(u_minify(pt->depth0, level)) & 0xf); } if (pt->target == PIPE_TEXTURE_CUBE) { @@ -580,10 +610,10 @@ void r300_texture_setup_format_state(struct r300_screen *screen, /* large textures on r500 */ if (is_r500) { - if (pt->width0 > 2048) { + if (desc->width0 > 2048) { out->format2 |= R500_TXWIDTH_BIT11; } - if (pt->height0 > 2048) { + if (desc->height0 > 2048) { out->format2 |= R500_TXHEIGHT_BIT11; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 112282a0a6a..543d0fdc15b 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -44,7 +44,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */ {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ }, { @@ -53,7 +53,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */ {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; @@ -91,9 +91,9 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, desc->microtile, R300_BUFFER_TILED, dim); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->b.b.width0, level); + texdim = u_minify(desc->width0, level); } else { - texdim = u_minify(desc->b.b.height0, level); + texdim = u_minify(desc->height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -124,7 +124,7 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, return 0; } - width = u_minify(desc->b.b.width0, level); + width = u_minify(desc->width0, level); if (util_format_is_plain(desc->b.b.format)) { tile_width = r300_get_pixel_alignment(desc->b.b.format, @@ -172,7 +172,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, { unsigned height, tile_height; - height = u_minify(desc->b.b.height0, level); + height = u_minify(desc->height0, level); if (util_format_is_plain(desc->b.b.format)) { tile_height = r300_get_pixel_alignment(desc->b.b.format, @@ -237,7 +237,7 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, r300_texture_get_nblocksy(desc, i, FALSE); } - size *= desc->b.b.depth0; + size *= desc->depth0; desc->size_in_bytes = size; } } @@ -256,7 +256,7 @@ static void r300_setup_miptree(struct r300_screen *screen, { struct pipe_resource *base = &desc->b.b; unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; + boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; desc->size_in_bytes = 0; @@ -292,7 +292,7 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(base->depth0, i); + size = layer_size * u_minify(desc->depth0, i); desc->offset_in_bytes[i] = desc->size_in_bytes; desc->size_in_bytes = desc->offset_in_bytes[i] + size; @@ -303,8 +303,8 @@ static void r300_setup_miptree(struct r300_screen *screen, SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, desc->size_in_bytes, + i, u_minify(desc->width0, i), u_minify(desc->height0, i), + u_minify(desc->depth0, i), stride, desc->size_in_bytes, desc->macrotile[i] ? "TRUE" : "FALSE"); } } @@ -313,14 +313,14 @@ static void r300_setup_flags(struct r300_texture_desc *desc) { desc->uses_stride_addressing = !util_is_power_of_two(desc->b.b.width0) || - !util_is_power_of_two(desc->b.b.height0) || (desc->stride_in_bytes_override && stride_to_width(desc->b.b.format, desc->stride_in_bytes_override) != desc->b.b.width0); desc->is_npot = desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0); + !util_is_power_of_two(desc->b.b.height0) || + !util_is_power_of_two(desc->b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, @@ -351,7 +351,7 @@ static void r300_setup_tiling(struct r300_screen *screen, { struct r300_winsys_screen *rws = screen->rws; enum pipe_format format = desc->b.b.format; - boolean rv350_mode = screen->caps.is_rv350; + boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); @@ -368,11 +368,11 @@ static void r300_setup_tiling(struct r300_screen *screen, switch (util_format_get_blocksize(format)) { case 1: case 4: + case 8: desc->microtile = R300_BUFFER_TILED; break; case 2: - case 8: if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { desc->microtile = R300_BUFFER_SQUARETILED; } @@ -416,9 +416,21 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, { desc->b.b = *base; desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; + desc->width0 = base->width0; + desc->height0 = base->height0; + desc->depth0 = base->depth0; + r300_setup_flags(desc); + + /* Align a 3D NPOT texture to POT. */ + if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { + desc->width0 = util_next_power_of_two(desc->width0); + desc->height0 = util_next_power_of_two(desc->height0); + desc->depth0 = util_next_power_of_two(desc->depth0); + } + + /* Setup tiling. */ if (microtile == R300_BUFFER_SELECT_LAYOUT || macrotile == R300_BUFFER_SELECT_LAYOUT) { r300_setup_tiling(rscreen, desc); @@ -428,7 +440,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, assert(desc->b.b.last_level == 0); } - r300_setup_flags(desc); r300_setup_cbzb_flags(rscreen, desc); /* Setup the miptree description. */ @@ -444,10 +455,10 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, if (max_buffer_size) { /* Make sure the buffer we got is large enough. */ if (desc->size_in_bytes > max_buffer_size) { - fprintf(stderr, "r300: texture_from_handle: The buffer is not " + fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. Got: %i, Need: %i, Info:\n", max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_from_handle"); + r300_tex_print_info(rscreen, desc, "texture_desc_init"); return FALSE; } @@ -457,7 +468,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_from_handle"); + r300_tex_print_info(rscreen, desc, "texture_desc_init"); return TRUE; } diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 3cdb963f978..ede0bb2ec45 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -7,22 +7,18 @@ LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) C_SOURCES = \ - r600_buffer.c \ - r600_state2.c \ - r600_context.c \ - r600_shader.c \ - r600_draw.c \ + r600_asm.c \ r600_blit.c \ + r600_buffer.c \ r600_helper.c \ + r600_pipe.c \ r600_query.c \ r600_resource.c \ - r600_screen.c \ + r600_shader.c \ r600_state.c \ r600_texture.c \ - r600_asm.c \ r700_asm.c \ - r600_hw_states.c \ - eg_asm.c \ - eg_hw_states.c + evergreen_state.c \ + eg_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 99c8644e026..bf0ad8571ba 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -16,19 +16,19 @@ env.Append(CPPPATH = [ r600 = env.ConvenienceLibrary( target = 'r600', source = [ + 'r600_asm.c', 'r600_buffer.c', - 'r600_context.c', - 'r600_draw.c', 'r600_blit.c', 'r600_helper.c', + 'r600_pipe.c', 'r600_query.c', 'r600_resource.c', - 'r600_screen.c', + 'r600_shader.c', 'r600_state.c', 'r600_texture.c', - 'r600_shader.c', - 'r600_asm.c', 'r700_asm.c', + 'evergreen_state.c', + 'eg_asm.c', ]) Export('r600') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 769f5508744..52b7189e9e5 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_asm.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "eg_sq.h" #include "r600_opcodes.h" -#include <stdio.h> -#include <errno.h> int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -73,8 +72,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COND(cf->cond) | - S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + S_SQ_CF_WORD1_COND(cf->cond) | + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); break; default: diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c deleted file mode 100644 index d6f417e1e37..00000000000 --- a/src/gallium/drivers/r600/eg_hw_states.c +++ /dev/null @@ -1,1215 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "eg_state_inlines.h" -#include "evergreend.h" - -#include "eg_states_inc.h" - -static void eg_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[EG_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[EG_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[EG_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[EG_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[EG_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[EG_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028780_BLEND_CONTROL_ENABLE(1); - - bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028780_SEPARATE_ALPHA_BLEND(1); - bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[EG_BLEND__CB_BLEND0_CONTROL + i] = bc; - } - - radeon_state_pm4(rstate); -} - -static void eg_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void eg_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_028C70_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_028C70_FORMAT(format) | - S_028C70_COMP_SWAP(swap) | - S_028C70_BLEND_CLAMP(1) | - S_028C70_SOURCE_FORMAT(1) | - S_028C70_NUMBER_TYPE(ntype); - - rstate->states[EG_CB__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[EG_CB__CB_COLOR0_INFO] = color_info; - rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch); - rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice); - rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1); - - radeon_state_pm4(rstate); -} - -static void eg_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[EG_DB__DB_HTILE_DATA_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_READ_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_STENCIL_WRITE_BASE] = state->zsbuf->offset >> 8; - rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format); - rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch); - rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void eg_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000000; - if (rctx->flat_shade) - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[EG_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[EG_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - rstate->states[EG_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[EG_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[EG_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[EG_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[EG_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[EG_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[EG_RASTERIZER__PA_SU_VTX_CNTL] = 0x00000005; - - rstate->states[EG_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void eg_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* screen scissor has no WINDOW OFFSET */ - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl | S_028204_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl | S_028240_WINDOW_OFFSET_DISABLE(1); - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void eg_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - radeon_state_pm4(rstate); -} - -static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - - if (rctx->ps_shader == NULL) { - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ - - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = 0; -/// db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) | -/// S_028D0C_DEPTH_COMPRESS_DISABLE(1); - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - - query_running = false; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; - } - } - - if (query_running) { - db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - db_render_control |= S_028D0C_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[EG_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[EG_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[EG_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[EG_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[EG_DSA__SX_ALPHA_REF] = alpha_ref; - // rstate->states[EG_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - // rstate->states[EG_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[EG_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[EG_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[EG_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[EG_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[EG_DSA__DB_RENDER_OVERRIDE] = db_render_override; - - rstate->states[EG_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[EG_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[EG_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void eg_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)); - - rstate->states[EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | -S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; - int r; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - } - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = - S_030000_DIM(r600_tex_dim(view->texture->target)) | - S_030000_PITCH((pitch / 8) - 1) | - S_030000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = - S_030004_TEX_HEIGHT(view->texture->height0 - 1) | - S_030004_TEX_DEPTH(view->texture->depth0 - 1); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | - S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | - S_030010_BASE_LEVEL(view->first_level); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = - S_030014_LAST_LEVEL(view->last_level) | - S_030014_BASE_ARRAY(0) | - S_030014_LAST_ARRAY(0); - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0; - rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = - S_03001C_DATA_FORMAT(format) | - S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void eg_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; - int i; - - target_mask = 0; - shader_mask = 0; - color_control = S_028808_MODE(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[EG_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[EG_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[EG_CB_CNTL__CB_COLOR_CONTROL] = color_control; - rstate->states[EG_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[EG_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - - -static void eg_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int hs_prio, cs_prio, ls_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_hs_gprs; - int num_ls_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_hs_threads; - int num_ls_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - int num_hs_stack_entries; - int num_ls_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - hs_prio = 0; - ls_prio = 0; - cs_prio = 0; - - switch (family) { - case CHIP_CEDAR: - default: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_REDWOOD: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_JUNIPER: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - } - - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[EG_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_CEDAR: - break; - default: - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_EXPORT_SRC_C(1); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_CS_PRIO(cs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_LS_PRIO(ls_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_HS_PRIO(hs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[EG_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs); - - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); - rctx->config.states[EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3] |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1] |= S_008C18_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_HS_THREADS(num_hs_threads); - rctx->config.states[EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2] |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] = 0; - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); - rctx->config.states[EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3] |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__SPI_CONFIG_CNTL_1] = S_00913C_VTX_DONE_DELAY(4); - - rctx->config.states[EG_CONFIG__SX_MISC] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_0] = 0x0; - rctx->config.states[EG_CONFIG__PA_SC_MODE_CNTL_1] = 0x0; - - rctx->config.states[EG_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2] = 0x00000000; - rctx->config.states[EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3] = 0x00000000; - - rctx->config.states[EG_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG] = 0x00000000; - rctx->config.states[EG_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[EG_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; -// rctx->config.states[EG_CONFIG__VGT_CACHE_INVALIDATION] = 0x2; -// rctx->config.states[EG_CONFIG__VGT_GS_VERTEX_REUSE] = 0x16; - rctx->config.states[EG_CONFIG__PA_CL_ENHANCE] = (3 << 1) | 1; - - radeon_state_pm4(&rctx->config); -} - -static int eg_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD2] = S_030008_STRIDE(stride) | - S_030008_DATA_FORMAT(format); - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); - - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD6] = 0x00000000; - vs_resource->states[EG_PS_RESOURCE__RESOURCE0_WORD7] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int eg_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[EG_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[EG_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[EG_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int eg_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[EG_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[EG_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[EG_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[EG_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[EG_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[EG_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[EG_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - - -static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rasterizer->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - if (have_pos) { - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); - state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1; - } - state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | - S_028844_STACK_SIZE(rshader->bc.nstack); - state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - state->states[EG_PS_SHADER__SPI_BARYC_CNTL] = S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int eg_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[EG_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[EG_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[EG_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028860_NUM_GPRS(rshader->bc.ngpr) | - S_028860_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static void eg_texture_state_scissor(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level) -{ - struct radeon_state *rstate = &rtexture->scissor[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[EG_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000; - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000; - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct radeon_state *rstate; - struct r600_resource *rbuffer; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype, attrib; - const struct util_format_description *desc; - - rstate = &rtexture->cb[cb][level]; - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0, cb, 0); - rbuffer = &rtexture->resource; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - ntype = 0; - attrib = 0; - desc = util_format_description(rbuffer->base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtexture->resource.base.b.format); - swap = r600_translate_colorswap(rtexture->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rtexture->uncompressed); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - color_info = 0; - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - color_info = S_028C70_SOURCE_FORMAT(1); - } - color_info |= S_028C70_FORMAT(format) | - S_028C70_COMP_SWAP(swap) | - S_028C70_BLEND_CLAMP(1) | - S_028C70_NUMBER_TYPE(ntype); - rstate->states[EG_CB__CB_COLOR0_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_CB__CB_COLOR0_INFO] = color_info; - rstate->states[EG_CB__CB_COLOR0_PITCH] = S_028C64_PITCH_TILE_MAX(pitch); - rstate->states[EG_CB__CB_COLOR0_SLICE] = S_028C68_SLICE_TILE_MAX(slice); - rstate->states[EG_CB__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[EG_CB__CB_COLOR0_ATTRIB] = S_028C74_NON_DISP_TILING_ORDER(1); - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->db[level]; - struct r600_resource *rbuffer; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - rbuffer = &rtexture->resource; - rtexture->tilled = 1; - rtexture->array_mode = 2; - rtexture->tile_type = 1; - rtexture->depth = 1; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - format = r600_translate_dbformat(rbuffer->base.b.format); - rstate->states[EG_DB__DB_Z_READ_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_DB__DB_Z_WRITE_BASE] = rtexture->offset[level] >> 8; - rstate->states[EG_DB__DB_Z_INFO] = S_028040_ARRAY_MODE(rtexture->array_mode) | S_028040_FORMAT(format); - rstate->states[EG_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[EG_DB__DB_DEPTH_SIZE] = S_028058_PITCH_TILE_MAX(pitch); - rstate->states[EG_DB__DB_DEPTH_SLICE] = S_02805C_SLICE_TILE_MAX(slice); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - - radeon_state_pm4(rstate); -} - -static void eg_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->viewport[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[EG_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[EG_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - - radeon_state_pm4(rstate); -} - -struct r600_context_hw_state_vtbl eg_hw_state_vtbl = { - .blend = eg_blend, - .ucp = eg_ucp, - .cb = eg_cb, - .db = eg_db, - .rasterizer = eg_rasterizer, - .scissor = eg_scissor, - .viewport = eg_viewport, - .dsa = eg_dsa, - .sampler_border = eg_sampler_border, - .sampler = eg_sampler, - .resource = eg_resource, - .cb_cntl = eg_cb_cntl, - .vs_resource = eg_vs_resource, - .vgt_init = eg_draw_vgt_init, - .vgt_prim = eg_draw_vgt_prim, - .vs_shader = eg_vs_shader, - .ps_shader = eg_ps_shader, - .init_config = eg_init_config, - .texture_state_viewport = eg_texture_state_viewport, - .texture_state_db = eg_texture_state_db, - .texture_state_cb = eg_texture_state_cb, - .texture_state_scissor = eg_texture_state_scissor, -}; - -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[EG_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 4e3514638b7..be81c28b43f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "evergreend.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -123,6 +124,21 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) return 0; } +static INLINE uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + /* translates straight */ static INLINE uint32_t r600_translate_ds_func(int func) { @@ -136,17 +152,17 @@ static inline unsigned r600_tex_wrap(unsigned wrap) case PIPE_TEX_WRAP_REPEAT: return V_03C000_SQ_TEX_WRAP; case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_CLAMP_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return V_03C000_SQ_TEX_MIRROR; case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; } @@ -261,6 +277,14 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) } } +static inline uint32_t r600_translate_stencilformat(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + return 1; + else + return 0; +} + static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -283,6 +307,15 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,12 +343,19 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_028C70_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -357,6 +397,15 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + + case PIPE_FORMAT_R8G8_UNORM: + return V_028C70_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,18 +432,37 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_028C70_COLOR_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + return V_028C70_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_028C70_COLOR_16_16; + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_028C70_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_028C70_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: return V_028C70_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_SSCALED: + return V_028C70_COLOR_32_32; + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; @@ -431,4 +499,173 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: +// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: +// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ +// break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_030008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_030008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_030008_NUM_FORMAT_ALL(0); + } else { + result |= S_030008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + +static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + uint32_t word3; + + assert(format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + fprintf(stderr, "r600: Bad format %s in %s:%d\n", + util_format_short_name(format), __FUNCTION__, __LINE__); + return 0; + } + + word3 = 0; + for (i = 0; i < desc->nr_channels; i++) { + switch (i) { + case 0: + word3 |= S_03000C_DST_SEL_X(desc->swizzle[0]); + break; + case 1: + word3 |= S_03000C_DST_SEL_Y(desc->swizzle[1]); + break; + case 2: + word3 |= S_03000C_DST_SEL_Z(desc->swizzle[2]); + break; + case 3: + word3 |= S_03000C_DST_SEL_W(desc->swizzle[3]); + break; + } + } + return word3; +} + #endif diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h index 462f31cc798..1379c11291f 100644 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ b/src/gallium/drivers/r600/eg_states_inc.h @@ -150,13 +150,14 @@ #define EG_DSA__DB_DEPTH_CONTROL 7 #define EG_DSA__DB_SHADER_CONTROL 8 #define EG_DSA__DB_RENDER_CONTROL 9 -#define EG_DSA__DB_RENDER_OVERRIDE 10 -#define EG_DSA__DB_RENDER_OVERRIDE2 11 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 12 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 13 -#define EG_DSA__DB_PRELOAD_CONTROL 14 -#define EG_DSA__DB_ALPHA_TO_MASK 15 -#define EG_DSA_SIZE 16 +#define EG_DSA__DB_COUNT_CONTROL 10 +#define EG_DSA__DB_RENDER_OVERRIDE 11 +#define EG_DSA__DB_RENDER_OVERRIDE2 12 +#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13 +#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14 +#define EG_DSA__DB_PRELOAD_CONTROL 15 +#define EG_DSA__DB_ALPHA_TO_MASK 16 +#define EG_DSA_SIZE 17 #define EG_DSA_PM4 128 /* EG_VS_SHADER */ @@ -368,27 +369,30 @@ #define EG_GS_SAMPLER_PM4 128 /* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define EG_PS_SAMPLER_BORDER_SIZE 4 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 +#define EG_PS_SAMPLER_BORDER_SIZE 5 #define EG_PS_SAMPLER_BORDER_PM4 128 /* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define EG_VS_SAMPLER_BORDER_SIZE 4 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 +#define EG_VS_SAMPLER_BORDER_SIZE 5 #define EG_VS_SAMPLER_BORDER_PM4 128 /* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define EG_GS_SAMPLER_BORDER_SIZE 4 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 +#define EG_GS_SAMPLER_BORDER_SIZE 5 #define EG_GS_SAMPLER_BORDER_PM4 128 /* EG_CB */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c new file mode 100644 index 00000000000..935496c04af --- /dev/null +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -0,0 +1,1743 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* TODO: + * - fix mask for depth control & cull for query + */ +#include <stdio.h> +#include <errno.h> +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include <util/u_double_list.h> +#include <util/u_transfer.h> +#include <util/u_surface.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_framebuffer.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "evergreend.h" +#include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" +#include "eg_state_inlines.h" + +static void evergreen_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_BLEND_COLOR; + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); + rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void *evergreen_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); + struct r600_pipe_state *rstate; + u32 color_control, target_mask; + /* FIXME there is more then 8 framebuffer */ + unsigned blend_cntl[8]; + + if (blend == NULL) { + return NULL; + } + rstate = &blend->rstate; + + rstate->id = R600_PIPE_STATE_BLEND; + + target_mask = 0; + color_control = S_028808_MODE(1); + if (state->logicop_enable) { + color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); + } else { + color_control |= (0xcc << 16); + } + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + if (state->independent_blend_enable) { + for (int i = 0; i < 8; i++) { + target_mask |= (state->rt[i].colormask << (4 * i)); + } + } else { + for (int i = 0; i < 8; i++) { + target_mask |= (state->rt[0].colormask << (4 * i)); + } + } + blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + color_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + for (int i = 0; i < 8; i++) { + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + + blend_cntl[i] = 0; + if (!state->rt[i].blend_enable) + continue; + + blend_cntl[i] |= S_028780_BLEND_CONTROL_ENABLE(1); + blend_cntl[i] |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + blend_cntl[i] |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + blend_cntl[i] |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + blend_cntl[i] |= S_028780_SEPARATE_ALPHA_BLEND(1); + blend_cntl[i] |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + blend_cntl[i] |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + blend_cntl[i] |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); + } + } + for (int i = 0; i < 8; i++) { + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + } + + return rstate; +} + +static void evergreen_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; + + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void *evergreen_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; + unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; + + if (rstate == NULL) { + return NULL; + } + + rstate->id = R600_PIPE_STATE_DSA; + /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ + /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be + * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will + * be set if shader use texkill instruction + */ + db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + + /* stencil */ + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); + } + } + + /* alpha */ + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_ref = fui(state->alpha.ref_value); + } + + /* misc */ + db_render_control = 0; + db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | + S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); + /* TODO db_render_override depends on query */ + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, stencil_ref_mask, + 0xFFFFFFFF & C_028430_STENCILREF, NULL); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + + return rstate; +} + +static void *evergreen_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); + struct r600_pipe_state *rstate; + unsigned tmp; + unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; + + if (rs == NULL) { + return NULL; + } + + rstate = &rs->rstate; + rs->flatshade = state->flatshade; + rs->sprite_coord_enable = state->sprite_coord_enable; + + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + + /* offset */ + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + + rstate->id = R600_PIPE_STATE_RASTERIZER; + if (state->flatshade_first) + prov_vtx = 0; + tmp = 0x00000001; + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); + } + } + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + + polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL); + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_MODE(polygon_dual_mode) | + S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + return rstate; +} + +static void evergreen_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); +} + +static void evergreen_delete_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); +} + +static void *evergreen_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + union util_color uc; + + if (rstate == NULL) { + return NULL; + } + + rstate->id = R600_PIPE_STATE_SAMPLER; + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + /* FIXME LOD it depends on texture base level ... */ + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | + S_03C008_TYPE(1), + 0xFFFFFFFF, NULL); + + if (uc.ui) { + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + } + return rstate; +} + +static void *evergreen_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + + assert(count < 32); + v->count = count; + v->refcount = 1; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + return v; +} + +static void evergreen_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; + + pipe_resource_reference(&state->texture, NULL); + FREE(resource); +} + +static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_state *rstate; + const struct util_format_description *desc; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; + unsigned format; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4]; + struct r600_bo *bo[2]; + + if (resource == NULL) + return NULL; + rstate = &resource->state; + + /* initialize base object */ + resource->base = *state; + resource->base.texture = NULL; + pipe_reference(NULL, &texture->reference); + resource->base.texture = texture; + resource->base.reference.count = 1; + resource->base.context = ctx; + + swizzle[0] = state->swizzle_r; + swizzle[1] = state->swizzle_g; + swizzle[2] = state->swizzle_b; + swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(state->format, + swizzle, + &word4, &yuv_format); + if (format == ~0) { + format = 0; + } + desc = util_format_description(state->format); + if (desc == NULL) { + R600_ERR("unknow format %d\n", state->format); + } + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + /* FIXME depth texture decompression */ + if (tmp->depth) { + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + } + pitch = align(tmp->pitch_in_pixels[0], 8); + + /* FIXME properly handle first level != 0 */ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + S_030000_DIM(r600_tex_dim(texture->target)) | + S_030000_PITCH((pitch / 8) - 1) | + S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + S_030004_TEX_HEIGHT(texture->height0 - 1) | + S_030004_TEX_DEPTH(texture->depth0 - 1), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | + S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | + S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + S_030014_LAST_LEVEL(state->last_level) | + S_030014_BASE_ARRAY(0) | + S_030014_LAST_ARRAY(0), 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + S_03001C_DATA_FORMAT(format) | + S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + + return &resource->base; +} + +static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } +} + +static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + } + } +} + +static void evergreen_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (state == NULL) + return; + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; + + for (int i = 0; i < count; i++) { + evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + } +} + +static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + + for (int i = 0; i < count; i++) { + evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + } +} + +static void evergreen_delete_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); +} + +static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + if (v == NULL) + return; + if (--v->refcount) + return; + free(v); +} + +static void evergreen_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rctx->clip = *state; + rstate->id = R600_PIPE_STATE_CLIP; + for (int i = 0; i < state->nr; i++) { + r600_pipe_state_add_reg(rstate, + R_0285BC_PA_CL_UCP0_X + i * 4, + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C0_PA_CL_UCP0_Y + i * 4, + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C4_PA_CL_UCP0_Z + i * 4, + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0285C8_PA_CL_UCP0_W + i * 4, + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | + S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_CLIP]); + rctx->states[R600_PIPE_STATE_CLIP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + evergreen_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; +// rctx->vs_rebuild = TRUE; + } +} + +static void evergreen_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void evergreen_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +static void evergreen_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tl, br; + + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_SCISSOR; + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_SCISSOR]); + rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tmp; + + if (rstate == NULL) + return; + + rctx->stencil_ref = *state; + rstate->id = R600_PIPE_STATE_STENCIL_REF; + tmp = S_028430_STENCILREF(state->ref_value[0]); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, tmp, + ~C_028430_STENCILREF, NULL); + tmp = S_028434_STENCILREF_BF(state->ref_value[1]); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, tmp, + ~C_028434_STENCILREF_BF, NULL); + + free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); + rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + + if (rstate == NULL) + return; + + rctx->viewport = *state; + rstate->id = R600_PIPE_STATE_VIEWPORT; + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_VIEWPORT]); + rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state, int cb) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level = state->cbufs[cb]->level; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + struct r600_bo *bo[3]; + + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + rbuffer = &rtex->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + bo[2] = rbuffer->bo; + + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_028C70_NUMBER_SRGB; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_028C70_FORMAT(format) | + S_028C70_COMP_SWAP(swap) | + S_028C70_BLEND_CLAMP(1) | + S_028C70_NUMBER_TYPE(ntype); + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + color_info |= S_028C70_SOURCE_FORMAT(1); + + /* FIXME handle enabling of CB beyond BASE8 which has different offset */ + r600_pipe_state_add_reg(rstate, + R_028C60_CB_COLOR0_BASE + cb * 0x3C, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028C78_CB_COLOR0_DIM + cb * 0x3C, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C70_CB_COLOR0_INFO + cb * 0x3C, + color_info, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028C64_CB_COLOR0_PITCH + cb * 0x3C, + S_028C64_PITCH_TILE_MAX(pitch), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C68_CB_COLOR0_SLICE + cb * 0x3C, + S_028C68_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, + S_028C74_NON_DISP_TILING_ORDER(1), + 0xFFFFFFFF, bo[0]); +} + +static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level; + unsigned pitch, slice, format, stencil_format; + + if (state->zsbuf == NULL) + return; + + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tiled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; + rbuffer = &rtex->resource; + + level = state->zsbuf->level; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + format = r600_translate_dbformat(state->zsbuf->texture->format); + stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + + if (stencil_format) { + uint32_t stencil_offset; + + stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, + (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + } + + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); + + r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, + S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format), + 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, + S_028058_PITCH_TILE_MAX(pitch), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, + S_02805C_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); +} + +static void evergreen_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 shader_mask, tl, br, target_mask; + + if (rstate == NULL) + return; + + /* unreference old buffer and reference new one */ + rstate->id = R600_PIPE_STATE_FRAMEBUFFER; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; + + /* build states */ + for (int i = 0; i < state->nr_cbufs; i++) { + evergreen_cb(rctx, rstate, state, i); + } + if (state->zsbuf) { + evergreen_db(rctx, rstate, state); + } + + target_mask = 0x00000000; + target_mask = 0xFFFFFFFF; + shader_mask = 0; + for (int i = 0; i < state->nr_cbufs; i++) { + target_mask ^= 0xf << (i * 4); + shader_mask |= 0xf << (i * 4); + } + tl = S_028240_TL_X(0) | S_028240_TL_Y(0); + br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + + r600_pipe_state_add_reg(rstate, + R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028244_PA_SC_GENERIC_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, + 0x00000000, target_mask, NULL); + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, + shader_mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); + rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void evergreen_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + +static void evergreen_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} + +static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } +} + +static void *evergreen_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +static void evergreen_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +static void evergreen_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +static void evergreen_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +void evergreen_init_state_functions(struct r600_pipe_context *rctx) +{ + rctx->context.create_blend_state = evergreen_create_blend_state; + rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; + rctx->context.create_fs_state = evergreen_create_shader_state; + rctx->context.create_rasterizer_state = evergreen_create_rs_state; + rctx->context.create_sampler_state = evergreen_create_sampler_state; + rctx->context.create_sampler_view = evergreen_create_sampler_view; + rctx->context.create_vertex_elements_state = evergreen_create_vertex_elements; + rctx->context.create_vs_state = evergreen_create_shader_state; + rctx->context.bind_blend_state = evergreen_bind_blend_state; + rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state; + rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; + rctx->context.bind_fs_state = evergreen_bind_ps_shader; + rctx->context.bind_rasterizer_state = evergreen_bind_rs_state; + rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements; + rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler; + rctx->context.bind_vs_state = evergreen_bind_vs_shader; + rctx->context.delete_blend_state = evergreen_delete_state; + rctx->context.delete_depth_stencil_alpha_state = evergreen_delete_state; + rctx->context.delete_fs_state = evergreen_delete_ps_shader; + rctx->context.delete_rasterizer_state = evergreen_delete_rs_state; + rctx->context.delete_sampler_state = evergreen_delete_state; + rctx->context.delete_vertex_elements_state = evergreen_delete_vertex_element; + rctx->context.delete_vs_state = evergreen_delete_vs_shader; + rctx->context.set_blend_color = evergreen_set_blend_color; + rctx->context.set_clip_state = evergreen_set_clip_state; + rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; + rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; + rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; + rctx->context.set_sample_mask = evergreen_set_sample_mask; + rctx->context.set_scissor_state = evergreen_set_scissor_state; + rctx->context.set_stencil_ref = evergreen_set_stencil_ref; + rctx->context.set_vertex_buffers = evergreen_set_vertex_buffers; + rctx->context.set_index_buffer = evergreen_set_index_buffer; + rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; + rctx->context.set_viewport_state = evergreen_set_viewport_state; + rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; +} + +void evergreen_init_config(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate = &rctx->config; + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int hs_prio, cs_prio, ls_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_hs_gprs; + int num_ls_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_hs_threads; + int num_ls_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + int num_hs_stack_entries; + int num_ls_stack_entries; + enum radeon_family family; + unsigned tmp; + + family = r600_get_family(rctx->radeon); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + hs_prio = 0; + ls_prio = 0; + cs_prio = 0; + + switch (family) { + case CHIP_CEDAR: + default: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_REDWOOD: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_JUNIPER: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + } + + tmp = 0x00000000; + switch (family) { + case CHIP_CEDAR: + break; + default: + tmp |= S_008C00_VC_ENABLE(1); + break; + } + tmp |= S_008C00_EXPORT_SRC_C(1); + tmp |= S_008C00_CS_PRIO(cs_prio); + tmp |= S_008C00_LS_PRIO(ls_prio); + tmp |= S_008C00_HS_PRIO(hs_prio); + tmp |= S_008C00_PS_PRIO(ps_prio); + tmp |= S_008C00_VS_PRIO(vs_prio); + tmp |= S_008C00_GS_PRIO(gs_prio); + tmp |= S_008C00_ES_PRIO(es_prio); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); + tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads); + tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); + tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); + tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); + tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); + tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + +// r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + +r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + 0x0, 0xFFFFFFFF, NULL); + + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + unsigned i, j, offset, prim; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct pipe_vertex_buffer *vertex_buffer; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw; + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->index_bias; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.start * draw.index_size; + draw.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.index_size = 0; + draw.index_buffer = NULL; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.mode, &prim)) + return; + + /* rebuild vertex shader if input format changed */ + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + return; + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + return; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + uint32_t word3, word2; + uint32_t format; + rstate = &rctx->vs_resource[i]; + + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_030008_STRIDE(vertex_buffer->stride); + + word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format); + + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); + evergreen_vs_resource_set(&rctx->ctx, rstate, i); + } + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&vgt, + R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + evergreen_context_draw(&rctx->ctx, &rdraw); + + pipe_resource_reference(&draw.index_buffer, NULL); +} + +void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; + + /* clear previous register */ + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_EXPORT_ENABLE(1), + S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); + } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_02884C_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, + spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028844_SQ_PGM_RESOURCES_PS, + S_028844_NUM_GPRS(rshader->bc.ngpr) | + S_028844_PRIME_CACHE_ON_DRAW(1) | + S_028844_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028848_SQ_PGM_RESOURCES_2_PS, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02884C_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + + if (rshader->uses_kill) { + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_KILL_ENABLE(1), + S_02880C_KILL_ENABLE(1), NULL); + } + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_02861C_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028860_SQ_PGM_RESOURCES_VS, + S_028860_NUM_GPRS(rshader->bc.ngpr) | + S_028860_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028864_SQ_PGM_RESOURCES_2_VS, + 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02885C_SQ_PGM_START_VS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) +{ + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + + memset(&dsa, 0, sizeof(dsa)); + + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028000_DB_RENDER_CONTROL, + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), + S_028000_DEPTH_COPY_ENABLE(1) | + S_028000_STENCIL_COPY_ENABLE(1) | + S_028000_COPY_CENTROID(1), NULL); + return rstate; +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 77cd8f1588b..8e96f9355e6 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -26,6 +26,26 @@ #ifndef EVERGREEND_H #define EVERGREEND_H +/* evergreen values */ +#define EVERGREEN_CONFIG_REG_OFFSET 0X00008000 +#define EVERGREEN_CONFIG_REG_END 0X0000AC00 +#define EVERGREEN_CONTEXT_REG_OFFSET 0X00028000 +#define EVERGREEN_CONTEXT_REG_END 0X00029000 +#define EVERGREEN_RESOURCE_OFFSET 0x00030000 +#define EVERGREEN_RESOURCE_END 0x00034000 +#define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 +#define EVERGREEN_LOOP_CONST_END 0x0003A26C +#define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 +#define EVERGREEN_BOOL_CONST_END 0x0003A506 +#define EVERGREEN_SAMPLER_OFFSET 0X0003C000 +#define EVERGREEN_SAMPLER_END 0X0003CFF0 + +#define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 +#define EVERGREEN_CTL_CONST_END 0x0003E200 + +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 + #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 #define PKT3_NOP 0x10 @@ -309,7 +329,7 @@ #define S_028C74_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 4) #define G_028C74_NON_DISP_TILING_ORDER(x) (((x) >> 4) & 0x1) #define C_028C74_NON_DISP_TILING_ORDER 0xFFFFFFEF - + #define R_028C78_CB_COLOR0_DIM 0x028C78 #define S_028C78_WIDTH_MAX(x) (((x) & 0xFFFF) << 0) #define G_028C78_WIDTH_MAX(x) (((x) >> 0) & 0xFFFF) @@ -662,60 +682,13 @@ #define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) #define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 -#define R_028D0C_DB_RENDER_CONTROL 0x028D0C -#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) -#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) -#define S_028D0C_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define V_028D10_FORCE_OFF 0 -#define V_028D10_FORCE_ENABLE 1 -#define V_028D10_FORCE_DISABLE 2 -#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) -#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) -#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC -#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) -#define G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 -#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) -#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF -#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) -#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) -#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF -#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) -#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) -#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F -#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) -#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) -#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF -#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) -#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) -#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF -#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) -#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) -#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF -#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) -#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) -#define C_028D10_FORCE_Z_READ 0xFFFFF7FF -#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) -#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) -#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF -#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) -#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) -#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF -#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) -#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) -#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF -#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) -#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) -#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF -#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) -#define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) -#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF #define R_02880C_DB_SHADER_CONTROL 0x02880C #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -729,29 +702,6 @@ #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9) #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1) #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF -#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 -#define S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) -#define G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) -#define C_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 -#define S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) -#define G_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) -#define C_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF -#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 -#define S_028E00_SCALE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E00_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E00_SCALE 0x00000000 -#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 -#define S_028E04_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E04_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E04_OFFSET 0x00000000 -#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 -#define S_028E08_SCALE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E08_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E08_SCALE 0x00000000 -#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C -#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028E0C_OFFSET 0x00000000 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) #define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF) @@ -1037,9 +987,6 @@ #define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) #define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) #define C_030010_ENDIAN_SWAP 0xFFFFCFFF -#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_030010_REQUEST_SIZE 0xFFFF3FFF #define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16) #define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7) #define C_030010_DST_SEL_X 0xFFF8FFFF @@ -1103,43 +1050,12 @@ #define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_030008_DATA_FORMAT 0xFC0FFFFF -#define V_030008_COLOR_INVALID 0x00000000 -#define V_030008_COLOR_8 0x00000001 -#define V_030008_COLOR_4_4 0x00000002 -#define V_030008_COLOR_3_3_2 0x00000003 -#define V_030008_COLOR_16 0x00000005 -#define V_030008_COLOR_16_FLOAT 0x00000006 -#define V_030008_COLOR_8_8 0x00000007 -#define V_030008_COLOR_5_6_5 0x00000008 -#define V_030008_COLOR_6_5_5 0x00000009 -#define V_030008_COLOR_1_5_5_5 0x0000000A -#define V_030008_COLOR_4_4_4_4 0x0000000B -#define V_030008_COLOR_5_5_5_1 0x0000000C -#define V_030008_COLOR_32 0x0000000D -#define V_030008_COLOR_32_FLOAT 0x0000000E -#define V_030008_COLOR_16_16 0x0000000F -#define V_030008_COLOR_16_16_FLOAT 0x00000010 -#define V_030008_COLOR_8_24 0x00000011 -#define V_030008_COLOR_8_24_FLOAT 0x00000012 -#define V_030008_COLOR_24_8 0x00000013 -#define V_030008_COLOR_24_8_FLOAT 0x00000014 -#define V_030008_COLOR_10_11_11 0x00000015 -#define V_030008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_030008_COLOR_11_11_10 0x00000017 -#define V_030008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_030008_COLOR_2_10_10_10 0x00000019 -#define V_030008_COLOR_8_8_8_8 0x0000001A -#define V_030008_COLOR_10_10_10_2 0x0000001B -#define V_030008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_030008_COLOR_32_32 0x0000001D -#define V_030008_COLOR_32_32_FLOAT 0x0000001E -#define V_030008_COLOR_16_16_16_16 0x0000001F -#define V_030008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_030008_COLOR_32_32_32_32 0x00000022 -#define V_030008_COLOR_32_32_32_32_FLOAT 0x00000023 #define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF +#define V_030008_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_030008_SQ_NUM_FORMAT_INT 0x00000001 +#define V_030008_SQ_NUM_FORMAT_SCALED 0x00000002 #define S_030008_FORMAT_COMP_ALL(x) (((x) & 0x1) << 28) #define G_030008_FORMAT_COMP_ALL(x) (((x) >> 28) & 0x1) #define C_030008_FORMAT_COMP_ALL 0xEFFFFFFF @@ -1166,6 +1082,22 @@ #define S_03000C_DST_SEL_W(x) (((x) & 0x7) << 12) #define G_03000C_DST_SEL_W(x) (((x) >> 12) & 0x7) +#define R_00A400_TD_PS_SAMPLER0_BORDER_INDEX 0x00A400 +#define R_00A404_TD_PS_SAMPLER0_BORDER_RED 0x00A404 +#define R_00A408_TD_PS_SAMPLER0_BORDER_GREEN 0x00A408 +#define R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE 0x00A40C +#define R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA 0x00A410 +#define R_00A414_TD_VS_SAMPLER0_BORDER_INDEX 0x00A414 +#define R_00A418_TD_VS_SAMPLER0_BORDER_RED 0x00A418 +#define R_00A41C_TD_VS_SAMPLER0_BORDER_GREEN 0x00A41C +#define R_00A420_TD_VS_SAMPLER0_BORDER_BLUE 0x00A420 +#define R_00A424_TD_VS_SAMPLER0_BORDER_ALPHA 0x00A424 +#define R_00A428_TD_GS_SAMPLER0_BORDER_INDEX 0x00A428 +#define R_00A42C_TD_GS_SAMPLER0_BORDER_RED 0x00A42C +#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430 +#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434 +#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438 + #define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 #define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) #define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) @@ -1456,4 +1388,488 @@ #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 #define SQ_TEX_INST_SAMPLE_C 0x18 + +#define R_008A14_PA_CL_ENHANCE 0x00008A14 +#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C +#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x00008D8C +#define R_028000_DB_RENDER_CONTROL 0x00028000 +#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028000_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028000_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028000_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028000_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) +#define S_028000_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) +#define S_028000_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028000_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028000_COPY_SAMPLE(x) (((x) & 0x7) << 8) +#define S_028000_COLOR_DISABLE(x) (((x) & 0x1) << 12) +#define R_028004_DB_COUNT_CONTROL 0x00028004 +#define S_028004_ZPASS_INCREMENT_DISABLE (((x) & 0x1) << 0) +#define S_028004_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 1) +#define R_028008_DB_DEPTH_VIEW 0x00028008 +#define R_02800C_DB_RENDER_OVERRIDE 0x0002800C +#define V_02800C_FORCE_OFF 0 +#define V_02800C_FORCE_ENABLE 1 +#define V_02800C_FORCE_DISABLE 2 +#define S_02800C_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) +#define G_02800C_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) +#define C_02800C_FORCE_HIZ_ENABLE 0xFFFFFFFC +#define S_02800C_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) +#define G_02800C_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) +#define C_02800C_FORCE_HIS_ENABLE0 0xFFFFFFF3 +#define S_02800C_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) +#define G_02800C_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) +#define C_02800C_FORCE_HIS_ENABLE1 0xFFFFFFCF +#define S_02800C_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) +#define G_02800C_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) +#define C_02800C_FORCE_SHADER_Z_ORDER 0xFFFFFFBF +#define S_02800C_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) +#define G_02800C_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) +#define C_02800C_FAST_Z_DISABLE 0xFFFFFF7F +#define S_02800C_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) +#define G_02800C_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) +#define C_02800C_FAST_STENCIL_DISABLE 0xFFFFFEFF +#define S_02800C_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) +#define G_02800C_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) +#define C_02800C_NOOP_CULL_DISABLE 0xFFFFFDFF +#define S_02800C_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) +#define G_02800C_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) +#define C_02800C_FORCE_COLOR_KILL 0xFFFFFBFF +#define S_02800C_FORCE_Z_READ(x) (((x) & 0x1) << 11) +#define G_02800C_FORCE_Z_READ(x) (((x) >> 11) & 0x1) +#define C_02800C_FORCE_Z_READ 0xFFFFF7FF +#define S_02800C_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) +#define G_02800C_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) +#define C_02800C_FORCE_STENCIL_READ 0xFFFFEFFF +#define S_02800C_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) +#define G_02800C_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) +#define C_02800C_FORCE_FULL_Z_RANGE 0xFFFF9FFF +#define S_02800C_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) +#define G_02800C_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) +#define C_02800C_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF +#define S_02800C_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) +#define G_02800C_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) +#define C_02800C_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF +#define S_02800C_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) +#define G_02800C_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) +#define C_02800C_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define R_028010_DB_RENDER_OVERRIDE2 0x00028010 +#define R_028014_DB_HTILE_DATA_BASE 0x00028014 +#define R_028028_DB_STENCIL_CLEAR 0x00028028 +#define R_02802C_DB_DEPTH_CLEAR 0x0002802C +#define R_028048_DB_Z_READ_BASE 0x00028048 +#define R_02804C_DB_STENCIL_READ_BASE 0x0002804C +#define R_028050_DB_Z_WRITE_BASE 0x00028050 +#define R_028054_DB_STENCIL_WRITE_BASE 0x00028054 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028200_PA_SC_WINDOW_OFFSET 0x00028200 +#define R_02820C_PA_SC_CLIPRECT_RULE 0x0002820C +#define R_028210_PA_SC_CLIPRECT_0_TL 0x00028210 +#define R_028214_PA_SC_CLIPRECT_0_BR 0x00028214 +#define R_028218_PA_SC_CLIPRECT_1_TL 0x00028218 +#define R_02821C_PA_SC_CLIPRECT_1_BR 0x0002821C +#define R_028220_PA_SC_CLIPRECT_2_TL 0x00028220 +#define R_028224_PA_SC_CLIPRECT_2_BR 0x00028224 +#define R_028228_PA_SC_CLIPRECT_3_TL 0x00028228 +#define R_02822C_PA_SC_CLIPRECT_3_BR 0x0002822C +#define R_028230_PA_SC_EDGERULE 0x00028230 +#define R_028234_PA_SU_HARDWARE_SCREEN_OFFSET 0x00028234 +#define R_028238_CB_TARGET_MASK 0x00028238 +#define R_02823C_CB_SHADER_MASK 0x0002823C +#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x00028250 +#define R_028254_PA_SC_VPORT_SCISSOR_0_BR 0x00028254 +#define R_028350_SX_MISC 0x00028350 +#define R_028380_SQ_VTX_SEMANTIC_0 0x00028380 +#define R_028384_SQ_VTX_SEMANTIC_1 0x00028384 +#define R_028388_SQ_VTX_SEMANTIC_2 0x00028388 +#define R_02838C_SQ_VTX_SEMANTIC_3 0x0002838C +#define R_028390_SQ_VTX_SEMANTIC_4 0x00028390 +#define R_028394_SQ_VTX_SEMANTIC_5 0x00028394 +#define R_028398_SQ_VTX_SEMANTIC_6 0x00028398 +#define R_02839C_SQ_VTX_SEMANTIC_7 0x0002839C +#define R_0283A0_SQ_VTX_SEMANTIC_8 0x000283A0 +#define R_0283A4_SQ_VTX_SEMANTIC_9 0x000283A4 +#define R_0283A8_SQ_VTX_SEMANTIC_10 0x000283A8 +#define R_0283AC_SQ_VTX_SEMANTIC_11 0x000283AC +#define R_0283B0_SQ_VTX_SEMANTIC_12 0x000283B0 +#define R_0283B4_SQ_VTX_SEMANTIC_13 0x000283B4 +#define R_0283B8_SQ_VTX_SEMANTIC_14 0x000283B8 +#define R_0283BC_SQ_VTX_SEMANTIC_15 0x000283BC +#define R_0283C0_SQ_VTX_SEMANTIC_16 0x000283C0 +#define R_0283C4_SQ_VTX_SEMANTIC_17 0x000283C4 +#define R_0283C8_SQ_VTX_SEMANTIC_18 0x000283C8 +#define R_0283CC_SQ_VTX_SEMANTIC_19 0x000283CC +#define R_0283D0_SQ_VTX_SEMANTIC_20 0x000283D0 +#define R_0283D4_SQ_VTX_SEMANTIC_21 0x000283D4 +#define R_0283D8_SQ_VTX_SEMANTIC_22 0x000283D8 +#define R_0283DC_SQ_VTX_SEMANTIC_23 0x000283DC +#define R_0283E0_SQ_VTX_SEMANTIC_24 0x000283E0 +#define R_0283E4_SQ_VTX_SEMANTIC_25 0x000283E4 +#define R_0283E8_SQ_VTX_SEMANTIC_26 0x000283E8 +#define R_0283EC_SQ_VTX_SEMANTIC_27 0x000283EC +#define R_0283F0_SQ_VTX_SEMANTIC_28 0x000283F0 +#define R_0283F4_SQ_VTX_SEMANTIC_29 0x000283F4 +#define R_0283F8_SQ_VTX_SEMANTIC_30 0x000283F8 +#define R_0283FC_SQ_VTX_SEMANTIC_31 0x000283FC +#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x000282D0 +#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x000282D4 +#define R_028400_VGT_MAX_VTX_INDX 0x00028400 +#define R_028404_VGT_MIN_VTX_INDX 0x00028404 +#define R_028408_VGT_INDX_OFFSET 0x00028408 +#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX 0x0002840C +#define R_028414_CB_BLEND_RED 0x00028414 +#define R_028418_CB_BLEND_GREEN 0x00028418 +#define R_02841C_CB_BLEND_BLUE 0x0002841C +#define R_028420_CB_BLEND_ALPHA 0x00028420 +#define R_028438_SX_ALPHA_REF 0x00028438 +#define R_02843C_PA_CL_VPORT_XSCALE_0 0x0002843C +#define R_028440_PA_CL_VPORT_XOFFSET_0 0x00028440 +#define R_028444_PA_CL_VPORT_YSCALE_0 0x00028444 +#define R_028448_PA_CL_VPORT_YOFFSET_0 0x00028448 +#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x0002844C +#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x00028450 +#define R_0285BC_PA_CL_UCP0_X 0x000285BC +#define R_0285C0_PA_CL_UCP0_Y 0x000285C0 +#define R_0285C4_PA_CL_UCP0_Z 0x000285C4 +#define R_0285C8_PA_CL_UCP0_W 0x000285C8 +#define R_0285CC_PA_CL_UCP1_X 0x000285CC +#define R_0285D0_PA_CL_UCP1_Y 0x000285D0 +#define R_0285D4_PA_CL_UCP1_Z 0x000285D4 +#define R_0285D8_PA_CL_UCP1_W 0x000285D8 +#define R_0285DC_PA_CL_UCP2_X 0x000285DC +#define R_0285E0_PA_CL_UCP2_Y 0x000285E0 +#define R_0285E4_PA_CL_UCP2_Z 0x000285E4 +#define R_0285E8_PA_CL_UCP2_W 0x000285E8 +#define R_0285EC_PA_CL_UCP3_X 0x000285EC +#define R_0285F0_PA_CL_UCP3_Y 0x000285F0 +#define R_0285F4_PA_CL_UCP3_Z 0x000285F4 +#define R_0285F8_PA_CL_UCP3_W 0x000285F8 +#define R_0285FC_PA_CL_UCP4_X 0x000285FC +#define R_028600_PA_CL_UCP4_Y 0x00028600 +#define R_028604_PA_CL_UCP4_Z 0x00028604 +#define R_028608_PA_CL_UCP4_W 0x00028608 +#define R_02860C_PA_CL_UCP5_X 0x0002860C +#define R_028610_PA_CL_UCP5_Y 0x00028610 +#define R_028614_PA_CL_UCP5_Z 0x00028614 +#define R_028618_PA_CL_UCP5_W 0x00028618 +#define R_02861C_SPI_VS_OUT_ID_0 0x0002861C +#define R_028620_SPI_VS_OUT_ID_1 0x00028620 +#define R_028624_SPI_VS_OUT_ID_2 0x00028624 +#define R_028628_SPI_VS_OUT_ID_3 0x00028628 +#define R_02862C_SPI_VS_OUT_ID_4 0x0002862C +#define R_028630_SPI_VS_OUT_ID_5 0x00028630 +#define R_028634_SPI_VS_OUT_ID_6 0x00028634 +#define R_028638_SPI_VS_OUT_ID_7 0x00028638 +#define R_02863C_SPI_VS_OUT_ID_8 0x0002863C +#define R_028640_SPI_VS_OUT_ID_9 0x00028640 +#define R_028648_SPI_PS_INPUT_CNTL_1 0x00028648 +#define R_02864C_SPI_PS_INPUT_CNTL_2 0x0002864C +#define R_028650_SPI_PS_INPUT_CNTL_3 0x00028650 +#define R_028654_SPI_PS_INPUT_CNTL_4 0x00028654 +#define R_028658_SPI_PS_INPUT_CNTL_5 0x00028658 +#define R_02865C_SPI_PS_INPUT_CNTL_6 0x0002865C +#define R_028660_SPI_PS_INPUT_CNTL_7 0x00028660 +#define R_028664_SPI_PS_INPUT_CNTL_8 0x00028664 +#define R_028668_SPI_PS_INPUT_CNTL_9 0x00028668 +#define R_02866C_SPI_PS_INPUT_CNTL_10 0x0002866C +#define R_028670_SPI_PS_INPUT_CNTL_11 0x00028670 +#define R_028674_SPI_PS_INPUT_CNTL_12 0x00028674 +#define R_028678_SPI_PS_INPUT_CNTL_13 0x00028678 +#define R_02867C_SPI_PS_INPUT_CNTL_14 0x0002867C +#define R_028680_SPI_PS_INPUT_CNTL_15 0x00028680 +#define R_028684_SPI_PS_INPUT_CNTL_16 0x00028684 +#define R_028688_SPI_PS_INPUT_CNTL_17 0x00028688 +#define R_02868C_SPI_PS_INPUT_CNTL_18 0x0002868C +#define R_028690_SPI_PS_INPUT_CNTL_19 0x00028690 +#define R_028694_SPI_PS_INPUT_CNTL_20 0x00028694 +#define R_028698_SPI_PS_INPUT_CNTL_21 0x00028698 +#define R_02869C_SPI_PS_INPUT_CNTL_22 0x0002869C +#define R_0286A0_SPI_PS_INPUT_CNTL_23 0x000286A0 +#define R_0286A4_SPI_PS_INPUT_CNTL_24 0x000286A4 +#define R_0286A8_SPI_PS_INPUT_CNTL_25 0x000286A8 +#define R_0286AC_SPI_PS_INPUT_CNTL_26 0x000286AC +#define R_0286B0_SPI_PS_INPUT_CNTL_27 0x000286B0 +#define R_0286B4_SPI_PS_INPUT_CNTL_28 0x000286B4 +#define R_0286B8_SPI_PS_INPUT_CNTL_29 0x000286B8 +#define R_0286BC_SPI_PS_INPUT_CNTL_30 0x000286BC +#define R_0286C0_SPI_PS_INPUT_CNTL_31 0x000286C0 +#define R_0286C8_SPI_THREAD_GROUPING 0x000286C8 +#define R_0286D8_SPI_INPUT_Z 0x000286D8 +#define R_0286DC_SPI_FOG_CNTL 0x000286DC +#define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4 +#define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8 +#define R_028780_CB_BLEND0_CONTROL 0x00028780 +#define R_028784_CB_BLEND1_CONTROL 0x00028784 +#define R_028788_CB_BLEND2_CONTROL 0x00028788 +#define R_02878C_CB_BLEND3_CONTROL 0x0002878C +#define R_028790_CB_BLEND4_CONTROL 0x00028790 +#define R_028794_CB_BLEND5_CONTROL 0x00028794 +#define R_028798_CB_BLEND6_CONTROL 0x00028798 +#define R_02879C_CB_BLEND7_CONTROL 0x0002879C +#define R_028818_PA_CL_VTE_CNTL 0x00028818 +#define R_028820_PA_CL_NANINF_CNTL 0x00028820 +#define R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1 0x00028838 +#define R_028840_SQ_PGM_START_PS 0x00028840 +#define R_02884C_SQ_PGM_EXPORTS_PS 0x0002884C +#define S_02884C_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_02884C_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_02884C_EXPORT_COLORS 0xFFFFFFE1 +#define S_02884C_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_02884C_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_02884C_EXPORT_Z 0xFFFFFFFE +#define R_02885C_SQ_PGM_START_VS 0x0002885C +#define R_0288A4_SQ_PGM_START_FS 0x000288A4 +#define R_0288A8_SQ_PGM_RESOURCES_FS 0x000288A8 +#define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC +#define R_028900_SQ_ESGS_RING_ITEMSIZE 0x00028900 +#define R_028904_SQ_GSVS_RING_ITEMSIZE 0x00028904 +#define R_028908_SQ_ESTMP_RING_ITEMSIZE 0x00028908 +#define R_02890C_SQ_GSTMP_RING_ITEMSIZE 0x0002890C +#define R_028910_SQ_VSTMP_RING_ITEMSIZE 0x00028910 +#define R_028914_SQ_PSTMP_RING_ITEMSIZE 0x00028914 +#define R_02891C_SQ_GS_VERT_ITEMSIZE 0x0002891C +#define R_028920_SQ_GS_VERT_ITEMSIZE_1 0x00028920 +#define R_028924_SQ_GS_VERT_ITEMSIZE_2 0x00028924 +#define R_028928_SQ_GS_VERT_ITEMSIZE_3 0x00028928 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 +#define R_028A04_PA_SU_POINT_MINMAX 0x00028A04 +#define R_028A08_PA_SU_LINE_CNTL 0x00028A08 +#define R_028A10_VGT_OUTPUT_PATH_CNTL 0x00028A10 +#define R_028A14_VGT_HOS_CNTL 0x00028A14 +#define R_028A18_VGT_HOS_MAX_TESS_LEVEL 0x00028A18 +#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL 0x00028A1C +#define R_028A20_VGT_HOS_REUSE_DEPTH 0x00028A20 +#define R_028A24_VGT_GROUP_PRIM_TYPE 0x00028A24 +#define R_028A28_VGT_GROUP_FIRST_DECR 0x00028A28 +#define R_028A2C_VGT_GROUP_DECR 0x00028A2C +#define R_028A30_VGT_GROUP_VECT_0_CNTL 0x00028A30 +#define R_028A34_VGT_GROUP_VECT_1_CNTL 0x00028A34 +#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL 0x00028A38 +#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x00028A3C +#define R_028A48_PA_SC_MODE_CNTL_0 0x00028A48 +#define R_028A4C_PA_SC_MODE_CNTL_1 0x00028A4C +#define R_028AB4_VGT_REUSE_OFF 0x00028AB4 +#define R_028AB8_VGT_VTX_CNT_EN 0x00028AB8 +#define R_028ABC_DB_HTILE_SURFACE 0x00028ABC +#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x00028AC0 +#define R_028AC4_DB_SRESULTS_COMPARE_STATE1 0x00028AC4 +#define R_028AC8_DB_PRELOAD_CONTROL 0x00028AC8 +#define R_028B54_VGT_SHADER_STAGES_EN 0x00028B54 +#define R_028B70_DB_ALPHA_TO_MASK 0x00028B70 +#define R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x00028B78 +#define S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) +#define G_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) +#define C_028B78_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 +#define S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) +#define G_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) +#define C_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF +#define R_028B7C_PA_SU_POLY_OFFSET_CLAMP 0x00028B7C +#define R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00028B80 +#define S_028B80_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B80_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B80_SCALE 0x00000000 +#define R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x00028B84 +#define S_028B84_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B84_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B84_OFFSET 0x00000000 +#define R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE 0x00028B88 +#define S_028B88_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B88_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B88_SCALE 0x00000000 +#define R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00028B8C +#define S_028B8C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028B8C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028B8C_OFFSET 0x00000000 +#define R_028B94_VGT_STRMOUT_CONFIG 0x00028B94 +#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x00028B98 +#define R_028C00_PA_SC_LINE_CNTL 0x00028C00 +#define R_028C04_PA_SC_AA_CONFIG 0x00028C04 +#define R_028C08_PA_SU_VTX_CNTL 0x00028C08 +#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x00028C0C +#define R_028C10_PA_CL_GB_VERT_DISC_ADJ 0x00028C10 +#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ 0x00028C14 +#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ 0x00028C18 +#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x00028C1C +#define R_028C3C_PA_SC_AA_MASK 0x00028C3C +#define R_028C60_CB_COLOR0_BASE 0x00028C60 +#define R_028C6C_CB_COLOR0_VIEW 0x00028C6C +#define R_028C9C_CB_COLOR1_BASE 0x00028C9C +#define R_028CA0_CB_COLOR1_PITCH 0x00028CA0 +#define R_028CA4_CB_COLOR1_SLICE 0x00028CA4 +#define R_028CA8_CB_COLOR1_VIEW 0x00028CA8 +#define R_028CAC_CB_COLOR1_INFO 0x00028CAC +#define R_028CB0_CB_COLOR1_ATTRIB 0x00028CB0 +#define R_028CB4_CB_COLOR1_DIM 0x00028CB4 +#define R_028CD8_CB_COLOR2_BASE 0x00028CD8 +#define R_028CDC_CB_COLOR2_PITCH 0x00028CDC +#define R_028CE0_CB_COLOR2_SLICE 0x00028CE0 +#define R_028CE4_CB_COLOR2_VIEW 0x00028CE4 +#define R_028CE8_CB_COLOR2_INFO 0x00028CE8 +#define R_028CEC_CB_COLOR2_ATTRIB 0x00028CEC +#define R_028CF0_CB_COLOR2_DIM 0x00028CF0 +#define R_028D14_CB_COLOR3_BASE 0x00028D14 +#define R_028D18_CB_COLOR3_PITCH 0x00028D18 +#define R_028D1C_CB_COLOR3_SLICE 0x00028D1C +#define R_028D20_CB_COLOR3_VIEW 0x00028D20 +#define R_028D24_CB_COLOR3_INFO 0x00028D24 +#define R_028D28_CB_COLOR3_ATTRIB 0x00028D28 +#define R_028D2C_CB_COLOR3_DIM 0x00028D2C +#define R_028D50_CB_COLOR4_BASE 0x00028D50 +#define R_028D54_CB_COLOR4_PITCH 0x00028D54 +#define R_028D58_CB_COLOR4_SLICE 0x00028D58 +#define R_028D5C_CB_COLOR4_VIEW 0x00028D5C +#define R_028D60_CB_COLOR4_INFO 0x00028D60 +#define R_028D64_CB_COLOR4_ATTRIB 0x00028D64 +#define R_028D68_CB_COLOR4_DIM 0x00028D68 +#define R_028D8C_CB_COLOR5_BASE 0x00028D8C +#define R_028D90_CB_COLOR5_PITCH 0x00028D90 +#define R_028D94_CB_COLOR5_SLICE 0x00028D94 +#define R_028D98_CB_COLOR5_VIEW 0x00028D98 +#define R_028D9C_CB_COLOR5_INFO 0x00028D9C +#define R_028DA0_CB_COLOR5_ATTRIB 0x00028DA0 +#define R_028DA4_CB_COLOR5_DIM 0x00028DA4 +#define R_028DC8_CB_COLOR6_BASE 0x00028DC8 +#define R_028DCC_CB_COLOR6_PITCH 0x00028DCC +#define R_028DD0_CB_COLOR6_SLICE 0x00028DD0 +#define R_028DD4_CB_COLOR6_VIEW 0x00028DD4 +#define R_028DD8_CB_COLOR6_INFO 0x00028DD8 +#define R_028DDC_CB_COLOR6_ATTRIB 0x00028DDC +#define R_028DE0_CB_COLOR6_DIM 0x00028DE0 +#define R_028E04_CB_COLOR7_BASE 0x00028E04 +#define R_028E08_CB_COLOR7_PITCH 0x00028E08 +#define R_028E0C_CB_COLOR7_SLICE 0x00028E0C +#define R_028E10_CB_COLOR7_VIEW 0x00028E10 +#define R_028E14_CB_COLOR7_INFO 0x00028E14 +#define R_028E18_CB_COLOR7_ATTRIB 0x00028E18 +#define R_028E1C_CB_COLOR7_DIM 0x00028E1C +#define R_028E40_CB_COLOR8_BASE 0x00028E40 +#define R_028E44_CB_COLOR8_PITCH 0x00028E44 +#define R_028E48_CB_COLOR8_SLICE 0x00028E48 +#define R_028E4C_CB_COLOR8_VIEW 0x00028E4C +#define R_028E50_CB_COLOR8_INFO 0x00028E50 +#define R_028E54_CB_COLOR8_ATTRIB 0x00028E54 +#define R_028E58_CB_COLOR8_DIM 0x00028E58 +#define R_028E5C_CB_COLOR9_BASE 0x00028E5C +#define R_028E60_CB_COLOR9_PITCH 0x00028E60 +#define R_028E64_CB_COLOR9_SLICE 0x00028E64 +#define R_028E68_CB_COLOR9_VIEW 0x00028E68 +#define R_028E6C_CB_COLOR9_INFO 0x00028E6C +#define R_028E70_CB_COLOR9_ATTRIB 0x00028E70 +#define R_028E74_CB_COLOR9_DIM 0x00028E74 +#define R_028E78_CB_COLOR10_BASE 0x00028E78 +#define R_028E7C_CB_COLOR10_PITCH 0x00028E7C +#define R_028E80_CB_COLOR10_SLICE 0x00028E80 +#define R_028E84_CB_COLOR10_VIEW 0x00028E84 +#define R_028E88_CB_COLOR10_INFO 0x00028E88 +#define R_028E8C_CB_COLOR10_ATTRIB 0x00028E8C +#define R_028E90_CB_COLOR10_DIM 0x00028E90 +#define R_028E94_CB_COLOR11_BASE 0x00028E94 +#define R_028E98_CB_COLOR11_PITCH 0x00028E98 +#define R_028E9C_CB_COLOR11_SLICE 0x00028E9C +#define R_028EA0_CB_COLOR11_VIEW 0x00028EA0 +#define R_028EA4_CB_COLOR11_INFO 0x00028EA4 +#define R_028EA8_CB_COLOR11_ATTRIB 0x00028EA8 +#define R_028EAC_CB_COLOR11_DIM 0x00028EAC +#define R_030000_RESOURCE0_WORD0 0x00030000 +#define R_030004_RESOURCE0_WORD1 0x00030004 +#define R_030008_RESOURCE0_WORD2 0x00030008 +#define R_03000C_RESOURCE0_WORD3 0x0003000C +#define R_030010_RESOURCE0_WORD4 0x00030010 +#define R_030014_RESOURCE0_WORD5 0x00030014 +#define R_030018_RESOURCE0_WORD6 0x00030018 +#define R_03001C_RESOURCE0_WORD7 0x0003001C +#define R_0085F0_CP_COHER_CNTL 0x0085F0 +#define S_0085F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0) +#define G_0085F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1) +#define C_0085F0_DEST_BASE_0_ENA 0xFFFFFFFE +#define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) +#define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) +#define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD +#define S_0085F0_SO0_DEST_BASE_ENA(x) (((x) & 0x1) << 2) +#define G_0085F0_SO0_DEST_BASE_ENA(x) (((x) >> 2) & 0x1) +#define C_0085F0_SO0_DEST_BASE_ENA 0xFFFFFFFB +#define S_0085F0_SO1_DEST_BASE_ENA(x) (((x) & 0x1) << 3) +#define G_0085F0_SO1_DEST_BASE_ENA(x) (((x) >> 3) & 0x1) +#define C_0085F0_SO1_DEST_BASE_ENA 0xFFFFFFF7 +#define S_0085F0_SO2_DEST_BASE_ENA(x) (((x) & 0x1) << 4) +#define G_0085F0_SO2_DEST_BASE_ENA(x) (((x) >> 4) & 0x1) +#define C_0085F0_SO2_DEST_BASE_ENA 0xFFFFFFEF +#define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) +#define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) +#define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF +#define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) +#define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) +#define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF +#define S_0085F0_CB1_DEST_BASE_ENA(x) (((x) & 0x1) << 7) +#define G_0085F0_CB1_DEST_BASE_ENA(x) (((x) >> 7) & 0x1) +#define C_0085F0_CB1_DEST_BASE_ENA 0xFFFFFF7F +#define S_0085F0_CB2_DEST_BASE_ENA(x) (((x) & 0x1) << 8) +#define G_0085F0_CB2_DEST_BASE_ENA(x) (((x) >> 8) & 0x1) +#define C_0085F0_CB2_DEST_BASE_ENA 0xFFFFFEFF +#define S_0085F0_CB3_DEST_BASE_ENA(x) (((x) & 0x1) << 9) +#define G_0085F0_CB3_DEST_BASE_ENA(x) (((x) >> 9) & 0x1) +#define C_0085F0_CB3_DEST_BASE_ENA 0xFFFFFDFF +#define S_0085F0_CB4_DEST_BASE_ENA(x) (((x) & 0x1) << 10) +#define G_0085F0_CB4_DEST_BASE_ENA(x) (((x) >> 10) & 0x1) +#define C_0085F0_CB4_DEST_BASE_ENA 0xFFFFFBFF +#define S_0085F0_CB5_DEST_BASE_ENA(x) (((x) & 0x1) << 11) +#define G_0085F0_CB5_DEST_BASE_ENA(x) (((x) >> 11) & 0x1) +#define C_0085F0_CB5_DEST_BASE_ENA 0xFFFFF7FF +#define S_0085F0_CB6_DEST_BASE_ENA(x) (((x) & 0x1) << 12) +#define G_0085F0_CB6_DEST_BASE_ENA(x) (((x) >> 12) & 0x1) +#define C_0085F0_CB6_DEST_BASE_ENA 0xFFFFEFFF +#define S_0085F0_CB7_DEST_BASE_ENA(x) (((x) & 0x1) << 13) +#define G_0085F0_CB7_DEST_BASE_ENA(x) (((x) >> 13) & 0x1) +#define C_0085F0_CB7_DEST_BASE_ENA 0xFFFFDFFF +#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) +#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) +#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF +#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) +#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) + +#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) +#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) + +#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) +#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) + +#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) +#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) + +#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) +#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) +#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF +#define S_0085F0_VC_ACTION_ENA(x) (((x) & 0x1) << 24) +#define G_0085F0_VC_ACTION_ENA(x) (((x) >> 24) & 0x1) +#define C_0085F0_VC_ACTION_ENA 0xFEFFFFFF +#define S_0085F0_CB_ACTION_ENA(x) (((x) & 0x1) << 25) +#define G_0085F0_CB_ACTION_ENA(x) (((x) >> 25) & 0x1) +#define C_0085F0_CB_ACTION_ENA 0xFDFFFFFF +#define S_0085F0_DB_ACTION_ENA(x) (((x) & 0x1) << 26) +#define G_0085F0_DB_ACTION_ENA(x) (((x) >> 26) & 0x1) +#define C_0085F0_DB_ACTION_ENA 0xFBFFFFFF +#define S_0085F0_SH_ACTION_ENA(x) (((x) & 0x1) << 27) +#define G_0085F0_SH_ACTION_ENA(x) (((x) >> 27) & 0x1) +#define C_0085F0_SH_ACTION_ENA 0xF7FFFFFF +#define S_0085F0_SMX_ACTION_ENA(x) (((x) & 0x1) << 28) +#define G_0085F0_SMX_ACTION_ENA(x) (((x) >> 28) & 0x1) +#define C_0085F0_SMX_ACTION_ENA 0xEFFFFFFF +#define S_0085F0_CR0_ACTION_ENA(x) (((x) & 0x1) << 29) +#define G_0085F0_CR0_ACTION_ENA(x) (((x) >> 29) & 0x1) +#define C_0085F0_CR0_ACTION_ENA 0xDFFFFFFF +#define S_0085F0_CR1_ACTION_ENA(x) (((x) & 0x1) << 30) +#define G_0085F0_CR1_ACTION_ENA(x) (((x) >> 30) & 0x1) +#define C_0085F0_CR1_ACTION_ENA 0xBFFFFFFF +#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31) +#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) +#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF +#define R_008970_VGT_NUM_INDICES 0x008970 +#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03A200_SQ_LOOP_CONST_0 0x3A200 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bce2707e770..15ee0011061 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,8 +26,11 @@ #ifndef R600_H #define R600_H +#include <assert.h> #include <stdint.h> #include <stdio.h> +#include <util/u_double_list.h> +#include <pipe/p_compiler.h> #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) @@ -90,63 +93,48 @@ enum radeon_family { CHIP_LAST, }; -enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class { + R600, + R700, + EVERGREEN, +}; -/* - * radeon object functions - */ -#if 0 -struct radeon_bo { - unsigned refcount; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; +struct r600_tiling_info { + unsigned num_channels; + unsigned num_banks; + unsigned group_bytes; }; -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, void *ptr); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -#endif -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, + +enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class r600_get_family_class(struct radeon *radeon); +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); + +/* r600_bo.c */ +struct r600_bo; +struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, +struct r600_bo *r600_bo_handle(struct radeon *radeon, unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); +void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, + struct r600_bo *src); +static INLINE unsigned r600_bo_offset(struct r600_bo *bo) +{ + return 0; +} + /* R600/R700 STATES */ #define R600_GROUP_MAX 16 #define R600_BLOCK_MAX_BO 32 #define R600_BLOCK_MAX_REG 128 -enum r600_group_id { - R600_GROUP_CONFIG = 0, - R600_GROUP_CONTEXT, - R600_GROUP_ALU_CONST, - R600_GROUP_RESOURCE, - R600_GROUP_SAMPLER, - R600_GROUP_CTL_CONST, - R600_GROUP_LOOP_CONST, - R600_GROUP_BOOL_CONST, - R600_NGROUPS -}; - struct r600_pipe_reg { - unsigned group_id; u32 offset; u32 mask; u32 value; - struct radeon_ws_bo *bo; + struct r600_bo *bo; }; struct r600_pipe_state { @@ -156,11 +144,9 @@ struct r600_pipe_state { }; static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, - unsigned group_id, u32 offset, - u32 value, u32 mask, - struct radeon_ws_bo *bo) + u32 offset, u32 value, u32 mask, + struct r600_bo *bo) { - state->regs[state->nregs].group_id = group_id; state->regs[state->nregs].offset = offset; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; @@ -173,30 +159,35 @@ static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, #define R600_BLOCK_STATUS_DIRTY (1 << 1) struct r600_block_reloc { - struct radeon_ws_bo *bo; - unsigned nreloc; - unsigned bo_pm4_index[R600_BLOCK_MAX_BO]; + struct r600_bo *bo; + unsigned flush_flags; + unsigned flush_mask; + unsigned bo_pm4_index; }; -struct r600_group_block { +struct r600_block { + struct list_head list; unsigned status; unsigned start_offset; unsigned pm4_ndwords; + unsigned pm4_flush_ndwords; unsigned nbo; unsigned nreg; + u32 *reg; u32 pm4[R600_BLOCK_MAX_REG]; unsigned pm4_bo_index[R600_BLOCK_MAX_REG]; struct r600_block_reloc reloc[R600_BLOCK_MAX_BO]; }; -struct r600_group { +struct r600_range { unsigned start_offset; unsigned end_offset; - unsigned nblocks; - struct r600_group_block *blocks; - unsigned *offset_block_id; + struct r600_block **blocks; }; +/* + * relocation + */ #pragma pack(1) struct r600_reloc { uint32_t handle; @@ -206,10 +197,38 @@ struct r600_reloc { }; #pragma pack() +/* + * query + */ +struct r600_query { + u64 result; + /* The kind of query. Currently only OQ is supported. */ + unsigned type; + /* How many results have been written, in dwords. It's incremented + * after end_query and flush. */ + unsigned num_results; + /* if we've flushed the query */ + unsigned state; + /* The buffer where query results are stored. */ + struct r600_bo *buffer; + unsigned buffer_size; + /* linked list of queries */ + struct list_head list; +}; + +#define R600_QUERY_STATE_STARTED (1 << 0) +#define R600_QUERY_STATE_ENDED (1 << 1) +#define R600_QUERY_STATE_SUSPENDED (1 << 2) + + struct r600_context { struct radeon *radeon; - unsigned ngroups; - struct r600_group groups[R600_GROUP_MAX]; + unsigned hash_size; + unsigned hash_shift; + struct r600_range range[256]; + unsigned nblocks; + struct r600_block **blocks; + struct list_head dirty; unsigned pm4_ndwords; unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; @@ -217,8 +236,14 @@ struct r600_context { unsigned nreloc; unsigned creloc; struct r600_reloc *reloc; - struct radeon_ws_bo **bo; + struct radeon_bo **bo; u32 *pm4; + struct list_head query_list; + unsigned num_query_running; + unsigned fence; + struct list_head fenced_bo; + unsigned *cfence; + struct r600_bo *fence_bo; }; struct r600_draw { @@ -227,7 +252,7 @@ struct r600_draw { u32 vgt_index_type; u32 vgt_draw_initiator; u32 indices_bo_offset; - struct radeon_ws_bo *indices; + struct r600_bo *indices; }; int r600_context_init(struct r600_context *ctx, struct radeon *radeon); @@ -241,4 +266,24 @@ void r600_context_flush(struct r600_context *ctx); void r600_context_dump_bof(struct r600_context *ctx, const char *file); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); +void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query); +boolean r600_context_query_result(struct r600_context *ctx, + struct r600_query *query, + boolean wait, void *vresult); +void r600_query_begin(struct r600_context *ctx, struct r600_query *query); +void r600_query_end(struct r600_context *ctx, struct r600_query *query); +void r600_context_queries_suspend(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx); + +int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); +void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); + +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); +void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); + #endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0d17f75da7e..d13da0ef638 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -20,14 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" +#include <stdio.h> +#include <errno.h> #include "util/u_memory.h" +#include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" #include "r600_asm.h" -#include <stdio.h> -#include <errno.h> static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) { @@ -420,7 +419,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || bc->force_add_cf) { - /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { free(nalu); @@ -434,7 +432,9 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } else { LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); } - if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { + /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) + * worst case */ + if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ @@ -465,8 +465,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->cf_last->ndw += 2; bc->ndw += 2; - if (bc->use_mem_constant) - bc->cf_last->kcache0_mode = 2; + bc->cf_last->kcache0_mode = 2; /* process cur ALU instructions for bank swizzle */ if (alu->last) { @@ -531,7 +530,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); @@ -543,6 +543,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -557,7 +559,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(ntex); @@ -569,6 +572,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -595,7 +600,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | + S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | + S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | + S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | + S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; @@ -696,6 +705,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 62a46cb0e1e..bebc7c15b00 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -58,7 +58,7 @@ struct r600_bc_alu { unsigned bank_swizzle; unsigned bank_swizzle_force; u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; }; struct r600_bc_tex { @@ -101,6 +101,11 @@ struct r600_bc_vtx { unsigned dst_sel_y; unsigned dst_sel_z; unsigned dst_sel_w; + unsigned use_const_fields; + unsigned data_format; + unsigned num_format_all; + unsigned format_comp_all; + unsigned srf_mode_all; }; struct r600_bc_output { @@ -127,6 +132,7 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; @@ -159,7 +165,6 @@ struct r600_cf_callstack { struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ - unsigned use_mem_constant; struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; @@ -175,6 +180,10 @@ struct r600_bc { struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; }; +/* eg_asm.c */ +int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); + +/* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); @@ -185,4 +194,7 @@ int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +/* r700_asm.c */ +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 54fbc50bbc4..cae05aab28b 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -1,5 +1,5 @@ /* - * Copyright 2009 Marek Olšák <[email protected]> + * Copyright 2010 Jerome Glisse <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,69 +19,118 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Marek Olšák */ -#include <errno.h> -#include <pipe/p_screen.h> +#include <util/u_surface.h> #include <util/u_blitter.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "util/u_surface.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600d.h" +#include <util/u_format.h> +#include "r600_pipe.h" -static void r600_blitter_save_states(struct pipe_context *ctx) +enum r600_blitter_op /* bitmask */ { - struct r600_context *rctx = r600_context(ctx); + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 +}; + +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_blend(rctx->blitter, rctx->blend); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); - if (rctx->stencil_ref) { - util_blitter_save_stencil_ref(rctx->blitter, - &rctx->stencil_ref->state.stencil_ref); + util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); + if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { + util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); } - util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer); + util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->viewport) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); + if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { + util_blitter_save_viewport(rctx->blitter, &rctx->viewport); } - if (rctx->clip) { - util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip); + if (rctx->states[R600_PIPE_STATE_CLIP]) { + util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, - rctx->vertex_buffer); + util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - /* remove ptr so they don't get deleted */ - rctx->blend = NULL; - rctx->clip = NULL; - rctx->vs_shader = NULL; - rctx->ps_shader = NULL; - rctx->rasterizer = NULL; - rctx->dsa = NULL; rctx->vertex_elements = NULL; - /* suspend queries */ - r600_queries_suspend(ctx); + if (op & (R600_CLEAR_SURFACE | R600_COPY)) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_COPY) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->ps_samplers.n_samplers, + (void**)rctx->ps_samplers.samplers); + + util_blitter_save_fragment_sampler_views( + rctx->blitter, rctx->ps_samplers.n_views, + (struct pipe_sampler_view**)rctx->ps_samplers.views); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_resume(&rctx->ctx); +} + +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_framebuffer_state fb = *rctx->pframebuffer; + struct pipe_surface *zsurf, *cbsurf; + int level = 0; + float depth = 1.0f; + + r600_context_queries_suspend(&rctx->ctx); + for (int i = 0; i < fb.nr_cbufs; i++) { + fb.cbufs[i] = NULL; + pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]); + } + fb.zsbuf = NULL; + pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf); + + zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, + PIPE_BIND_DEPTH_STENCIL); + + cbsurf = ctx->screen->get_tex_surface(ctx->screen, + (struct pipe_resource*)texture->flushed_depth_texture, + 0, level, 0, PIPE_BIND_RENDER_TARGET); + + r600_blitter_begin(ctx, R600_CLEAR); + util_blitter_save_framebuffer(rctx->blitter, &fb); + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + depth = 0.0f; + + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); + + pipe_surface_reference(&zsurf, NULL); + pipe_surface_reference(&cbsurf, NULL); + for (int i = 0; i < fb.nr_cbufs; i++) { + pipe_surface_reference(&fb.cbufs[i], NULL); + } + pipe_surface_reference(&fb.zsbuf, NULL); + r600_context_queries_resume(&rctx->ctx); + + return 0; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_render_target(struct pipe_context *ctx, @@ -90,16 +139,12 @@ static void r600_clear_render_target(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } static void r600_clear_depth_stencil(struct pipe_context *ctx, @@ -110,19 +155,35 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - /* resume queries */ - r600_queries_resume(ctx); + r600_blitter_end(ctx); } + +/* Copy a block of pixels from one surface to another using HW. */ +static void r600_hw_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned width, unsigned height) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height, + TRUE); + r600_blitter_end(ctx); +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, @@ -132,463 +193,22 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + boolean is_depth; + /* there is something wrong with depth resource copies at the moment so avoid them for now */ + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + else + r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + } -void r600_init_blit_functions(struct r600_context *rctx) +void r600_init_blit_functions(struct r600_pipe_context *rctx) { rctx->context.clear = r600_clear; rctx->context.clear_render_target = r600_clear_render_target; rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } - - -struct r600_blit_states { - struct radeon_state rasterizer; - struct radeon_state dsa; - struct radeon_state blend; - struct radeon_state cb_cntl; - struct radeon_state vgt; - struct radeon_state draw; - struct radeon_state vs_constant0; - struct radeon_state vs_constant1; - struct radeon_state vs_constant2; - struct radeon_state vs_constant3; - struct radeon_state ps_shader; - struct radeon_state vs_shader; - struct radeon_state vs_resource0; - struct radeon_state vs_resource1; -}; - -static int r600_blit_state_vs_resources(struct r600_screen *rscreen, struct r600_blit_states *bstates) -{ - struct radeon_state *rstate; - struct radeon_ws_bo *bo; - void *data; - u32 vbo[] = { - 0xBF800000, 0xBF800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0x3F800000, 0xBF800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, - 0xBF800000, 0x3F800000, 0x3F800000, 0x3F800000, - 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return -ENOMEM; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return -ENOMEM; - } - memcpy(data, vbo, 128); - radeon_ws_bo_unmap(rscreen->rw, bo); - - rstate = &bstates->vs_resource0; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 0, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000080; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; - rstate->bo[0] = bo; - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - return -ENOMEM; - } - - rstate = &bstates->vs_resource1; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 1, R600_SHADER_VS); - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000010; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000070; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; - rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - return -ENOMEM; - } - - return 0; -} - -static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - struct radeon_ws_bo *bo; - void *data; - u32 shader_bc_r600[] = { - 0x00000004, 0x81000400, - 0x00000008, 0xA01C0000, - 0xC001A03C, 0x94000688, - 0xC0024000, 0x94200688, - 0x7C000000, 0x002D1001, - 0x00080000, 0x00000000, - 0x7C000100, 0x002D1002, - 0x00080000, 0x00000000, - 0x00000001, 0x00601910, - 0x00000401, 0x20601910, - 0x00000801, 0x40601910, - 0x80000C01, 0x60601910, - 0x00000002, 0x00801910, - 0x00000402, 0x20801910, - 0x00000802, 0x40801910, - 0x80000C02, 0x60801910 - }; - u32 shader_bc_r700[] = { - 0x00000004, 0x81000400, - 0x00000008, 0xA01C0000, - 0xC001A03C, 0x94000688, - 0xC0024000, 0x94200688, - 0x7C000000, 0x002D1001, - 0x00080000, 0x00000000, - 0x7C000100, 0x002D1002, - 0x00080000, 0x00000000, - 0x00000001, 0x00600C90, - 0x00000401, 0x20600C90, - 0x00000801, 0x40600C90, - 0x80000C01, 0x60600C90, - 0x00000002, 0x00800C90, - 0x00000402, 0x20800C90, - 0x00000802, 0x40800C90, - 0x80000C02, 0x60800C90 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - switch (rscreen->chip_class) { - case R600: - memcpy(data, shader_bc_r600, 128); - break; - case R700: - memcpy(data, shader_bc_r700, 128); - break; - default: - R600_ERR("unsupported chip family\n"); - radeon_ws_bo_unmap(rscreen->rw, bo); - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - radeon_ws_bo_unmap(rscreen->rw, bo); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_0] = 0x03020100; - rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_1] = 0x07060504; - rstate->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = 0x00000005; - - rstate->bo[0] = bo; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], bo); - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - struct radeon_ws_bo *bo; - void *data; - u32 shader_bc_r600[] = { - 0x00000002, 0xA00C0000, - 0xC0008000, 0x94200688, - 0x00000000, 0x00201910, - 0x00000400, 0x20201910, - 0x00000800, 0x40201910, - 0x80000C00, 0x60201910 - }; - u32 shader_bc_r700[] = { - 0x00000002, 0xA00C0000, - 0xC0008000, 0x94200688, - 0x00000000, 0x00200C90, - 0x00000400, 0x20200C90, - 0x00000800, 0x40200C90, - 0x80000C00, 0x60200C90 - }; - - /* simple shader */ - bo = radeon_ws_bo(rscreen->rw, 128, 4096, 0); - if (bo == NULL) { - return; - } - data = radeon_ws_bo_map(rscreen->rw, bo, 0, NULL); - if (!data) { - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - switch (rscreen->chip_class) { - case R600: - memcpy(data, shader_bc_r600, 48); - break; - case R700: - memcpy(data, shader_bc_r700, 48); - break; - default: - R600_ERR("unsupported chip family\n"); - radeon_ws_bo_unmap(rscreen->rw, bo); - radeon_ws_bo_reference(rscreen->rw, &bo, NULL); - return; - } - radeon_ws_bo_unmap(rscreen->rw, bo); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0] = 0x00000C00; - rstate->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = 0x10000001; - rstate->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rstate->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = 0x00000002; - - rstate->bo[0] = bo; - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_vgt(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_VGT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - rstate->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - rstate->states[R600_VGT__VGT_PRIMITIVE_TYPE] = 0x00000005; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_draw(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_DRAW, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_DRAW__VGT_DRAW_INITIATOR] = 0x00000002; - rstate->states[R600_DRAW__VGT_NUM_INDICES] = 0x00000004; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_vs_constant(struct r600_screen *rscreen, struct radeon_state *rstate, - unsigned id, float c0, float c1, float c2, float c3) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_CONSTANT, id, R600_SHADER_VS); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256] = fui(c0); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256] = fui(c1); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256] = fui(c2); - rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256] = fui(c3); - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_rasterizer(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080004; - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_dsa(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; - - radeon_state_pm4(rstate); -} - -static void r600_blit_state_blend(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - radeon_state_pm4(rstate); -} - -static void r600_blit_state_cb_cntl(struct r600_screen *rscreen, struct radeon_state *rstate) -{ - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0080; - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; - rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - -static int r600_blit_states_init(struct pipe_context *ctx, struct r600_blit_states *bstates) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - - r600_blit_state_ps_shader(rscreen, &bstates->ps_shader); - r600_blit_state_vs_shader(rscreen, &bstates->vs_shader); - r600_blit_state_vgt(rscreen, &bstates->vgt); - r600_blit_state_draw(rscreen, &bstates->draw); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant0, 0, 1.0, 0.0, 0.0, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant1, 1, 0.0, 1.0, 0.0, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant2, 2, 0.0, 0.0, -0.00199900055, 0.0); - r600_blit_state_vs_constant(rscreen, &bstates->vs_constant3, 3, 0.0, 0.0, -0.99900049, 1.0); - r600_blit_state_rasterizer(rscreen, &bstates->rasterizer); - r600_blit_state_dsa(rscreen, &bstates->dsa); - r600_blit_state_blend(rscreen, &bstates->blend); - r600_blit_state_cb_cntl(rscreen, &bstates->cb_cntl); - r600_blit_state_vs_resources(rscreen, bstates); - return 0; -} - -static void r600_blit_states_destroy(struct pipe_context *ctx, struct r600_blit_states *bstates) -{ - radeon_state_fini(&bstates->ps_shader); - radeon_state_fini(&bstates->vs_shader); - radeon_state_fini(&bstates->vs_resource0); - radeon_state_fini(&bstates->vs_resource1); -} - -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_draw draw; - struct r600_blit_states bstates; - int r; - - r = r600_texture_scissor(ctx, rtexture, level); - if (r) { - return r; - } - r = r600_texture_cb(ctx, rtexture, 0, level); - if (r) { - return r; - } - r = r600_texture_db(ctx, rtexture, level); - if (r) { - return r; - } - r = r600_texture_viewport(ctx, rtexture, level); - if (r) { - return r; - } - - r = r600_blit_states_init(ctx, &bstates); - if (r) { - return r; - } - bstates.dsa.states[R600_DSA__DB_RENDER_CONTROL] = 0x0000008C; - bstates.cb_cntl.states[R600_CB_CNTL__CB_TARGET_MASK] = 0x00000001; - /* force rebuild */ - bstates.dsa.cpm4 = bstates.cb_cntl.cpm4 = 0; - if (radeon_state_pm4(&bstates.dsa)) { - goto out; - } - if (radeon_state_pm4(&bstates.cb_cntl)) { - goto out; - } - - r = radeon_draw_init(&draw, rscreen->rw); - if (r) { - R600_ERR("failed creating draw for uncompressing textures\n"); - goto out; - } - - radeon_draw_bind(&draw, &bstates.vs_shader); - radeon_draw_bind(&draw, &bstates.ps_shader); - radeon_draw_bind(&draw, &bstates.rasterizer); - radeon_draw_bind(&draw, &bstates.dsa); - radeon_draw_bind(&draw, &bstates.blend); - radeon_draw_bind(&draw, &bstates.cb_cntl); - radeon_draw_bind(&draw, &rctx->config); - radeon_draw_bind(&draw, &bstates.vgt); - radeon_draw_bind(&draw, &bstates.draw); - radeon_draw_bind(&draw, &bstates.vs_resource0); - radeon_draw_bind(&draw, &bstates.vs_resource1); - radeon_draw_bind(&draw, &bstates.vs_constant0); - radeon_draw_bind(&draw, &bstates.vs_constant1); - radeon_draw_bind(&draw, &bstates.vs_constant2); - radeon_draw_bind(&draw, &bstates.vs_constant3); - radeon_draw_bind(&draw, &rtexture->viewport[level]); - radeon_draw_bind(&draw, &rtexture->scissor[level]); - radeon_draw_bind(&draw, &rtexture->cb[0][level]); - radeon_draw_bind(&draw, &rtexture->db[level]); - - /* suspend queries */ - r600_queries_suspend(ctx); - - /* schedule draw*/ - r = radeon_ctx_set_draw(rctx->ctx, &draw); - if (r == -EBUSY) { - r600_flush(ctx, 0, NULL); - r = radeon_ctx_set_draw(rctx->ctx, &draw); - } - if (r) { - goto out; - } - - /* resume queries */ - r600_queries_resume(ctx); - -out: - r600_blit_states_destroy(ctx, &bstates); - return r; -} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index dc3fc812e1a..2bfa4e22fec 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -31,9 +31,10 @@ #include <util/u_memory.h> #include <util/u_upload_mgr.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" +#include <xf86drm.h> +#include "radeon_drm.h" +#include "r600.h" +#include "r600_pipe.h" extern struct u_resource_vtbl r600_buffer_vtbl; @@ -42,23 +43,23 @@ u32 r600_domain_from_usage(unsigned usage) u32 domain = RADEON_GEM_DOMAIN_GTT; if (usage & PIPE_BIND_RENDER_TARGET) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_DEPTH_STENCIL) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } if (usage & PIPE_BIND_SAMPLER_VIEW) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } /* also need BIND_BLIT_SOURCE/DESTINATION ? */ if (usage & PIPE_BIND_VERTEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_INDEX_BUFFER) { - domain |= RADEON_GEM_DOMAIN_GTT; + domain |= RADEON_GEM_DOMAIN_GTT; } if (usage & PIPE_BIND_CONSTANT_BUFFER) { - domain |= RADEON_GEM_DOMAIN_VRAM; + domain |= RADEON_GEM_DOMAIN_VRAM; } return domain; @@ -67,9 +68,8 @@ u32 r600_domain_from_usage(unsigned usage) struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_screen *rscreen = r600_screen(screen); struct r600_resource_buffer *rbuffer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; /* XXX We probably want a different alignment for buffers and textures. */ unsigned alignment = 4096; @@ -86,7 +86,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; rbuffer->r.domain = r600_domain_from_usage(rbuffer->r.base.b.bind); - bo = radeon_ws_bo(rscreen->rw, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); + bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind); if (bo == NULL) { FREE(rbuffer); return NULL; @@ -127,10 +127,9 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r600_resource_buffer *rbuffer = r600_buffer(buf); - struct r600_screen *rscreen = r600_screen(screen); if (rbuffer->r.bo) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } FREE(rbuffer); } @@ -138,9 +137,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct r600_context *rctx = r600_context(pipe); struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); int write = 0; uint8_t *data; int i; @@ -156,9 +153,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, flush = TRUE; if (flush) { - radeon_ws_bo_reference(rscreen->rw, &rbuffer->r.bo, NULL); + r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); rbuffer->num_ranges = 0; - rbuffer->r.bo = radeon_ws_bo(rscreen->rw, + rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, rbuffer->r.base.b.width0, 0, rbuffer->r.base.b.bind); break; @@ -171,7 +168,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (transfer->usage & PIPE_TRANSFER_WRITE) { write = 1; } - data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, rctx); + data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; @@ -182,10 +179,9 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - struct r600_screen *rscreen = r600_screen(pipe->screen); if (rbuffer->r.bo) - radeon_ws_bo_unmap(rscreen->rw, rbuffer->r.bo); + r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -229,16 +225,16 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, { struct radeon *rw = (struct radeon*)screen->winsys; struct r600_resource *rbuffer; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle); if (bo == NULL) { return NULL; } rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - radeon_ws_bo_reference(rw, &bo, NULL); + r600_bo_reference(rw, &bo, NULL); return NULL; } @@ -263,8 +259,7 @@ struct u_resource_vtbl r600_buffer_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw) +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { struct pipe_resource *upload_buffer = NULL; unsigned index_offset = draw->index_buffer_offset; @@ -281,14 +276,17 @@ int r600_upload_index_buffer(struct r600_context *rctx, goto done; } draw->index_buffer_offset = index_offset; - draw->index_buffer = upload_buffer; + + /* Transfer ownership. */ + pipe_resource_reference(&draw->index_buffer, upload_buffer); + pipe_resource_reference(&upload_buffer, NULL); } done: return ret; } -int r600_upload_user_buffers(struct r600_context *rctx) +int r600_upload_user_buffers(struct r600_pipe_context *rctx) { enum pipe_error ret = PIPE_OK; int i, nr; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c deleted file mode 100644 index 776dc24569b..00000000000 --- a/src/gallium/drivers/r600/r600_context.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_upload_mgr.h> -#include <util/u_blitter.h> -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_context *rctx = r600_context(context); - - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); - rctx->scissor = r600_context_state_decref(rctx->scissor); - rctx->clip = r600_context_state_decref(rctx->clip); - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->depth = r600_context_state_decref(rctx->depth); - rctx->stencil = r600_context_state_decref(rctx->stencil); - rctx->alpha = r600_context_state_decref(rctx->alpha); - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); - rctx->viewport = r600_context_state_decref(rctx->viewport); - rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); - - free(rctx->ps_constant); - free(rctx->vs_constant); - free(rctx->vs_resource); - - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); - - radeon_ctx_fini(rctx->ctx); - FREE(rctx); -} - -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = NULL; - - /* flush upload buffers */ - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - - /* suspend queries */ - r600_queries_suspend(ctx); - - radeon_ctx_submit(rctx->ctx); - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - rquery->flushed = true; - } - - radeon_ctx_clear(rctx->ctx); - /* resume queries */ - r600_queries_resume(ctx); -} - -void r600_flush_ctx(void *data) -{ - struct r600_context *rctx = data; - - rctx->context.flush(&rctx->context, 0, NULL); -} - -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) -{ - struct r600_context *rctx = CALLOC_STRUCT(r600_context); - struct r600_screen* rscreen = r600_screen(screen); - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.draw_vbo = r600_draw_vbo; - rctx->context.flush = r600_flush; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->rw = rscreen->rw; - - if (rscreen->chip_class == EVERGREEN) - rctx->vtbl = &eg_hw_state_vtbl; - else - rctx->vtbl = &r600_hw_state_vtbl; - - r600_init_blit_functions(rctx); - r600_init_query_functions(rctx); - r600_init_state_functions(rctx); - r600_init_context_resource_functions(rctx); - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vtbl->init_config(rctx); - - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - goto out_free; - } - - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { - goto out_free; - } - - rctx->vs_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->vs_constant) { - goto out_free; - } - - rctx->ps_constant = (struct radeon_state *)calloc(R600_MAX_CONSTANT, sizeof(struct radeon_state)); - if (!rctx->ps_constant) { - goto out_free; - } - - rctx->vs_resource = (struct radeon_state *)calloc(R600_MAX_RESOURCE, sizeof(struct radeon_state)); - if (!rctx->vs_resource) { - goto out_free; - } - - rctx->ctx = radeon_ctx_init(rscreen->rw); - radeon_draw_init(&rctx->draw, rscreen->rw); - return &rctx->context; - out_free: - FREE(rctx); - return NULL; -} diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h deleted file mode 100644 index 3107f189c78..00000000000 --- a/src/gallium/drivers/r600/r600_context.h +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_CONTEXT_H -#define R600_CONTEXT_H - -#include <stdio.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include "radeon.h" -#include "r600_shader.h" - -struct u_upload_mgr; - -#define R600_QUERY_STATE_STARTED (1 << 0) -#define R600_QUERY_STATE_ENDED (1 << 1) -#define R600_QUERY_STATE_SUSPENDED (1 << 2) - -struct r600_query { - u64 result; - /* The kind of query. Currently only OQ is supported. */ - unsigned type; - /* How many results have been written, in dwords. It's incremented - * after end_query and flush. */ - unsigned num_results; - /* if we've flushed the query */ - boolean flushed; - unsigned state; - /* The buffer where query results are stored. */ - struct radeon_ws_bo *buffer; - unsigned buffer_size; - /* linked list of queries */ - struct list_head list; - struct radeon_state rstate; -}; - -/* XXX move this to a more appropriate place */ -union pipe_states { - struct pipe_rasterizer_state rasterizer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct pipe_clip_state clip; - struct pipe_shader_state shader; - struct pipe_depth_state depth; - struct pipe_stencil_state stencil; - struct pipe_alpha_state alpha; - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_state sampler; - struct pipe_sampler_view sampler_view; - struct pipe_viewport_state viewport; -}; - -enum pipe_state_type { - pipe_rasterizer_type = 1, - pipe_poly_stipple_type, - pipe_scissor_type, - pipe_clip_type, - pipe_shader_type, - pipe_depth_type, - pipe_stencil_type, - pipe_alpha_type, - pipe_dsa_type, - pipe_blend_type, - pipe_stencil_ref_type, - pipe_framebuffer_type, - pipe_sampler_type, - pipe_sampler_view_type, - pipe_viewport_type, - pipe_type_count -}; - -#define R600_MAX_RSTATE 16 - -struct r600_context_state { - union pipe_states state; - unsigned refcount; - unsigned type; - struct radeon_state rstate[R600_MAX_RSTATE]; - struct r600_shader shader; - struct radeon_ws_bo *bo; - unsigned nrstate; -}; - -struct r600_vertex_element -{ - unsigned refcount; - unsigned count; - struct pipe_vertex_element elements[32]; -}; - -struct r600_draw { - struct pipe_context *ctx; - struct radeon_state draw; - struct radeon_state vgt; - unsigned mode; - unsigned start; - unsigned count; - unsigned index_size; - struct pipe_resource *index_buffer; - unsigned index_buffer_offset; - unsigned min_index, max_index; -}; - -struct r600_context_hw_states { - struct radeon_state rasterizer; - struct radeon_state scissor; - struct radeon_state dsa; - struct radeon_state cb_cntl; - - struct radeon_state db_flush; - struct radeon_state cb_flush; -}; - -#define R600_MAX_CONSTANT 256 /* magic */ -#define R600_MAX_RESOURCE 160 /* magic */ - -struct r600_shader_sampler_states { - unsigned nsampler; - unsigned nview; - unsigned nborder; - struct radeon_state *sampler[PIPE_MAX_ATTRIBS]; - struct radeon_state *view[PIPE_MAX_ATTRIBS]; - struct radeon_state *border[PIPE_MAX_ATTRIBS]; -}; - -struct r600_context; -struct r600_screen; -struct r600_resource; -struct r600_resource_texture; - -struct r600_context_hw_state_vtbl { - void (*blend)(struct r600_context *rctx, - struct radeon_state *rstate, - const struct pipe_blend_state *state); - void (*ucp)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state); - void (*cb)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb); - void (*db)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state); - void (*rasterizer)(struct r600_context *rctx, struct radeon_state *rstate); - void (*scissor)(struct r600_context *rctx, struct radeon_state *rstate); - void (*viewport)(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state); - void (*dsa)(struct r600_context *rctx, struct radeon_state *rstate); - void (*sampler_border)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*sampler)(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id); - void (*resource)(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id); - void (*cb_cntl)(struct r600_context *rctx, struct radeon_state *rstate); - int (*vs_resource)(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format); - int (*vgt_init)(struct r600_draw *draw, - int vgt_draw_initiator); - int (*vgt_prim)(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type); - - int (*ps_shader)(struct r600_context *rctx, struct r600_context_state *rshader, - struct radeon_state *state); - int (*vs_shader)(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state); - void (*init_config)(struct r600_context *rctx); - - - void (*texture_state_viewport)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); - void (*texture_state_cb)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned cb, - unsigned level); - void (*texture_state_db)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); - void (*texture_state_scissor)(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level); -}; -extern struct r600_context_hw_state_vtbl r600_hw_state_vtbl; -extern struct r600_context_hw_state_vtbl eg_hw_state_vtbl; - -struct r600_context { - struct pipe_context context; - struct r600_screen *screen; - struct radeon *rw; - struct radeon_ctx *ctx; - struct blitter_context *blitter; - struct radeon_draw draw; - struct r600_context_hw_state_vtbl *vtbl; - struct radeon_state config; - boolean use_mem_constant; - /* FIXME get rid of those vs_resource,vs/ps_constant */ - struct radeon_state *vs_resource; - unsigned vs_nresource; - struct radeon_state *vs_constant; - struct radeon_state *ps_constant; - /* hw states */ - struct r600_context_hw_states hw_states; - /* pipe states */ - unsigned flat_shade; - - unsigned nvertex_buffer; - struct r600_context_state *rasterizer; - struct r600_context_state *poly_stipple; - struct r600_context_state *scissor; - struct r600_context_state *clip; - struct r600_context_state *ps_shader; - struct r600_context_state *vs_shader; - struct r600_context_state *depth; - struct r600_context_state *stencil; - struct r600_context_state *alpha; - struct r600_context_state *dsa; - struct r600_context_state *blend; - struct r600_context_state *stencil_ref; - struct r600_context_state *viewport; - struct r600_context_state *framebuffer; - struct r600_shader_sampler_states vs_sampler; - struct r600_shader_sampler_states ps_sampler; - /* can add gs later */ - struct r600_vertex_element *vertex_elements; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_index_buffer index_buffer; - struct pipe_blend_color blend_color; - struct list_head query_list; - - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; - bool any_user_vbs; - -}; - -/* Convenience cast wrapper. */ -static INLINE struct r600_context *r600_context(struct pipe_context *pipe) -{ - return (struct r600_context*)pipe; -} - -static INLINE struct r600_query* r600_query(struct pipe_query* q) -{ - return (struct r600_query*)q; -} - -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); -void r600_flush(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence); - -int r600_context_hw_states(struct pipe_context *ctx); - -void r600_draw_vbo(struct pipe_context *ctx, - const struct pipe_draw_info *info); - -void r600_init_blit_functions(struct r600_context *rctx); -void r600_init_state_functions(struct r600_context *rctx); -void r600_init_query_functions(struct r600_context* rctx); -struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); - -extern int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rstate, - const struct tgsi_token *tokens); -extern int r600_pipe_shader_update(struct pipe_context *ctx, - struct r600_context_state *rstate); - -#define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) - -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); - -/* query */ -extern void r600_queries_resume(struct pipe_context *ctx); -extern void r600_queries_suspend(struct pipe_context *ctx); - -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); -void eg_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer); - -int r600_upload_index_buffer(struct r600_context *rctx, - struct r600_draw *draw); -int r600_upload_user_buffers(struct r600_context *rctx); - -#endif diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c deleted file mode 100644 index 5480ca002d2..00000000000 --- a/src/gallium/drivers/r600/r600_draw.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_screen.h> -#include <util/u_format.h> -#include <util/u_math.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "radeon.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" - -static int r600_draw_common(struct r600_draw *draw) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - /* FIXME vs_resource */ - struct radeon_state *vs_resource; - struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; - u32 vgt_dma_index_type, vgt_draw_initiator; - struct pipe_vertex_buffer *vertex_buffer; - int r; - - r = r600_context_hw_states(draw->ctx); - if (r) - return r; - switch (draw->index_size) { - case 2: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_DMA); - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX); - vgt_dma_index_type = 0; - break; - default: - fprintf(stderr, "%s %d unsupported index size %d\n", __func__, __LINE__, draw->index_size); - return -EINVAL; - } - r = r600_conv_pipe_prim(draw->mode, &prim); - if (r) - return r; - - /* rebuild vertex shader if input format changed */ - r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader); - if (r) - return r; - r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); - if (r) - return r; - radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]); - radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]); - - for (i = 0 ; i < rctx->vs_nresource; i++) { - radeon_state_fini(&rctx->vs_resource[i]); - } - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - vs_resource = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - - rctx->vtbl->vs_resource(rctx, i, rbuffer, offset, vertex_buffer->stride, format); - radeon_draw_bind(&rctx->draw, vs_resource); - } - rctx->vs_nresource = rctx->vertex_elements->count; - /* FIXME start need to change winsys */ - rctx->vtbl->vgt_init(draw, vgt_draw_initiator); - radeon_draw_bind(&rctx->draw, &draw->draw); - - rctx->vtbl->vgt_prim(draw, prim, vgt_dma_index_type); - radeon_draw_bind(&rctx->draw, &draw->vgt); - - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - if (r == -EBUSY) { - r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw(rctx->ctx, &rctx->draw); - } - - radeon_state_fini(&draw->draw); - - return r; -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_draw draw; - int r; - - assert(info->index_bias == 0); - - memset(&draw, 0, sizeof(draw)); - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = false; - } - - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - draw.index_buffer_offset = rctx->index_buffer.offset; - - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); - r600_upload_index_buffer(rctx, &draw); - } - else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = 0; - draw.max_index = 0xffffff; - draw.index_buffer_offset = 0; - } - r = r600_draw_common(&draw); - if (r) - fprintf(stderr,"draw common failed %d\n", r); -} diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h new file mode 100644 index 00000000000..0c91a212384 --- /dev/null +++ b/src/gallium/drivers/r600/r600_formats.h @@ -0,0 +1,56 @@ +#ifndef R600_FORMATS_H +#define R600_FORMATS_H + +/* list of formats from R700 ISA document - apply across GPUs in different registers */ +#define FMT_INVALID 0x00000000 +#define FMT_8 0x00000001 +#define FMT_4_4 0x00000002 +#define FMT_3_3_2 0x00000003 +#define FMT_16 0x00000005 +#define FMT_16_FLOAT 0x00000006 +#define FMT_8_8 0x00000007 +#define FMT_5_6_5 0x00000008 +#define FMT_6_5_5 0x00000009 +#define FMT_1_5_5_5 0x0000000A +#define FMT_4_4_4_4 0x0000000B +#define FMT_5_5_5_1 0x0000000C +#define FMT_32 0x0000000D +#define FMT_32_FLOAT 0x0000000E +#define FMT_16_16 0x0000000F +#define FMT_16_16_FLOAT 0x00000010 +#define FMT_8_24 0x00000011 +#define FMT_8_24_FLOAT 0x00000012 +#define FMT_24_8 0x00000013 +#define FMT_24_8_FLOAT 0x00000014 +#define FMT_10_11_11 0x00000015 +#define FMT_10_11_11_FLOAT 0x00000016 +#define FMT_11_11_10 0x00000017 +#define FMT_11_11_10_FLOAT 0x00000018 +#define FMT_2_10_10_10 0x00000019 +#define FMT_8_8_8_8 0x0000001A +#define FMT_10_10_10_2 0x0000001B +#define FMT_X24_8_32_FLOAT 0x0000001C +#define FMT_32_32 0x0000001D +#define FMT_32_32_FLOAT 0x0000001E +#define FMT_16_16_16_16 0x0000001F +#define FMT_16_16_16_16_FLOAT 0x00000020 +#define FMT_32_32_32_32 0x00000022 +#define FMT_32_32_32_32_FLOAT 0x00000023 +#define FMT_1 0x00000025 +#define FMT_GB_GR 0x00000027 +#define FMT_BG_RG 0x00000028 +#define FMT_32_AS_8 0x00000029 +#define FMT_32_AS_8_8 0x0000002a +#define FMT_5_9_9_9_SHAREDEXP 0x0000002b +#define FMT_8_8_8 0x0000002c +#define FMT_16_16_16 0x0000002d +#define FMT_16_16_16_FLOAT 0x0000002e +#define FMT_32_32_32 0x0000002f +#define FMT_32_32_32_FLOAT 0x00000030 +#define FMT_BC1 0x00000031 +#define FMT_BC2 0x00000032 +#define FMT_BC3 0x00000033 +#define FMT_BC4 0x00000034 +#define FMT_BC5 0x00000035 + +#endif diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 5e0e0aab570..7e131093060 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -26,8 +26,7 @@ #include <stdio.h> #include <errno.h> #include <util/u_inlines.h> -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600d.h" int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c deleted file mode 100644 index bca78ee8deb..00000000000 --- a/src/gallium/drivers/r600/r600_hw_states.c +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Dave Airlie - */ - -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_blitter.h> -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_state_inlines.h" -#include "r600d.h" - -static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - int i; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); - rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); - rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); - rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); - rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); - rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND4_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND5_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; - - for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; - if (i == 0) - rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; - } - - radeon_state_pm4(rstate); -} - -static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_clip_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); - - for (int i = 0; i < state->nr; i++) { - rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); - rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); - rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); - rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); - } - radeon_state_pm4(rstate); -} - -static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1) | - S_0280A0_NUMBER_TYPE(ntype); - - rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - radeon_state_pm4(rstate); -} - -static void r600_db(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - radeon_state_pm4(rstate); -} - -static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - const struct pipe_clip_state *clip = NULL; - struct r600_screen *rscreen = rctx->screen; - float offset_units = 0, offset_scale = 0; - char depth = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rctx->clip) - clip = &rctx->clip->state.clip; - if (fb->zsbuf) { - offset_units = state->offset_units; - offset_scale = state->offset_scale * 12.0f; - switch (fb->zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return; - } - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - - if (state->flatshade_first) - prov_vtx = 0; - - rctx->flat_shade = state->flatshade; - radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; - if (state->sprite_coord_enable) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= - S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; - if (clip) { - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); - } - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); - rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; - rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; - rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - radeon_state_pm4(rstate); -} - -static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; - const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - struct r600_screen *rscreen = rctx->screen; - enum radeon_family family; - unsigned minx, maxx, miny, maxy; - u32 tl, br; - - family = radeon_get_family(rctx->rw); - - if (state == NULL) { - minx = 0; - miny = 0; - maxx = fb->cbufs[0]->width; - maxy = fb->cbufs[0]->height; - } else { - minx = state->minx; - miny = state->miny; - maxx = state->maxx; - maxy = state->maxy; - } - tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - - if (family >= CHIP_RV770) - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - radeon_state_pm4(rstate); -} - -static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - radeon_state_pm4(rstate); -} - -static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) -{ - const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; - const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; - struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - struct r600_shader *rshader; - struct r600_query *rquery = NULL; - boolean query_running; - int i; - - if (rctx->ps_shader == NULL) { - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - - rshader = &rctx->ps_shader->shader; - if (rshader->uses_kill) - db_shader_control |= S_02880C_KILL_ENABLE(1); - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); - } - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ - - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); - } - } - - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - db_render_control = S_028D0C_STENCIL_COMPRESS_DISABLE(1) | - S_028D0C_DEPTH_COMPRESS_DISABLE(1); - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - - query_running = false; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; - } - } - - if (query_running) { - db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - if (rscreen->chip_class == R700) - db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); - } - - rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; - rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; - rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = db_render_control; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = db_render_override; - - rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - radeon_state_pm4(rstate); -} - - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -static void r600_sampler_border(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); - if (uc.ui) { - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); - rstate->states[R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA] = fui(state->border_color[3]); - } - radeon_state_pm4(rstate); -} - -static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) -{ - struct r600_screen *rscreen = rctx->screen; - union util_color uc; - - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); - /* FIXME LOD it depends on texture base level ... */ - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - radeon_state_pm4(rstate); - -} - - -static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - int r; - - rstate->cpm4 = 0; - swizzle[0] = view->swizzle_r; - swizzle[1] = view->swizzle_g; - swizzle[2] = view->swizzle_b; - swizzle[3] = view->swizzle_a; - format = r600_translate_texformat(view->texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - return; - } - desc = util_format_description(view->texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", view->texture->format); - return; - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - } - rstate->nbo = 2; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = - S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(view->texture->width0 - 1); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = - S_038004_TEX_HEIGHT(view->texture->height0 - 1) | - S_038004_TEX_DEPTH(view->texture->depth0 - 1) | - S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = - word4 | - S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(view->first_level); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = - S_038014_LAST_LEVEL(view->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - radeon_state_pm4(rstate); -} - -static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; - int i; - - target_mask = 0; - shader_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); - - for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << (i * 4); - } - - if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - - if (pbs->independent_blend_enable) { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } - } else { - for (i = 0; i < nr_cbufs; i++) { - if (pbs->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (pbs->rt[0].colormask << (4 * i)); - } - } - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; - rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; - rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; - rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rstate); -} - -static void r600_init_config(struct r600_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - - family = radeon_get_family(rctx->rw); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); - break; - } - - if (!rctx->screen->use_mem_constant) - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - - rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - - if (family >= CHIP_RV770) { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - } else { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010; - } - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(&rctx->config); -} - -static int r600_vs_resource(struct r600_context *rctx, int id, struct r600_resource *rbuffer, uint32_t offset, - uint32_t stride, uint32_t format) -{ - struct radeon_state *vs_resource = &rctx->vs_resource[id]; - struct r600_screen *rscreen = rctx->screen; - - radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_VS); - radeon_ws_bo_reference(rscreen->rw, &vs_resource->bo[0], rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->size - offset - 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(stride) | - S_038008_DATA_FORMAT(format); - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(vs_resource); -} - -static int r600_draw_vgt_init(struct r600_draw *draw, - int vgt_draw_initiator) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - struct r600_resource *rbuffer = (struct r600_resource *)draw->index_buffer; - radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); - draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; - draw->draw.states[R600_DRAW__VGT_DMA_BASE] = draw->index_buffer_offset; - if (rbuffer) { - radeon_ws_bo_reference(rscreen->rw, &draw->draw.bo[0], rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; - } - return radeon_state_pm4(&draw->draw); -} - -static int r600_draw_vgt_prim(struct r600_draw *draw, - uint32_t prim, uint32_t vgt_dma_index_type) -{ - struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; - radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); - draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = draw->max_index; - draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = draw->min_index; - draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - return radeon_state_pm4(&draw->vgt); -} - -static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - const struct pipe_rasterizer_state *rasterizer; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; - - rasterizer = &rctx->rasterizer->state.rasterizer; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rasterizer->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - if (have_pos) { - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1; - } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - state->nbo = 1; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state *rpshader, - struct radeon_state *state) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_shader *rshader = &rpshader->shader; - unsigned i, tmp; - - radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); - for (i = 0; i < 10; i++) { - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; - } - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack); - radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); - radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); - state->nbo = 2; - state->placement[0] = RADEON_GEM_DOMAIN_GTT; - state->placement[2] = RADEON_GEM_DOMAIN_GTT; - return radeon_state_pm4(state); -} - -static void r600_texture_state_scissor(struct r600_screen *rscreen, - struct r600_resource_texture *rtexture, - unsigned level) -{ - struct radeon_state *rstate = &rtexture->scissor[level]; - enum radeon_family family; - - family = radeon_get_family(rscreen->rw); - - radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - - if (family >= CHIP_RV770) - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000; - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct radeon_state *rstate; - struct r600_resource *rbuffer; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - rstate = &rtexture->cb[cb][level]; - radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); - rbuffer = &rtexture->resource; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - ntype = 0; - desc = util_format_description(rbuffer->base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - format = r600_translate_colorformat(rtexture->resource.base.b.format); - swap = r600_translate_colorswap(rtexture->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rtexture->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rtexture->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[2], rtexture->uncompressed); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 3; - color_info = 0; - } else { - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[2], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 3; - color_info = S_0280A0_SOURCE_FORMAT(1); - } - color_info |= S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); - rstate->states[R600_CB0__CB_COLOR0_BASE] = rtexture->offset[level] >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->db[level]; - struct r600_resource *rbuffer; - unsigned pitch, slice, format; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); - rbuffer = &rtexture->resource; - rtexture->tilled = 1; - rtexture->array_mode = 2; - rtexture->tile_type = 1; - rtexture->depth = 1; - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; - slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; - format = r600_translate_dbformat(rbuffer->base.b.format); - rstate->states[R600_DB__DB_DEPTH_BASE] = rtexture->offset[level] >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtexture->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (rtexture->height[level] / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 1; - - radeon_state_pm4(rstate); -} - -static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) -{ - struct radeon_state *rstate = &rtexture->viewport[level]; - - radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - - /* set states (most default value are 0 and struct already - * initialized to 0, thus avoid resetting them) - */ - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - - radeon_state_pm4(rstate); -} - -struct r600_context_hw_state_vtbl r600_hw_state_vtbl = { - .blend = r600_blend, - .ucp = r600_ucp, - .cb = r600_cb, - .db = r600_db, - .rasterizer = r600_rasterizer, - .scissor = r600_scissor, - .viewport = r600_viewport, - .dsa = r600_dsa, - .sampler_border = r600_sampler_border, - .sampler = r600_sampler, - .resource = r600_resource, - .cb_cntl = r600_cb_cntl, - .vs_resource = r600_vs_resource, - .vgt_init = r600_draw_vgt_init, - .vgt_prim = r600_draw_vgt_prim, - .vs_shader = r600_vs_shader, - .ps_shader = r600_ps_shader, - .init_config = r600_init_config, - .texture_state_viewport = r600_texture_state_viewport, - .texture_state_db = r600_texture_state_db, - .texture_state_cb = r600_texture_state_cb, - .texture_state_scissor = r600_texture_state_scissor, -}; - -void r600_set_constant_buffer_file(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, shader_class; - struct radeon_state *rstate, *rstates; - struct pipe_transfer *transfer; - u32 *ptr; - - type = R600_STATE_CONSTANT; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - nconstant = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (i = 0; i < nconstant; i++) { - rstate = &rstates[i]; - radeon_state_init(rstate, rscreen->rw, type, i, shader_class); - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -void r600_set_constant_buffer_mem(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, type, shader_class, size; - struct radeon_state *rstate, *rstates; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - type = R600_STATE_CBUF; - - switch (shader) { - case PIPE_SHADER_VERTEX: - shader_class = R600_SHADER_VS; - rstates = rctx->vs_constant; - break; - case PIPE_SHADER_FRAGMENT: - shader_class = R600_SHADER_PS; - rstates = rctx->ps_constant; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - rstate = &rstates[0]; - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - - nconstant = buffer->width0 / 16; - size = ALIGN_DIVUP(nconstant, 16); - - radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); - rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; - rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; - - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - if (radeon_state_pm4(rstate)) - return; - radeon_draw_bind(&rctx->draw, rstate); -} - diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 0cf9c1c401c..4f9b39a7fdc 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -233,12 +233,6 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 -/* same up to here */ -/* -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 -*/ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017 @@ -336,9 +330,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7 diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_pipe.c index 1711fabfc7a..dd8fa4fcd77 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -19,37 +19,221 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ #include <stdio.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_public.h" +#include <errno.h> +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_blitter.h> +#include <util/u_double_list.h> +#include <util/u_transfer.h> +#include <util/u_surface.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_state_inlines.h" +/* + * pipe_context + */ +static void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +#if 0 + static int dc = 0; + char dname[256]; +#endif + + if (!rctx->ctx.pm4_cdwords) + return; + + u_upload_flush(rctx->upload_vb); + u_upload_flush(rctx->upload_ib); + +#if 0 + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 20) { + r600_context_dump_bof(&rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } + dc++; +#endif + r600_context_flush(&rctx->ctx); +} + +static void r600_destroy_context(struct pipe_context *context) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; + + r600_context_fini(&rctx->ctx); + for (int i = 0; i < R600_PIPE_NSTATES; i++) { + free(rctx->states[i]); + } + + u_upload_destroy(rctx->upload_vb); + u_upload_destroy(rctx->upload_ib); + + FREE(rctx->ps_resource); + FREE(rctx->vs_resource); + FREE(rctx); +} + +static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +{ + struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); + struct r600_screen* rscreen = (struct r600_screen *)screen; + enum chip_class class; + + if (rctx == NULL) + return NULL; + rctx->context.winsys = rscreen->screen.winsys; + rctx->context.screen = screen; + rctx->context.priv = priv; + rctx->context.destroy = r600_destroy_context; + rctx->context.flush = r600_flush; + + /* Easy accessing of screen/winsys. */ + rctx->screen = rscreen; + rctx->radeon = rscreen->radeon; + rctx->family = r600_get_family(rctx->radeon); + + r600_init_blit_functions(rctx); + r600_init_query_functions(rctx); + r600_init_context_resource_functions(rctx); + + switch (r600_get_family(rctx->radeon)) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + rctx->context.draw_vbo = r600_draw_vbo; + r600_init_state_functions(rctx); + if (r600_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + r600_init_config(rctx); + break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + rctx->context.draw_vbo = evergreen_draw; + evergreen_init_state_functions(rctx); + if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { + r600_destroy_context(&rctx->context); + return NULL; + } + evergreen_init_config(rctx); + break; + default: + R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, + PIPE_BIND_INDEX_BUFFER); + if (rctx->upload_ib == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER); + if (rctx->upload_vb == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->blitter = util_blitter_create(&rctx->context); + if (rctx->blitter == NULL) { + FREE(rctx); + return NULL; + } + + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_resource) { + FREE(rctx); + return NULL; + } + + rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->ps_resource) { + FREE(rctx); + return NULL; + } + + class = r600_get_family_class(rctx->radeon); + if (class == R600 || class == R700) + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); + else + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); + + r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; + + return &rctx->context; +} + +/* + * pipe_screen + */ static const char* r600_get_vendor(struct pipe_screen* pscreen) { return "X.Org"; } +static const char *r600_get_family_name(enum radeon_family family) +{ + switch(family) { + case CHIP_R600: return "R600"; + case CHIP_RV610: return "RV610"; + case CHIP_RV630: return "RV630"; + case CHIP_RV670: return "RV670"; + case CHIP_RV620: return "RV620"; + case CHIP_RV635: return "RV635"; + case CHIP_RS780: return "RS780"; + case CHIP_RS880: return "RS880"; + case CHIP_RV770: return "RV770"; + case CHIP_RV730: return "RV730"; + case CHIP_RV710: return "RV710"; + case CHIP_RV740: return "RV740"; + case CHIP_CEDAR: return "CEDAR"; + case CHIP_REDWOOD: return "REDWOOD"; + case CHIP_JUNIPER: return "JUNIPER"; + case CHIP_CYPRESS: return "CYPRESS"; + case CHIP_HEMLOCK: return "HEMLOCK"; + default: return "unknown"; + } +} + static const char* r600_get_name(struct pipe_screen* pscreen) { - struct r600_screen *screen = r600_screen(pscreen); - enum radeon_family family = radeon_get_family(screen->rw); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else if (family < CHIP_CEDAR) - return "R700 (HD4XXX)"; - else - return "EVERGREEN"; + return r600_get_family_name(family); } static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) @@ -72,6 +256,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; /* Unsupported features (boolean caps). */ @@ -87,7 +272,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ - return 0; + return 16; case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; @@ -104,15 +289,35 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; } } +static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + default: + R600_ERR("r600: unsupported paramf %d\n", param); + return 0.0f; + } +} + static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) { + switch(shader) + { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: break; @@ -155,24 +360,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } } -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, @@ -226,58 +413,25 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, static void r600_destroy_screen(struct pipe_screen* pscreen) { - struct r600_screen* rscreen = r600_screen(pscreen); + struct r600_screen *rscreen = (struct r600_screen *)pscreen; if (rscreen == NULL) return; FREE(rscreen); } -struct pipe_screen *r600_screen_create(struct radeon *rw) + +struct pipe_screen *r600_screen_create(struct radeon *radeon) { - struct r600_screen* rscreen; - enum radeon_family family = radeon_get_family(rw); + struct r600_screen *rscreen; rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } - - /* don't enable mem constant for r600 yet */ - rscreen->use_mem_constant = FALSE; - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - rscreen->chip_class = EVERGREEN; - rscreen->use_mem_constant = TRUE; - break; - default: - FREE(rscreen); - return NULL; - } - radeon_set_mem_constant(rw, rscreen->use_mem_constant); - rscreen->rw = rw; - rscreen->screen.winsys = (struct pipe_winsys*)rw; + rscreen->radeon = radeon; + rscreen->screen.winsys = (struct pipe_winsys*)radeon; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; @@ -288,5 +442,8 @@ struct pipe_screen *r600_screen_create(struct radeon *rw) rscreen->screen.context_create = r600_create_context; r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); + + rscreen->tiling_info = r600_get_tiling_info(radeon); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h new file mode 100644 index 00000000000..35548329e46 --- /dev/null +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -0,0 +1,223 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#ifndef R600_PIPE_H +#define R600_PIPE_H + +#include <pipe/p_state.h> +#include <pipe/p_screen.h> +#include <pipe/p_context.h> +#include <util/u_math.h> +#include "r600.h" +#include "r600_public.h" +#include "r600_shader.h" +#include "r600_resource.h" + +enum r600_pipe_state_id { + R600_PIPE_STATE_BLEND = 0, + R600_PIPE_STATE_BLEND_COLOR, + R600_PIPE_STATE_CONFIG, + R600_PIPE_STATE_CLIP, + R600_PIPE_STATE_SCISSOR, + R600_PIPE_STATE_VIEWPORT, + R600_PIPE_STATE_RASTERIZER, + R600_PIPE_STATE_VGT, + R600_PIPE_STATE_FRAMEBUFFER, + R600_PIPE_STATE_DSA, + R600_PIPE_STATE_STENCIL_REF, + R600_PIPE_STATE_PS_SHADER, + R600_PIPE_STATE_VS_SHADER, + R600_PIPE_STATE_CONSTANT, + R600_PIPE_STATE_SAMPLER, + R600_PIPE_STATE_RESOURCE, + R600_PIPE_NSTATES +}; + +struct r600_screen { + struct pipe_screen screen; + struct radeon *radeon; + struct r600_tiling_info *tiling_info; +}; + +struct r600_pipe_sampler_view { + struct pipe_sampler_view base; + struct r600_pipe_state state; +}; + +struct r600_pipe_rasterizer { + struct r600_pipe_state rstate; + bool flatshade; + unsigned sprite_coord_enable; + float offset_units; + float offset_scale; +}; + +struct r600_pipe_blend { + struct r600_pipe_state rstate; + unsigned cb_target_mask; +}; + +struct r600_vertex_element +{ + unsigned count; + unsigned refcount; + struct pipe_vertex_element elements[32]; +}; + +struct r600_pipe_shader { + struct r600_shader shader; + struct r600_pipe_state rstate; + struct r600_bo *bo; + struct r600_vertex_element vertex_elements; +}; + +/* needed for blitter save */ +struct r600_textures_info { + struct r600_pipe_sampler_view **views; + unsigned n_views; + void **samplers; + unsigned n_samplers; +}; + +#define R600_CONSTANT_ARRAY_SIZE 256 +#define R600_RESOURCE_ARRAY_SIZE 160 + +struct r600_pipe_context { + struct pipe_context context; + struct blitter_context *blitter; + struct pipe_framebuffer_state *pframebuffer; + unsigned family; + void *custom_dsa_flush; + struct r600_screen *screen; + struct radeon *radeon; + struct r600_pipe_state *states[R600_PIPE_NSTATES]; + struct r600_context ctx; + struct r600_vertex_element *vertex_elements; + struct pipe_framebuffer_state framebuffer; + struct pipe_index_buffer index_buffer; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nvertex_buffer; + unsigned cb_target_mask; + /* for saving when using blitter */ + struct pipe_stencil_ref stencil_ref; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + struct r600_pipe_state *vs_resource; + struct r600_pipe_state *ps_resource; + struct r600_pipe_state config; + struct r600_pipe_shader *ps_shader; + struct r600_pipe_shader *vs_shader; + struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state ps_const_buffer; + struct r600_pipe_rasterizer *rasterizer; + /* shader information */ + unsigned sprite_coord_enable; + bool flatshade; + struct u_upload_mgr *upload_vb; + struct u_upload_mgr *upload_ib; + unsigned any_user_vbs; + struct r600_textures_info ps_samplers; + +}; + +struct r600_drawl { + struct pipe_context *ctx; + unsigned mode; + unsigned min_index; + unsigned max_index; + unsigned index_bias; + unsigned start; + unsigned count; + unsigned index_size; + unsigned index_buffer_offset; + struct pipe_resource *index_buffer; +}; + +/* evergreen_state.c */ +void evergreen_init_state_functions(struct r600_pipe_context *rctx); +void evergreen_init_config(struct r600_pipe_context *rctx); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); +void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); + +/* r600_blit.c */ +void r600_init_blit_functions(struct r600_pipe_context *rctx); +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_buffer.c */ +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind); +unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned face, unsigned level); +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle); +int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); +int r600_upload_user_buffers(struct r600_pipe_context *rctx); + +/* r600_query.c */ +void r600_init_query_functions(struct r600_pipe_context *rctx); + +/* r600_resource.c */ +void r600_init_context_resource_functions(struct r600_pipe_context *r600); + +/* r600_shader.c */ +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id); + +/* r600_state.c */ +void r600_init_state_functions(struct r600_pipe_context *rctx); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_init_config(struct r600_pipe_context *rctx); +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count); +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); +/* r600_helper.h */ +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); + +/* r600_texture.c */ +void r600_init_screen_texture_functions(struct pipe_screen *screen); +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p); + +/* + * common helpers + */ +static INLINE u32 S_FIXED(float value, u32 frac_bits) +{ + return value * (1 << frac_bits); +} +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +#endif diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index 1d89c9f9f61..f1970201e89 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -1,9 +1,28 @@ - +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct radeon; - -struct pipe_screen* r600_screen_create(struct radeon *rw); +struct pipe_screen *r600_screen_create(struct radeon *radeon); #endif diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 12900cce114..726668260cc 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -19,231 +19,55 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson */ -#include <errno.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include "r600_screen.h" -#include "r600_context.h" - -static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} - -static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate = &rquery->rstate; - - radeon_state_fini(rstate); - radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0); - rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8; - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rquery->buffer); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - if (radeon_state_pm4(rstate)) { - radeon_state_fini(rstate); - } -} +#include "r600_pipe.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_query *q; - - if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) - return NULL; - - q = CALLOC_STRUCT(r600_query); - if (!q) - return NULL; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - q->type = query_type; - q->buffer_size = 4096; - - q->buffer = radeon_ws_bo(rscreen->rw, q->buffer_size, 1, 0); - if (!q->buffer) { - FREE(q); - return NULL; - } - - LIST_ADDTAIL(&q->list, &rctx->query_list); - - return (struct pipe_query *)q; -} - -static void r600_destroy_query(struct pipe_context *ctx, - struct pipe_query *query) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_query *q = r600_query(query); - - radeon_ws_bo_reference(rscreen->rw, &q->buffer, NULL); - LIST_DEL(&q->list); - FREE(query); + return (struct pipe_query*)r600_context_query_create(&rctx->ctx, query_type); } -static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery) +static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - u64 start, end; - u32 *results; - int i; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - radeon_ws_bo_wait(rscreen->rw, rquery->buffer); - results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, r600_context(ctx)); - for (i = 0; i < rquery->num_results; i += 4) { - start = (u64)results[i] | (u64)results[i + 1] << 32; - end = (u64)results[i + 2] | (u64)results[i + 3] << 32; - if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { - rquery->result += end - start; - } - } - radeon_ws_bo_unmap(rscreen->rw, rquery->buffer); - rquery->num_results = 0; -} - -static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) { - /* running out of space */ - if (!rquery->flushed) { - ctx->flush(ctx, 0, NULL); - } - r600_query_result(ctx, rquery); - } - r600_query_begin(rctx, rquery); - rquery->flushed = false; -} - -static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) -{ - struct r600_context *rctx = r600_context(ctx); - - r600_query_end(rctx, rquery); - rquery->num_results += 16; + r600_context_query_destroy(&rctx->ctx, (struct r600_query *)query); } static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - rquery->state = R600_QUERY_STATE_STARTED; + rquery->result = 0; rquery->num_results = 0; - rquery->flushed = false; - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } + r600_query_begin(&rctx->ctx, (struct r600_query *)query); } static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery = r600_query(query); - int r; - - rquery->state &= ~R600_QUERY_STATE_STARTED; - rquery->state |= R600_QUERY_STATE_ENDED; - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } -} - -void r600_queries_suspend(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; - - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_suspend(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state |= R600_QUERY_STATE_SUSPENDED; - } -} - -void r600_queries_resume(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_query *rquery; - int r; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - if (rquery->state & R600_QUERY_STATE_STARTED) { - r600_query_resume(ctx, rquery); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - if (r == -EBUSY) { - /* this shouldn't happen */ - R600_ERR("had to flush while emitting end query\n"); - ctx->flush(ctx, 0, NULL); - r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); - } - } - rquery->state &= ~R600_QUERY_STATE_SUSPENDED; - } + r600_query_end(&rctx->ctx, (struct r600_query *)query); } static boolean r600_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, void *vresult) { - struct r600_query *rquery = r600_query(query); - uint64_t *result = (uint64_t*)vresult; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - if (!rquery->flushed) { + if (rquery->num_results) { ctx->flush(ctx, 0, NULL); - rquery->flushed = true; } - r600_query_result(ctx, rquery); - *result = rquery->result; - rquery->result = 0; - return TRUE; + return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } -void r600_init_query_functions(struct r600_context* rctx) +void r600_init_query_functions(struct r600_pipe_context *rctx) { - LIST_INITHEAD(&rctx->query_list); - rctx->context.create_query = r600_create_query; rctx->context.destroy_query = r600_destroy_query; rctx->context.begin_query = r600_begin_query; diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 05707740da5..207642ccfa9 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -21,9 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r600_context.h" -#include "r600_resource.h" -#include "r600_screen.h" +#include "r600_pipe.h" static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) @@ -46,7 +44,16 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre } } -void r600_init_context_resource_functions(struct r600_context *r600) +void r600_init_screen_resource_functions(struct pipe_screen *screen) +{ + screen->resource_create = r600_resource_create; + screen->resource_from_handle = r600_resource_from_handle; + screen->resource_get_handle = u_resource_get_handle_vtbl; + screen->resource_destroy = u_resource_destroy_vtbl; + screen->user_buffer_create = r600_user_buffer_create; +} + +void r600_init_context_resource_functions(struct r600_pipe_context *r600) { r600->context.get_transfer = u_get_transfer_vtbl; r600->context.transfer_map = u_transfer_map_vtbl; @@ -56,12 +63,3 @@ void r600_init_context_resource_functions(struct r600_context *r600) r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } - -void r600_init_screen_resource_functions(struct pipe_screen *screen) -{ - screen->resource_create = r600_resource_create; - screen->resource_from_handle = r600_resource_from_handle; - screen->resource_get_handle = u_resource_get_handle_vtbl; - screen->resource_destroy = u_resource_destroy_vtbl; - screen->user_buffer_create = r600_user_buffer_create; -} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 6ddb1ad32a7..ef484aba4a2 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -25,8 +25,15 @@ #include "util/u_transfer.h" -struct r600_context; -struct r600_screen; +/* Texture transfer. */ +struct r600_transfer { + /* Base class. */ + struct pipe_transfer transfer; + /* Buffer transfer. */ + struct pipe_transfer *buffer_transfer; + unsigned offset; + struct pipe_resource *linear_texture; +}; /* This gets further specialized into either buffer or texture * structures. Use the vtbl struct to choose between the two @@ -34,7 +41,7 @@ struct r600_screen; */ struct r600_resource { struct u_resource base; - struct radeon_ws_bo *bo; + struct r600_bo *bo; u32 domain; u32 flink; u32 size; @@ -42,27 +49,20 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_override; - unsigned long bpt; - unsigned long size; - unsigned tilled; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_override; + unsigned size; + unsigned tiled; unsigned array_mode; unsigned tile_type; unsigned depth; unsigned dirty; - struct radeon_ws_bo *uncompressed; - struct radeon_state scissor[PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state cb[8][PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state db[PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state viewport[PIPE_MAX_TEXTURE_LEVELS]; + struct r600_resource_texture *flushed_depth_texture; }; -void r600_init_context_resource_functions(struct r600_context *r600); void r600_init_screen_resource_functions(struct pipe_screen *screen); /* r600_buffer */ @@ -103,7 +103,25 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? true : false; + return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; } +int r600_texture_depth_flush(struct pipe_context *ctx, + struct pipe_resource *texture); + +extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); + +/* r600_texture.c texture transfer functions. */ +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans); +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer); +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer); + #endif diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h deleted file mode 100644 index 4be77865fbd..00000000000 --- a/src/gallium/drivers/r600/r600_screen.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_SCREEN_H -#define R600_SCREEN_H - -#include <pipe/p_state.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_buffer.h> -#include <xf86drm.h> -#include <radeon_drm.h> -#include "radeon.h" -#include "util/u_transfer.h" -#include "r600_resource.h" - -/* Texture transfer. */ -struct r600_transfer { - /* Base class. */ - struct pipe_transfer transfer; - /* Buffer transfer. */ - struct pipe_transfer *buffer_transfer; - unsigned offset; - struct pipe_resource *linear_texture; -}; - -enum chip_class { - R600, - R700, - EVERGREEN, -}; - -struct r600_screen { - struct pipe_screen screen; - struct radeon *rw; - enum chip_class chip_class; - boolean use_mem_constant; -}; - -static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) -{ - return (struct r600_screen*)screen; -} - -/* Buffer functions. */ -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned face, unsigned level); -struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, - struct winsys_handle *whandle); - -/* r600_texture.c texture transfer functions. */ -struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box); -void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans); -void* r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer* transfer); -void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer); -int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level); -int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); - -/* r600_blit.c */ -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); - -/* helpers */ -int r600_conv_pipe_format(unsigned pformat, unsigned *format); -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); - -void r600_init_screen_texture_functions(struct pipe_screen *screen); - -#endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4da6850b0a9..d1143985ead 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -25,9 +25,7 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" #include "util/u_format.h" -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_shader.h" +#include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" #include "r600_opcodes.h" @@ -35,38 +33,227 @@ #include <stdio.h> #include <errno.h> +static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028894_SQ_PGM_START_FS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); -struct r600_shader_tgsi_instruction; +} -struct r600_shader_ctx { - struct tgsi_shader_info info; - struct tgsi_parse_context parse; - const struct tgsi_token *tokens; - unsigned type; - unsigned file_offset[TGSI_FILE_COUNT]; - unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; - struct r600_shader *shader; - u32 value[4]; - u32 *literals; - u32 nliterals; - u32 max_driver_temp_used; -}; +int r600_find_vs_semantic_index(struct r600_shader *vs, + struct r600_shader *ps, int id) +{ + struct r600_shader_io *input = &ps->input[id]; -struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; - unsigned r600_opcode; - int (*process)(struct r600_shader_ctx *ctx); -}; + for (int i = 0; i < vs->noutput; i++) { + if (input->name == vs->output[i].name && + input->sid == vs->output[i].sid) { + return i - 1; + } + } + return 0; +} -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; -static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); +static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + + /* clear previous register */ + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_Z_EXPORT_ENABLE(1), + S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); + } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + + if (rshader->uses_kill) { + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_KILL_ENABLE(1), + S_02880C_KILL_ENABLE(1), NULL); + } + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} -static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *rshader = &shader->shader; + void *ptr; + + /* copy new shader */ + if (shader->bo == NULL) { + shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); + if (shader->bo == NULL) { + return -ENOMEM; + } + ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); + r600_bo_unmap(rctx->radeon, shader->bo); + } + /* build state */ + rshader->flat_shade = rctx->flatshade; + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_vs(ctx, shader); + } else { + r600_pipe_shader_vs(ctx, shader); + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (rshader->family >= CHIP_CEDAR) { + evergreen_pipe_shader_ps(ctx, shader); + } else { + r600_pipe_shader_ps(ctx, shader); + } + break; + default: + return -EINVAL; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); + return 0; +} + +static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_shader *shader = &rshader->shader; const struct util_format_description *desc; enum pipe_format resource_format[160]; unsigned i, nresources = 0; @@ -76,9 +263,16 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad if (shader->processor_type != TGSI_PROCESSOR_VERTEX) return 0; + /* doing a full memcmp fell over the refcount */ + if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && + (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { + return 0; + } + rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; } + r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -102,25 +296,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad return r600_bc_build(&shader->bc); } -int r600_pipe_shader_create(struct pipe_context *ctx, - struct r600_context_state *rpshader, - const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + if (shader == NULL) + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, shader->shader.bc.nresource); + return -EINVAL; + } + r = r600_shader_update(ctx, shader); + if (r) + return r; + return r600_pipe_shader(ctx, shader); +} + +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; //fprintf(stderr, "--------------------------------------------------------------\n"); //tgsi_dump(tokens, 0); - if (rpshader == NULL) - return -ENOMEM; - rpshader->shader.family = radeon_get_family(rscreen->rw); - rpshader->shader.use_mem_constant = rscreen->use_mem_constant; - r = r600_shader_from_tgsi(tokens, &rpshader->shader); + shader->shader.family = r600_get_family(rctx->radeon); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&rpshader->shader.bc); + r = r600_bc_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -129,81 +338,41 @@ int r600_pipe_shader_create(struct pipe_context *ctx, return 0; } -static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(&rpshader->rstate[0]); - - return rctx->vtbl->vs_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *state; - - state = &rpshader->rstate[0]; - radeon_state_fini(state); - - return rctx->vtbl->ps_shader(rctx, rpshader, state); -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader = &rpshader->shader; - int r; - void *data; +/* + * tgsi -> r600 shader + */ +struct r600_shader_tgsi_instruction; - /* copy new shader */ - radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL); - rpshader->bo = NULL; - rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4, - 4096, 0); - if (rpshader->bo == NULL) { - return -ENOMEM; - } - data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); - memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); - /* build state */ - rshader->flat_shade = rctx->flat_shade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - r = r600_pipe_shader_vs(ctx, rpshader); - break; - case TGSI_PROCESSOR_FRAGMENT: - r = r600_pipe_shader_ps(ctx, rpshader); - break; - default: - r = -EINVAL; - break; - } - return r; -} +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; + u32 value[4]; + u32 *literals; + u32 nliterals; + u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; +}; -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) -{ - struct r600_context *rctx = r600_context(ctx); - int r; +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; - if (rpshader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rpshader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, &rpshader->shader); - if (r) - return r; - return r600_pipe_shader(ctx, rpshader); -} +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; +static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static int tgsi_is_supported(struct r600_shader_ctx *ctx) { @@ -225,11 +394,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Dimension || - i->Src[j].Register.Absolute) { - R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, - i->Src[j].Register.Dimension, - i->Src[j].Register.Absolute); + if (i->Src[j].Register.Dimension) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); return -EINVAL; } } @@ -242,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -256,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -288,6 +481,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Declaration.Interpolate; + ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -304,13 +498,19 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) vtx.dst_sel_y = 1; vtx.dst_sel_z = 2; vtx.dst_sel_w = 3; + vtx.use_const_fields = 1; r = r600_bc_add_vtx(ctx->bc, &vtx); if (r) return r; } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -337,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -351,7 +598,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = r600_bc_init(ctx.bc, shader->family); if (r) return r; - ctx.bc->use_mem_constant = shader->use_mem_constant; ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -383,14 +629,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; } + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - if (ctx.shader->use_mem_constant) - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; - else - ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + + ctx.file_offset[TGSI_FILE_CONSTANT] = 128; ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + @@ -481,7 +728,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; output[i].swizzle_x = 2; - output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].swizzle_y = 7; + output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[i].array_base = 61; + output[i].swizzle_x = 7; + output[i].swizzle_y = 1; + output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); @@ -514,7 +768,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; - output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); noutput++; } } @@ -587,6 +841,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } @@ -642,13 +897,14 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].rel = r600_src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -658,7 +914,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (r) return r; } - r600_src[j].sel = treg; + r600_src[i].sel = treg; + r600_src[i].rel =0; j--; } } @@ -677,13 +934,13 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ nliteral++; } } - for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[j].sel; + alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; alu.dst.sel = treg; alu.dst.chan = k; @@ -694,11 +951,11 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); + r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); if (r) return r; - r600_src[j].sel = treg; - j++; + r600_src[i].sel = treg; + j--; } } return 0; @@ -721,6 +978,9 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -791,6 +1051,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; r = tgsi_split_literal_constant(ctx, r600_src); if (r) @@ -926,38 +1189,95 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - r = tgsi_setup_trig(ctx, r600_src); - if (r) - return r; - + /* We'll only need the trig stuff if we are going to write to the + * X or Y components of the destination vector. + */ + if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + } /* dst.x = COS */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* dst.y = SIN */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* dst.z = 0.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* dst.w = 1.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; return 0; } @@ -1157,34 +1477,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } -static int tgsi_trans(struct r600_shader_ctx *ctx) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, r; - - for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - alu.inst = ctx->inst_info->r600_opcode; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); - if (r) - return r; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); - } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - } - return 0; -} - static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1301,6 +1593,9 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { @@ -1397,6 +1692,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* do it in 2 step as op3 doesn't support writemask */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1429,6 +1727,9 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; @@ -1502,7 +1803,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - + for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -1530,7 +1831,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1558,6 +1859,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_chan = 1; src2_chan = 2; break; + default: + assert(0); + src_chan = 0; + src2_chan = 0; + break; } r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) @@ -1641,7 +1947,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1650,7 +1956,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = src_gpr; - alu.src[0].chan = i; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1670,14 +1976,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.sampler_id = tex.resource_id; + tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.resource_id = tex.sampler_id; + if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) + tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; - tex.dst_sel_x = 0; - tex.dst_sel_y = 1; - tex.dst_sel_z = 2; - tex.dst_sel_w = 3; + tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; + tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; + tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; + tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; tex.src_sel_x = 0; tex.src_sel_y = 1; tex.src_sel_z = 2; @@ -1720,6 +2028,9 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; /* 1 - src0 */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1799,6 +2110,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; @@ -1850,7 +2164,10 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = tgsi_split_constant(ctx, r600_src); if (r) return r; - + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2293,7 +2610,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } /* r6/7 only for now */ -static int tgsi_arl(struct r600_shader_ctx *ctx) +static int tgsi_eg_arl(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.last = 1; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + return 0; +} +static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2313,6 +2663,7 @@ static int tgsi_arl(struct r600_shader_ctx *ctx) r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); if (r) return r; + ctx->bc->cf_last->r6xx_uses_waterfall = 1; return 0; } @@ -2637,7 +2988,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, @@ -2718,7 +3069,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ @@ -2801,7 +3152,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, @@ -2876,7 +3227,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 06dd65038d5..f8bc5951395 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -31,6 +31,8 @@ struct r600_shader_io { unsigned done; int sid; unsigned interpolate; + boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -39,11 +41,11 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; - boolean use_mem_constant; }; int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4dcdc492fc1..00234f956aa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -19,184 +19,566 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse + */ + +/* TODO: + * - fix mask for depth control & cull for query */ #include <stdio.h> #include <errno.h> -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "r600_screen.h" -#include "r600_context.h" +#include <pipe/p_defines.h> +#include <pipe/p_state.h> +#include <pipe/p_context.h> +#include <tgsi/tgsi_scan.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_util.h> +#include <util/u_double_list.h> +#include <util/u_pack_color.h> +#include <util/u_memory.h> +#include <util/u_inlines.h> +#include <util/u_upload_mgr.h> +#include <util/u_index_modify.h> +#include <util/u_framebuffer.h> +#include <pipebuffer/pb_buffer.h> +#include "r600.h" +#include "r600d.h" #include "r600_resource.h" +#include "r600_shader.h" +#include "r600_pipe.h" +#include "r600_state_inlines.h" -static struct r600_context_state *r600_new_context_state(unsigned type) +static void r600_draw_common(struct r600_drawl *draw) { - struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); - if (rstate == NULL) - return NULL; - rstate->type = type; - rstate->refcount = 1; - return rstate; -} + struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + unsigned i, j, offset, prim; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct pipe_vertex_buffer *vertex_buffer; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + + switch (draw->index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw->index_size); + return; + } + if (r600_conv_pipe_prim(draw->mode, &prim)) + return; -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - rstate = r600_new_context_state(pipe_blend_type); - rstate->state.blend = *state; - rctx->vtbl->blend(rctx, &rstate->rstate[0], &rstate->state.blend); - - return rstate; + /* rebuild vertex shader if input format changed */ + if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + return; + if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + return; + + for (i = 0 ; i < rctx->vertex_elements->count; i++) { + uint32_t word2, format; + + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + j = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + + format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + + word2 = format | S_038008_STRIDE(vertex_buffer->stride); + + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); + } + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + /* build late state */ + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&vgt, + R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, + R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw->count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw->index_buffer) { + rbuffer = (struct r600_resource*)draw->index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw->index_buffer_offset; + } + r600_context_draw(&rctx->ctx, &rdraw); } -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { - struct r600_context_state *rstate; + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + *index_size = 2; + *start = 0; + break; - rstate = r600_new_context_state(pipe_dsa_type); - rstate->state.dsa = *state; - return rstate; + case 2: + if (*start % 2 != 0) { + util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); + *start = 0; + } + break; + + case 4: + break; + } } -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_drawl draw; - rstate = r600_new_context_state(pipe_rasterizer_type); - rstate->state.rasterizer = *state; - return rstate; + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + memset(&draw, 0, sizeof(struct r600_drawl)); + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->index_bias; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.start * draw.index_size; + draw.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.index_size = 0; + draw.index_buffer = NULL; + draw.min_index = info->min_index; + draw.max_index = info->max_index; + draw.index_bias = info->start; + } + r600_draw_common(&draw); + + pipe_resource_reference(&draw.index_buffer, NULL); } -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - rstate = r600_new_context_state(pipe_sampler_type); - rstate->state.sampler = *state; - rctx->vtbl->sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0); - rctx->vtbl->sampler_border(rctx, &rstate->rstate[1], &rstate->state.sampler, 0); - return rstate; + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_BLEND_COLOR; + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); + rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_remove_sampler_view(struct r600_shader_sampler_states *sampler, - struct r600_context_state *rstate) +static void *r600_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) { - int i, j; - - for (i = 0; i < sampler->nview; i++) { - for (j = 0; j < rstate->nrstate; j++) { - if (sampler->view[i] == &rstate->rstate[j]) - sampler->view[i] = NULL; + struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); + struct r600_pipe_state *rstate; + u32 color_control, target_mask; + + if (blend == NULL) { + return NULL; + } + rstate = &blend->rstate; + + rstate->id = R600_PIPE_STATE_BLEND; + + target_mask = 0; + color_control = S_028808_PER_MRT_BLEND(1); + if (state->logicop_enable) { + color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); + } else { + color_control |= (0xcc << 16); + } + /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + if (state->independent_blend_enable) { + for (int i = 0; i < 8; i++) { + if (state->rt[i].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[i].colormask << (4 * i)); + } + } else { + for (int i = 0; i < 8; i++) { + if (state->rt[0].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (state->rt[0].colormask << (4 * i)); } } -} -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_context_state *rstate = (struct r600_context_state *)state; - struct r600_context *rctx = r600_context(ctx); + blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + color_control, 0xFFFFFFFF, NULL); + + for (int i = 0; i < 8; i++) { + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + uint32_t bc = 0; + + if (!state->rt[i].blend_enable) + continue; + + bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + bc |= S_028804_SEPARATE_ALPHA_BLEND(1); + bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); + } - /* need to search list of vs/ps sampler views and remove it from any - uggh */ - r600_remove_sampler_view(&rctx->ps_sampler, rstate); - r600_remove_sampler_view(&rctx->vs_sampler, rstate); - r600_context_state_decref(rstate); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) { + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + } + } + return rstate; } -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state) +static void r600_bind_blend_state(struct pipe_context *ctx, void *state) { - struct r600_context_state *rstate; - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; - rstate = r600_new_context_state(pipe_sampler_view_type); - rstate->state.sampler_view = *state; - rstate->state.sampler_view.texture = NULL; - pipe_reference(NULL, &texture->reference); - rstate->state.sampler_view.texture = texture; - rstate->state.sampler_view.reference.count = 1; - rstate->state.sampler_view.context = ctx; - rctx->vtbl->resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0); - return &rstate->state.sampler_view; + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views, - struct r600_shader_sampler_states *sampler, - unsigned shader_id) +static void *r600_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; + unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - for (i = 0; i < sampler->nview; i++) { - radeon_draw_unbind(&rctx->draw, sampler->view[i]); + if (rstate == NULL) { + return NULL; } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - rstate->nrstate = 0; + rstate->id = R600_PIPE_STATE_DSA; + /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ + /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be + * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will + * be set if shader use texkill instruction + */ + db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + + /* stencil */ + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); } } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)views[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, shader_id); - sampler->view[i] = &rstate->rstate[rstate->nrstate]; - rstate->nrstate++; + + /* alpha */ + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_ref = fui(state->alpha.ref_value); + } + + /* misc */ + db_render_control = 0; + db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | + S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); + /* TODO db_render_override depends on query */ + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, stencil_ref_mask, + 0xFFFFFFFF & C_028430_STENCILREF, NULL); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); + r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + + return rstate; +} + +static void *r600_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); + struct r600_pipe_state *rstate; + unsigned tmp; + unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; + + if (rs == NULL) { + return NULL; + } + + rstate = &rs->rstate; + rs->flatshade = state->flatshade; + rs->sprite_coord_enable = state->sprite_coord_enable; + + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + + rstate->id = R600_PIPE_STATE_RASTERIZER; + if (state->flatshade_first) + prov_vtx = 0; + tmp = 0x00000001; + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - sampler->nview = count; + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + + polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL); + r600_pipe_state_add_reg(rstate, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | + S_028814_POLY_MODE(polygon_dual_mode) | + S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + + tmp = (unsigned)(state->line_width * 8.0); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + + return rstate; } -static void r600_set_ps_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_bind_rs_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->ps_sampler, R600_SHADER_PS); + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); } -static void r600_set_vs_sampler_view(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) +static void r600_delete_rs_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - r600_set_sampler_view(ctx, count, views, &rctx->vs_sampler, R600_SHADER_VS); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); } -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +static void *r600_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int r; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + union util_color uc; - rstate = r600_new_context_state(pipe_shader_type); - rstate->state.shader = *state; - r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens); - if (r) { - r600_context_state_decref(rstate); + if (rstate == NULL) { return NULL; } + + rstate->id = R600_PIPE_STATE_SAMPLER; + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + /* FIXME LOD it depends on texture base level ... */ + r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + if (uc.ui) { + r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + } return rstate; } @@ -208,192 +590,242 @@ static void *r600_create_vertex_elements(struct pipe_context *ctx, assert(count < 32); v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); v->refcount = 1; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); return v; } -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +static void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) { - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); + pipe_resource_reference(&state->texture, NULL); + FREE(resource); } -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_state *rstate; + const struct util_format_description *desc; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; + unsigned format; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; + struct r600_bo *bo[2]; - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; + if (resource == NULL) + return NULL; + rstate = &resource->state; + + /* initialize base object */ + resource->base = *state; + resource->base.texture = NULL; + pipe_reference(NULL, &texture->reference); + resource->base.texture = texture; + resource->base.reference.count = 1; + resource->base.context = ctx; + + swizzle[0] = state->swizzle_r; + swizzle[1] = state->swizzle_g; + swizzle[2] = state->swizzle_b; + swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(state->format, + swizzle, + &word4, &yuv_format); + if (format == ~0) { + format = 0; + } + desc = util_format_description(state->format); + if (desc == NULL) { + R600_ERR("unknow format %d\n", state->format); + } + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + /* FIXME depth texture decompression */ + if (tmp->depth) { + r600_texture_depth_flush(ctx, texture); + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->flushed_depth_texture->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; } + pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode; + tile_type = tmp->tile_type; + } + + /* FIXME properly handle first level != 0 */ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + S_038004_TEX_HEIGHT(texture->height0 - 1) | + S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + S_038010_REQUEST_SIZE(1) | + S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + S_038014_LAST_LEVEL(state->last_level) | + S_038014_BASE_ARRAY(0) | + S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + + return &resource->base; } -static void r600_bind_rasterizer_state(struct pipe_context *ctx, void *state) +static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - if (state == NULL) - return; - rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); - rctx->rasterizer = r600_context_state_incref(rstate); + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, + struct pipe_sampler_view **views) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - if (state == NULL) - return; - rctx->blend = r600_context_state_decref(rctx->blend); - rctx->blend = r600_context_state_incref(rstate); + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + } + } } -static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) +static void r600_bind_state(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; if (state == NULL) return; - rctx->dsa = r600_context_state_decref(rctx->dsa); - rctx->dsa = r600_context_state_incref(rstate); -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; - - rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); - rctx->ps_shader = r600_context_state_incref(rstate); + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); - rctx->vs_shader = r600_context_state_incref(rstate); -} + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; -static void r600_bind_sampler_shader(struct pipe_context *ctx, - unsigned count, void **states, - struct r600_shader_sampler_states *sampler, unsigned shader_id) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - unsigned i; - - for (i = 0; i < sampler->nsampler; i++) { - radeon_draw_unbind(&rctx->draw, sampler->sampler[i]); - } - for (i = 0; i < sampler->nborder; i++) { - radeon_draw_unbind(&rctx->draw, sampler->border[i]); + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - rstate->nrstate = 0; - } - } - for (i = 0; i < count; i++) { - rstate = (struct r600_context_state *)states[i]; - if (rstate) { - if (rstate->nrstate >= R600_MAX_RSTATE) - continue; - if (rstate->nrstate) { - memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); - memcpy(&rstate->rstate[rstate->nrstate+1], &rstate->rstate[1], sizeof(struct radeon_state)); - } - radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, shader_id); - radeon_state_convert(&rstate->rstate[rstate->nrstate + 1], R600_STATE_SAMPLER_BORDER, i, shader_id); - sampler->sampler[i] = &rstate->rstate[rstate->nrstate]; - sampler->border[i] = &rstate->rstate[rstate->nrstate + 1]; - rstate->nrstate += 2; - } - } - sampler->nsampler = count; - sampler->nborder = count; } -static void r600_bind_ps_sampler(struct pipe_context *ctx, - unsigned count, void **states) +static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->ps_sampler, R600_SHADER_PS); -} + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; -static void r600_bind_vs_sampler(struct pipe_context *ctx, - unsigned count, void **states) -{ - struct r600_context *rctx = r600_context(ctx); - r600_bind_sampler_shader(ctx, count, states, &rctx->vs_sampler, R600_SHADER_VS); + for (int i = 0; i < count; i++) { + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + } } static void r600_delete_state(struct pipe_context *ctx, void *state) { - struct r600_context_state *rstate = (struct r600_context_state *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - r600_context_state_decref(rstate); + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); } -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *color) +static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - rctx->blend_color = *color; + if (v == NULL) + return; + if (--v->refcount) + return; + free(v); } static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - r600_context_state_decref(rctx->clip); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_clip_type); - rstate->state.clip = *state; - rctx->vtbl->ucp(rctx, &rstate->rstate[0], &rstate->state.clip); - rctx->clip = rstate; + rctx->clip = *state; + rstate->id = R600_PIPE_STATE_CLIP; + for (int i = 0; i < state->nr; i++) { + r600_pipe_state_add_reg(rstate, + R_028E20_PA_CL_UCP0_X + i * 4, + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E24_PA_CL_UCP0_Y + i * 4, + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E28_PA_CL_UCP0_Z + i * 4, + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028E2C_PA_CL_UCP0_W + i * 4, + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, + S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | + S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_CLIP]); + rctx->states[R600_PIPE_STATE_CLIP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) +static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; - int i; - - r600_context_state_decref(rctx->framebuffer); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - rstate = r600_new_context_state(pipe_framebuffer_type); - rstate->state.framebuffer = *state; - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_reference(NULL, &state->cbufs[i]->reference); - } - pipe_reference(NULL, &state->zsbuf->reference); - rctx->framebuffer = rstate; - for (i = 0; i < state->nr_cbufs; i++) { - rctx->vtbl->cb(rctx, &rstate->rstate[i+1], state, i); - } - if (state->zsbuf) { - rctx->vtbl->db(rctx, &rstate->rstate[0], state); + r600_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; +// rctx->vs_rebuild = TRUE; } - return; } static void r600_set_polygon_stipple(struct pipe_context *ctx, @@ -408,55 +840,296 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask static void r600_set_scissor_state(struct pipe_context *ctx, const struct pipe_scissor_state *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tl, br; - r600_context_state_decref(rctx->scissor); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_scissor_type); - rstate->state.scissor = *state; - rctx->scissor = rstate; + rstate->id = R600_PIPE_STATE_SCISSOR; + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_SCISSOR]); + rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } static void r600_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref *state) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 tmp; - r600_context_state_decref(rctx->stencil_ref); + if (rstate == NULL) + return; - rstate = r600_new_context_state(pipe_stencil_ref_type); - rstate->state.stencil_ref = *state; - rctx->stencil_ref = rstate; + rctx->stencil_ref = *state; + rstate->id = R600_PIPE_STATE_STENCIL_REF; + tmp = S_028430_STENCILREF(state->ref_value[0]); + r600_pipe_state_add_reg(rstate, + R_028430_DB_STENCILREFMASK, tmp, + ~C_028430_STENCILREF, NULL); + tmp = S_028434_STENCILREF_BF(state->ref_value[1]); + r600_pipe_state_add_reg(rstate, + R_028434_DB_STENCILREFMASK_BF, tmp, + ~C_028434_STENCILREF_BF, NULL); + + free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); + rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_vertex_buffers(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_buffer *buffers) +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) { - struct r600_context *rctx = r600_context(ctx); - unsigned i; - boolean any_user_buffers = FALSE; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - for (i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + if (rstate == NULL) + return; + + rctx->viewport = *state; + rstate->id = R600_PIPE_STATE_VIEWPORT; + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_VIEWPORT]); + rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state, int cb) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level = state->cbufs[cb]->level; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + struct r600_bo *bo[3]; + + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + rbuffer = &rtex->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + bo[2] = rbuffer->bo; + + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_ARRAY_MODE(rtex->array_mode); + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_NUMBER_TYPE(ntype); + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + color_info |= S_0280A0_SOURCE_FORMAT(1); + + r600_pipe_state_add_reg(rstate, + R_028040_CB_COLOR0_BASE + cb * 4, + (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_0280A0_CB_COLOR0_INFO + cb * 4, + color_info, 0xFFFFFFFF, bo[0]); + r600_pipe_state_add_reg(rstate, + R_028060_CB_COLOR0_SIZE + cb * 4, + S_028060_PITCH_TILE_MAX(pitch) | + S_028060_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028080_CB_COLOR0_VIEW + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0280E0_CB_COLOR0_FRAG + cb * 4, + r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + r600_pipe_state_add_reg(rstate, + R_0280C0_CB_COLOR0_TILE + cb * 4, + r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + r600_pipe_state_add_reg(rstate, + R_028100_CB_COLOR0_MASK + cb * 4, + 0x00000000, 0xFFFFFFFF, NULL); +} + +static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, + const struct pipe_framebuffer_state *state) +{ + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + unsigned level; + unsigned pitch, slice, format; + + if (state->zsbuf == NULL) + return; + + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tiled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; + rbuffer = &rtex->resource; + + level = state->zsbuf->level; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + format = r600_translate_dbformat(state->zsbuf->texture->format); + + r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, + (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, + S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, + S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), + 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, + (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); +} + +static void r600_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + u32 shader_mask, tl, br, shader_control, target_mask; + + if (rstate == NULL) + return; + + /* unreference old buffer and reference new one */ + rstate->id = R600_PIPE_STATE_FRAMEBUFFER; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + + rctx->pframebuffer = &rctx->framebuffer; + + /* build states */ + for (int i = 0; i < state->nr_cbufs; i++) { + r600_cb(rctx, rstate, state, i); } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - any_user_buffers = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + if (state->zsbuf) { + r600_db(rctx, rstate, state); } - rctx->any_user_vbs = any_user_buffers; - rctx->nvertex_buffer = count; + + target_mask = 0x00000000; + target_mask = 0xFFFFFFFF; + shader_mask = 0; + shader_control = 0; + for (int i = 0; i < state->nr_cbufs; i++) { + target_mask ^= 0xf << (i * 4); + shader_mask |= 0xf << (i * 4); + shader_control |= 1 << i; + } + tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028244_PA_SC_GENERIC_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + if (rctx->family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, + shader_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, + 0x00000000, target_mask, NULL); + r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, + shader_mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, + 0x01000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, + 0x000000FF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, + 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); + rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); } static void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib) { - struct r600_context *rctx = r600_context(ctx); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; if (ib) { pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); @@ -469,21 +1142,113 @@ static void r600_set_index_buffer(struct pipe_context *ctx, /* TODO make this more like a state */ } -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) +static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) { - struct r600_context *rctx = r600_context(ctx); - struct r600_context_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - r600_context_state_decref(rctx->viewport); + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} - rstate = r600_new_context_state(pipe_viewport_type); - rstate->state.viewport = *state; - rctx->vtbl->viewport(rctx, &rstate->rstate[0], &rstate->state.viewport); - rctx->viewport = rstate; +static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } } -void r600_init_state_functions(struct r600_context *rctx) +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + /* TODO proper delete */ + free(shader); +} + +void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; @@ -494,30 +1259,23 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.create_vertex_elements_state = r600_create_vertex_elements; rctx->context.create_vs_state = r600_create_shader_state; rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rasterizer_state; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; rctx->context.bind_vs_state = r600_bind_vs_shader; rctx->context.delete_blend_state = r600_delete_state; rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_state; - rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_ps_shader; + rctx->context.delete_rasterizer_state = r600_delete_rs_state; rctx->context.delete_sampler_state = r600_delete_state; rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_state; + rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = r600_set_blend_color; rctx->context.set_clip_state = r600_set_clip_state; - - if (rctx->screen->chip_class == EVERGREEN) - rctx->context.set_constant_buffer = eg_set_constant_buffer; - else if (rctx->screen->use_mem_constant) - rctx->context.set_constant_buffer = r600_set_constant_buffer_mem; - else - rctx->context.set_constant_buffer = r600_set_constant_buffer_file; - + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; rctx->context.set_framebuffer_state = r600_set_framebuffer_state; rctx->context.set_polygon_stipple = r600_set_polygon_stipple; @@ -531,169 +1289,291 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.sampler_view_destroy = r600_sampler_view_destroy; } -struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate) +void r600_init_config(struct r600_pipe_context *rctx) { - if (rstate == NULL) - return NULL; - rstate->refcount++; - return rstate; -} - -struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate) -{ - unsigned i; - - if (rstate == NULL) - return NULL; - if (--rstate->refcount) - return NULL; - switch (rstate->type) { - case pipe_sampler_view_type: - pipe_resource_reference(&rstate->state.sampler_view.texture, NULL); - break; - case pipe_framebuffer_type: - for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); - } - pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + enum radeon_family family; + struct r600_pipe_state *rstate = &rctx->config; + u32 tmp; + + family = r600_get_family(rctx->radeon); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + switch (family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; break; - case pipe_viewport_type: - case pipe_depth_type: - case pipe_rasterizer_type: - case pipe_poly_stipple_type: - case pipe_scissor_type: - case pipe_clip_type: - case pipe_stencil_type: - case pipe_alpha_type: - case pipe_dsa_type: - case pipe_blend_type: - case pipe_stencil_ref_type: - case pipe_shader_type: - case pipe_sampler_type: + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: default: - R600_ERR("invalid type %d\n", rstate->type); - return NULL; - } - radeon_state_fini(&rstate->rstate[0]); - FREE(rstate); - return NULL; -} - -static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shader_sampler_states *sampler) -{ - int i; - - for (i = 0; i < sampler->nsampler; i++) { - if (sampler->sampler[i]) - radeon_draw_bind(&rctx->draw, sampler->sampler[i]); + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; } - for (i = 0; i < sampler->nborder; i++) { - if (sampler->border[i]) - radeon_draw_bind(&rctx->draw, sampler->border[i]); - } + rstate->id = R600_PIPE_STATE_CONFIG; - for (i = 0; i < sampler->nview; i++) { - if (sampler->view[i]) - radeon_draw_bind(&rctx->draw, sampler->view[i]); + /* SQ_CONFIG */ + tmp = 0; + switch (family) { + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV710: + break; + default: + tmp |= S_008C00_VC_ENABLE(1); + break; } -} - - -static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - int i; - - radeon_state_init(flush, rscreen->rw, R600_STATE_CB_FLUSH, 0, 0); - - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - surf = rctx->framebuffer->state.framebuffer.cbufs[i]; - - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], rbuffer->bo); - flush->placement[i] = RADEON_GEM_DOMAIN_VRAM; + tmp |= S_008C00_DX9_CONSTS(0); + tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); + tmp |= S_008C00_PS_PRIO(ps_prio); + tmp |= S_008C00_VS_PRIO(vs_prio); + tmp |= S_008C00_GS_PRIO(gs_prio); + tmp |= S_008C00_ES_PRIO(es_prio); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_GPR_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + /* SQ_THREAD_RESOURCE_MGMT */ + tmp = 0; + tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_1 */ + tmp = 0; + tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + /* SQ_STACK_RESOURCE_MGMT_2 */ + tmp = 0; + tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + + if (family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); } - flush->nbo = rctx->framebuffer->state.framebuffer.nr_cbufs; - return radeon_state_pm4(flush); + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, rstate); } -static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush) +void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct pipe_surface *surf; - - surf = rctx->framebuffer->state.framebuffer.zsbuf; - - if (!surf) - return 0; - - radeon_state_init(flush, rscreen->rw, R600_STATE_DB_FLUSH, 0, 0); - rtex = (struct r600_resource_texture*)surf->texture; - rbuffer = &rtex->resource; - /* just need to the bo to the flush list */ - radeon_ws_bo_reference(rscreen->rw, &flush->bo[0], rbuffer->bo); - flush->placement[0] = RADEON_GEM_DOMAIN_VRAM; - - flush->nbo = 1; - return radeon_state_pm4(flush); -} - -int r600_context_hw_states(struct pipe_context *ctx) -{ - struct r600_context *rctx = r600_context(ctx); - unsigned i; - - /* build new states */ - rctx->vtbl->rasterizer(rctx, &rctx->hw_states.rasterizer); - rctx->vtbl->scissor(rctx, &rctx->hw_states.scissor); - rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa); - rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl); - - /* setup flushes */ - setup_db_flush(rctx, &rctx->hw_states.db_flush); - setup_cb_flush(rctx, &rctx->hw_states.cb_flush); - - /* bind states */ - radeon_draw_bind(&rctx->draw, &rctx->config); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - - if (rctx->viewport) { - radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); - } - if (rctx->blend) { - radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]); - } - if (rctx->clip) { - radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]); - } - for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); - } - if (rctx->framebuffer->state.framebuffer.zsbuf) { - radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]); + struct pipe_depth_stencil_alpha_state dsa; + struct r600_pipe_state *rstate; + boolean quirk = false; + + if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + quirk = true; + + memset(&dsa, 0, sizeof(dsa)); + + if (quirk) { + dsa.depth.enabled = 1; + dsa.depth.func = PIPE_FUNC_LEQUAL; + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; + dsa.stencil[0].writemask = 0xff; } - r600_bind_shader_sampler(rctx, &rctx->vs_sampler); - r600_bind_shader_sampler(rctx, &rctx->ps_sampler); - - return 0; + rstate = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + 0x0, + S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + r600_pipe_state_add_reg(rstate, + R_028D0C_DB_RENDER_CONTROL, + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), + S_028D0C_DEPTH_COPY_ENABLE(1) | + S_028D0C_STENCIL_COPY_ENABLE(1) | + S_028D0C_COPY_CENTROID(1), NULL); + return rstate; } diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c deleted file mode 100644 index 86c10a877dd..00000000000 --- a/src/gallium/drivers/r600/r600_state2.c +++ /dev/null @@ -1,2228 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* TODO: - * - fix mask for depth control & cull for query - */ -#include <stdio.h> -#include <errno.h> -#include <pipe/p_defines.h> -#include <pipe/p_state.h> -#include <pipe/p_context.h> -#include <tgsi/tgsi_scan.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_util.h> -#include <util/u_blitter.h> -#include <util/u_double_list.h> -#include <util/u_transfer.h> -#include <util/u_surface.h> -#include <util/u_pack_color.h> -#include <util/u_memory.h> -#include <util/u_inlines.h> -#include <pipebuffer/pb_buffer.h> -#include "r600.h" -#include "r600d.h" -#include "r700_sq.h" -struct radeon_state { - unsigned dummy; -}; -#include "r600_resource.h" -#include "r600_shader.h" - - -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, - uint32_t *word4_p, uint32_t *yuv_format_p); - -#include "r600_state_inlines.h" - -enum chip_class { - R600, - R700, - EVERGREEN, -}; - -enum r600_pipe_state_id { - R600_PIPE_STATE_BLEND = 0, - R600_PIPE_STATE_BLEND_COLOR, - R600_PIPE_STATE_CONFIG, - R600_PIPE_STATE_CLIP, - R600_PIPE_STATE_SCISSOR, - R600_PIPE_STATE_VIEWPORT, - R600_PIPE_STATE_RASTERIZER, - R600_PIPE_STATE_VGT, - R600_PIPE_STATE_FRAMEBUFFER, - R600_PIPE_STATE_DSA, - R600_PIPE_STATE_STENCIL_REF, - R600_PIPE_STATE_PS_SHADER, - R600_PIPE_STATE_VS_SHADER, - R600_PIPE_STATE_CONSTANT, - R600_PIPE_STATE_SAMPLER, - R600_PIPE_STATE_RESOURCE, - R600_PIPE_NSTATES -}; - -struct r600_screen { - struct pipe_screen screen; - struct radeon *radeon; - unsigned chip_class; -}; - -struct r600_pipe_sampler_view { - struct pipe_sampler_view base; - struct r600_pipe_state state; -}; - -struct r600_pipe_rasterizer { - struct r600_pipe_state rstate; - bool flatshade; - unsigned sprite_coord_enable; -}; - -struct r600_pipe_blend { - struct r600_pipe_state rstate; - unsigned cb_target_mask; -}; - -struct r600_pipe_shader { - struct r600_shader shader; - struct r600_pipe_state rstate; - struct radeon_ws_bo *bo; -}; - -struct r600_vertex_element -{ - unsigned count; - unsigned refcount; - struct pipe_vertex_element elements[32]; -}; - -struct r600_pipe_context { - struct pipe_context context; - struct r600_screen *screen; - struct radeon *radeon; - struct blitter_context *blitter; - struct r600_pipe_state *states[R600_PIPE_NSTATES]; - struct r600_context ctx; - struct r600_vertex_element *vertex_elements; - struct pipe_framebuffer_state framebuffer; - struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; - unsigned cb_target_mask; - /* for saving when using blitter */ - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_clip_state clip; - unsigned vs_nconst; - unsigned ps_nconst; - struct r600_pipe_state vs_const[256]; - struct r600_pipe_state ps_const[256]; - struct r600_pipe_state vs_resource[160]; - struct r600_pipe_state ps_resource[160]; - struct r600_pipe_state config; - struct r600_pipe_shader *ps_shader; - struct r600_pipe_shader *vs_shader; - /* shader information */ - bool ps_rebuild; - bool vs_rebuild; - unsigned sprite_coord_enable; - bool flatshade; -}; - -static INLINE u32 S_FIXED(float value, u32 frac_bits) -{ - return value * (1 << frac_bits); -} - -/* r600_shader.c */ -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from tgsi */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028858_SQ_PGM_START_VS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028894_SQ_PGM_START_FS, - 0x00000000, 0xFFFFFFFF, shader->bo); - rctx->vs_rebuild = false; -} - -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE; - - /* clear previous register */ - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rctx->sprite_coord_enable & (1 << i)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); - num_cout++; - } - } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - spi_input_z |= 1; - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D0_SPI_PS_IN_CONTROL_1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028840_SQ_PGM_START_PS, - 0x00000000, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - rctx->ps_rebuild = false; -} - -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *rshader = &shader->shader; - void *ptr; - - /* copy new shader */ - if (shader->bo == NULL) { - shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); - if (shader->bo == NULL) { - return -ENOMEM; - } - ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL); - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); - radeon_ws_bo_unmap(rctx->radeon, shader->bo); - } - /* build state */ - rshader->flat_shade = rctx->flatshade; - switch (rshader->processor_type) { - case TGSI_PROCESSOR_VERTEX: - r600_pipe_shader_vs(ctx, shader); - break; - case TGSI_PROCESSOR_FRAGMENT: - r600_pipe_shader_ps(ctx, shader); - break; - default: - return -EINVAL; - } - r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); - return 0; -} - -static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *shader = &rshader->shader; - const struct util_format_description *desc; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc; - struct r600_bc_cf *cf; - struct r600_bc_vtx *vtx; - - if (shader->processor_type != TGSI_PROCESSOR_VERTEX) - return 0; - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; - } - radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL); - LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (cf->inst) { - case V_SQ_CF_WORD1_SQ_CF_INST_VTX: - case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - desc = util_format_description(resource_format[vtx->buffer_id]); - if (desc == NULL) { - R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); - return -EINVAL; - } - vtx->dst_sel_x = desc->swizzle[0]; - vtx->dst_sel_y = desc->swizzle[1]; - vtx->dst_sel_z = desc->swizzle[2]; - vtx->dst_sel_w = desc->swizzle[3]; - } - break; - default: - break; - } - } - return r600_bc_build(&shader->bc); -} - -static int r600_pipe_shader_update2(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - - if (shader == NULL) - return -EINVAL; - if (shader->bo) { - switch (shader->shader.processor_type) { - case TGSI_PROCESSOR_VERTEX: - if (!rctx->vs_rebuild) - return 0; - break; - case TGSI_PROCESSOR_FRAGMENT: - if (!rctx->ps_rebuild) - return 0; - break; - default: - return -EINVAL; - } - } - /* there should be enough input */ - if (rctx->vertex_elements->count < shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, shader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, shader); - if (r) - return r; - return r600_pipe_shader(ctx, shader); -} - -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); -static int r600_pipe_shader_create2(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); - shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - return r; - } - r = r600_bc_build(&shader->shader.bc); - if (r) { - R600_ERR("building bytecode failed !\n"); - return r; - } -//fprintf(stderr, "______________________________________________________________\n"); - return 0; -} -/* r600_shader.c END */ - -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - enum radeon_family family = r600_get_family(rscreen->radeon); - - if (family >= CHIP_R600 && family < CHIP_RV770) - return "R600 (HD2XXX,HD3XXX)"; - else - return "R700 (HD4XXX)"; -} - -static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_DUAL_SOURCE_BLEND: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_SM3: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - return 1; - - /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* FIXME allow this once infrastructure is there */ - return 0; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - /* FIXME some r6xx are buggy and can only do 4 */ - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - R600_ERR("r600: unknown param %d\n", param); - return 0; - } -} - -static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - R600_ERR("r600: unsupported paramf %d\n", param); - return 0.0f; - } -} - -static boolean r600_is_format_supported(struct pipe_screen* screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage, - unsigned geom_flags) -{ - unsigned retval = 0; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - R600_ERR("r600: unsupported texture type %d\n", target); - return FALSE; - } - - /* Multisample */ - if (sample_count > 1) - return FALSE; - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { - retval |= PIPE_BIND_SAMPLER_VIEW; - } - - if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { - retval |= usage & - (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED); - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - r600_is_zs_format_supported(format)) { - retval |= PIPE_BIND_DEPTH_STENCIL; - } - - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; - - if (usage & PIPE_BIND_TRANSFER_READ) - retval |= PIPE_BIND_TRANSFER_READ; - if (usage & PIPE_BIND_TRANSFER_WRITE) - retval |= PIPE_BIND_TRANSFER_WRITE; - - return retval == usage; -} - -static void r600_destroy_screen(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - if (rscreen == NULL) - return; - FREE(rscreen); -} - -struct r600_drawl { - struct pipe_context *ctx; - unsigned mode; - unsigned start; - unsigned count; - unsigned index_size; - struct pipe_resource *index_buffer; -}; - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - unsigned i, j, offset, format, prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct pipe_vertex_buffer *vertex_buffer; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update2(&rctx->context, rctx->vs_shader)) - return; - if (r600_pipe_shader_update2(&rctx->context, rctx->ps_shader)) - return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - rstate = &rctx->vs_resource[i]; - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, - R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride) | - S_038008_DATA_FORMAT(format), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); - } - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONFIG, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028408_VGT_INDX_OFFSET, draw->start, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -static void r600_draw_vbo2(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - - assert(info->index_bias == 0); - - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.index_size = rctx->index_buffer.index_size; - draw.index_buffer = rctx->index_buffer.buffer; - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); - draw.start += rctx->index_buffer.offset / - rctx->index_buffer.index_size; - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - } - r600_draw_common(&draw); -} - -static void r600_flush2(struct pipe_context *ctx, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; -#if 0 - static int dc = 0; - char dname[256]; -#endif - - if (!rctx->ctx.pm4_cdwords) - return; - -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 2) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - r600_context_flush(&rctx->ctx); -} - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; - - r600_context_fini(&rctx->ctx); - for (int i = 0; i < R600_PIPE_NSTATES; i++) { - free(rctx->states[i]); - } - FREE(rctx); -} - -static void r600_blitter_save_states(struct pipe_context *ctx) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); - if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - } - util_blitter_save_rasterizer(rctx->blitter, rctx->states[R600_PIPE_STATE_RASTERIZER]); - util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if (rctx->states[R600_PIPE_STATE_VIEWPORT]) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport); - } - if (rctx->states[R600_PIPE_STATE_CLIP]) { - util_blitter_save_clip(rctx->blitter, &rctx->clip); - } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; - - /* TODO queries */ -} - -static void r600_clear(struct pipe_context *ctx, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - r600_blitter_save_states(ctx); - util_blitter_clear(rctx->blitter, fb->width, fb->height, - fb->nr_cbufs, buffers, rgba, depth, - stencil); -} - -static void r600_clear_render_target(struct pipe_context *ctx, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_render_target(rctx->blitter, dst, rgba, - dstx, dsty, width, height); -} - -static void r600_clear_depth_stencil(struct pipe_context *ctx, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - util_blitter_save_framebuffer(rctx->blitter, fb); - util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); -} - - -static void r600_resource_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) -{ - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); -} - -static void r600_init_blit_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.clear = r600_clear; - rctx->context.clear_render_target = r600_clear_render_target; - rctx->context.clear_depth_stencil = r600_clear_depth_stencil; - rctx->context.resource_copy_region = r600_resource_copy_region; -} - -static void r600_init_context_resource_functions2(struct r600_pipe_context *r600) -{ - r600->context.get_transfer = u_get_transfer_vtbl; - r600->context.transfer_map = u_transfer_map_vtbl; - r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; - r600->context.transfer_unmap = u_transfer_unmap_vtbl; - r600->context.transfer_destroy = u_transfer_destroy_vtbl; - r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; -} - -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); - free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); - rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) -{ - struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); - struct r600_pipe_state *rstate; - u32 color_control, target_mask; - - if (blend == NULL) { - return NULL; - } - rstate = &blend->rstate; - - rstate->id = R600_PIPE_STATE_BLEND; - - target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); - if (state->logicop_enable) { - color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); - } else { - color_control |= (0xcc << 16); - } - /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ - if (state->independent_blend_enable) { - for (int i = 0; i < 8; i++) { - if (state->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[i].colormask << (4 * i)); - } - } else { - for (int i = 0; i < 8; i++) { - if (state->rt[0].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - } - target_mask |= (state->rt[0].colormask << (4 * i)); - } - } - blend->cb_target_mask = target_mask; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); - - for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; - uint32_t bc = 0; - - if (!state->rt[i].blend_enable) - continue; - - bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); - bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); - bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= S_028804_SEPARATE_ALPHA_BLEND(1); - bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); - bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); - bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); - } - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } - } - return rstate; -} - -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; - unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_DSA; - /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ - db_shader_control = 0x210; - stencil_ref_mask = 0; - stencil_ref_mask_bf = 0; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); - - /* stencil */ - if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - - - stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - } - } - - /* alpha */ - alpha_test_control = 0; - alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); - } - - /* misc */ - db_render_control = 0; - db_render_override = S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | - S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); - - return rstate; -} - -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) -{ - struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); - struct r600_pipe_state *rstate; - float offset_units = 0, offset_scale = 0; - unsigned offset_db_fmt_cntl = 0; - unsigned tmp; - unsigned prov_vtx = 1; - - if (rs == NULL) { - return NULL; - } - - rstate = &rs->rstate; - rs->flatshade = state->flatshade; - rs->sprite_coord_enable = state->sprite_coord_enable; - - rstate->id = R600_PIPE_STATE_RASTERIZER; - if (state->flatshade_first) - prov_vtx = 0; - tmp = 0x00000001; - if (state->sprite_coord_enable) { - tmp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028814_PA_SU_SC_MODE_CNTL, - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02881C_PA_CL_VS_OUT_CNTL, - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units), 0xFFFFFFFF, NULL); - return rstate; -} - -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - if (rctx->flatshade != rs->flatshade) { - rctx->ps_rebuild = true; - } - if (rctx->sprite_coord_enable != rs->sprite_coord_enable) { - rctx->ps_rebuild = true; - } - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void r600_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) -{ - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - union util_color uc; - - if (rstate == NULL) { - return NULL; - } - - rstate->id = R600_PIPE_STATE_SAMPLER; - util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C000_SQ_TEX_SAMPLER_WORD0_0, - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); - /* FIXME LOD it depends on texture base level ... */ - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_SAMPLER, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); - if (uc.ui) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); - } - return rstate; -} - -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; - struct r600_resource *rbuffer; - unsigned format; - uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; - struct radeon_ws_bo *bo[2]; - - if (resource == NULL) - return NULL; - rstate = &resource->state; - - /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; - pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; - - swizzle[0] = state->swizzle_r; - swizzle[1] = state->swizzle_g; - swizzle[2] = state->swizzle_b; - swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(texture->format, - swizzle, - &word4, &yuv_format); - if (format == ~0) { - format = 0; - } - desc = util_format_description(texture->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", texture->format); - } - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { -#if 0 - r = r600_texture_from_depth(ctx, tmp, view->first_level); - if (r) { - return; - } - bo[0] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); - bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); -#endif - } - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; - - /* FIXME properly handle first level != 0 */ - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, - S_038000_DIM(r600_tex_dim(texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | - S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038008_RESOURCE0_WORD2, - tmp->offset[0] >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_03800C_RESOURCE0_WORD3, - tmp->offset[1] >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | - S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038018_RESOURCE0_WORD6, - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); - - return &resource->base; -} - -static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - /* TODO */ - assert(1); -} - -static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - - for (int i = 0; i < count; i++) { - if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); - } - } -} - -static void r600_bind_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - - /* TODO implement */ - for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); - } -} - -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - radeon_ws_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); -} - -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->clip = *state; - rstate->id = R600_PIPE_STATE_CLIP; - for (int i = 0; i < state->nr; i++) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E20_PA_CL_UCP0_X + i * 4, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E24_PA_CL_UCP0_Y + i * 4, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E28_PA_CL_UCP0_Z + i * 4, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028E2C_PA_CL_UCP0_W + i * 4, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028810_PA_CL_CLIP_CNTL, - S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | - S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_CLIP]); - rctx->states[R600_PIPE_STATE_CLIP] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - r600_delete_vertex_element(ctx, rctx->vertex_elements); - rctx->vertex_elements = v; - if (v) { - v->refcount++; - rctx->vs_rebuild = true; - } -} - -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} - -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tl, br; - - if (rstate == NULL) - return; - - rstate->id = R600_PIPE_STATE_SCISSOR; - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_SCISSOR]); - rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 tmp; - - if (rstate == NULL) - return; - - rctx->stencil_ref = *state; - rstate->id = R600_PIPE_STATE_STENCIL_REF; - tmp = S_028430_STENCILREF(state->ref_value[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); - tmp = S_028434_STENCILREF_BF(state->ref_value[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); - - free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); - rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - - if (rstate == NULL) - return; - - rctx->viewport = *state; - rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_VIEWPORT]); - rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state, int cb) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - struct radeon_ws_bo *bo[3]; - - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - bo[2] = rbuffer->bo; - - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1) | - S_0280A0_NUMBER_TYPE(ntype); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028040_CB_COLOR0_BASE + cb * 4, - state->cbufs[cb]->offset >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028060_CB_COLOR0_SIZE + cb * 4, - S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280E0_CB_COLOR0_FRAG + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_0280C0_CB_COLOR0_TILE + cb * 4, - 0x00000000, 0xFFFFFFFF, bo[2]); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); -} - -static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state) -{ - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - unsigned level; - unsigned pitch, slice, format; - - if (state->zsbuf == NULL) - return; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02800C_DB_DEPTH_BASE, - state->zsbuf->offset >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028000_DB_DEPTH_SIZE, - S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028D34_DB_PREFETCH_LIMIT, - (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); -} - -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); - u32 shader_mask, tl, br, shader_control, target_mask; - - if (rstate == NULL) - return; - - /* unreference old buffer and reference new one */ - rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; - - /* build states */ - for (int i = 0; i < state->nr_cbufs; i++) { - r600_cb(rctx, rstate, state, i); - } - if (state->zsbuf) { - r600_db(rctx, rstate, state); - } - - target_mask = 0x00000000; - target_mask = 0xFFFFFFFF; - shader_mask = 0; - shader_control = 0; - for (int i = 0; i < state->nr_cbufs; i++) { - target_mask ^= 0xf << (i * 4); - shader_mask |= 0xf << (i * 4); - shader_control |= 1 << i; - } - tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, - R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); - - free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); - rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate; - struct pipe_transfer *transfer; - unsigned *nconst = NULL; - u32 *ptr, offset; - - switch (shader) { - case PIPE_SHADER_VERTEX: - rstate = rctx->vs_const; - nconst = &rctx->vs_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0 + 0x1000; - break; - case PIPE_SHADER_FRAGMENT: - rstate = rctx->ps_const; - nconst = &rctx->ps_nconst; - offset = R_030000_SQ_ALU_CONSTANT0_0; - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - if (buffer && buffer->width0 > 0) { - *nconst = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (int i = 0; i < *nconst; i++, offset += 0x10) { - rstate[i].nregs = 0; - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x0, ptr[i * 4 + 0], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x4, ptr[i * 4 + 1], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0x8, ptr[i * 4 + 2], 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rstate[i], R600_GROUP_ALU_CONST, offset + 0xC, ptr[i * 4 + 3], 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &rstate[i]); - } - pipe_buffer_unmap(ctx, buffer, transfer); - } -} - -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create2(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void r600_init_state_functions2(struct r600_pipe_context *rctx) -{ - rctx->context.create_blend_state = r600_create_blend_state; - rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.create_fs_state = r600_create_shader_state; - rctx->context.create_rasterizer_state = r600_create_rs_state; - rctx->context.create_sampler_state = r600_create_sampler_state; - rctx->context.create_sampler_view = r600_create_sampler_view; - rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = r600_create_shader_state; - rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; - rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; - rctx->context.bind_fs_state = r600_bind_ps_shader; - rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; - rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; - rctx->context.bind_vs_state = r600_bind_vs_shader; - rctx->context.delete_blend_state = r600_delete_state; - rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = r600_delete_ps_shader; - rctx->context.delete_rasterizer_state = r600_delete_rs_state; - rctx->context.delete_sampler_state = r600_delete_state; - rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = r600_delete_vs_shader; - rctx->context.set_blend_color = r600_set_blend_color; - rctx->context.set_clip_state = r600_set_clip_state; - rctx->context.set_constant_buffer = r600_set_constant_buffer; - rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; - rctx->context.set_framebuffer_state = r600_set_framebuffer_state; - rctx->context.set_polygon_stipple = r600_set_polygon_stipple; - rctx->context.set_sample_mask = r600_set_sample_mask; - rctx->context.set_scissor_state = r600_set_scissor_state; - rctx->context.set_stencil_ref = r600_set_stencil_ref; - rctx->context.set_vertex_buffers = r600_set_vertex_buffers; - rctx->context.set_index_buffer = r600_set_index_buffer; - rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; - rctx->context.set_viewport_state = r600_set_viewport_state; - rctx->context.sampler_view_destroy = r600_sampler_view_destroy; -} - -static void r600_init_config2(struct r600_pipe_context *rctx) -{ - int ps_prio; - int vs_prio; - int gs_prio; - int es_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_temp_gprs; - int num_ps_threads; - int num_vs_threads; - int num_gs_threads; - int num_es_threads; - int num_ps_stack_entries; - int num_vs_stack_entries; - int num_gs_stack_entries; - int num_es_stack_entries; - enum radeon_family family; - struct r600_pipe_state *rstate = &rctx->config; - u32 tmp; - - family = r600_get_family(rctx->radeon); - ps_prio = 0; - vs_prio = 1; - gs_prio = 2; - es_prio = 3; - switch (family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - - rstate->id = R600_PIPE_STATE_CONFIG; - - /* SQ_CONFIG */ - tmp = 0; - switch (family) { - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV710: - break; - default: - tmp |= S_008C00_VC_ENABLE(1); - break; - } - tmp |= S_008C00_DX9_CONSTS(1); - tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); - tmp |= S_008C00_PS_PRIO(ps_prio); - tmp |= S_008C00_VS_PRIO(vs_prio); - tmp |= S_008C00_GS_PRIO(gs_prio); - tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_GPR_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - /* SQ_THREAD_RESOURCE_MGMT */ - tmp = 0; - tmp |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_1 */ - tmp = 0; - tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - /* SQ_STACK_RESOURCE_MGMT_2 */ - tmp = 0; - tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - - if (family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL); - } else { - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONFIG, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028400_VGT_MAX_VTX_INDX, 0x00FFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028404_VGT_MIN_VTX_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - -static struct pipe_context *r600_create_context2(struct pipe_screen *screen, void *priv) -{ - struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); - struct r600_screen* rscreen = (struct r600_screen *)screen; - - if (rctx == NULL) - return NULL; - rctx->context.winsys = rscreen->screen.winsys; - rctx->context.screen = screen; - rctx->context.priv = priv; - rctx->context.destroy = r600_destroy_context; - rctx->context.draw_vbo = r600_draw_vbo2; - rctx->context.flush = r600_flush2; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - rctx->radeon = rscreen->radeon; - - r600_init_blit_functions2(rctx); - r600_init_state_functions2(rctx); - r600_init_context_resource_functions2(rctx); - - rctx->blitter = util_blitter_create(&rctx->context); - if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - if (r600_context_init(&rctx->ctx, rctx->radeon)) { - r600_destroy_context(&rctx->context); - return NULL; - } - - r600_init_config2(rctx); - - return &rctx->context; -} - -static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - break; - case PIPE_SHADER_GEOMETRY: - /* TODO: support and enable geometry programs */ - return 0; - default: - /* TODO: support tessellation on Evergreen */ - return 0; - } - - /* TODO: all these should be fixed, since r600 surely supports much more! */ - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; /* FIXME */ - case PIPE_SHADER_CAP_MAX_INPUTS: - if(shader == PIPE_SHADER_FRAGMENT) - return 10; - else - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; //max native temporaries - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ - case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; /* FIXME */ - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - /* TODO: support this! */ - return 0; - default: - return 0; - } -} - -void r600_init_screen_texture_functions(struct pipe_screen *screen); -struct pipe_screen *r600_screen_create2(struct radeon *radeon) -{ - struct r600_screen *rscreen; - enum radeon_family family = r600_get_family(radeon); - - rscreen = CALLOC_STRUCT(r600_screen); - if (rscreen == NULL) { - return NULL; - } - - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - default: - FREE(rscreen); - return NULL; - } - rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; - rscreen->screen.destroy = r600_destroy_screen; - rscreen->screen.get_name = r600_get_name; - rscreen->screen.get_vendor = r600_get_vendor; - rscreen->screen.get_param = r600_get_param; - rscreen->screen.get_shader_param = r600_get_shader_param; - rscreen->screen.get_paramf = r600_get_paramf; - rscreen->screen.is_format_supported = r600_is_format_supported; - rscreen->screen.context_create = r600_create_context2; - r600_init_screen_texture_functions(&rscreen->screen); - r600_init_screen_resource_functions(&rscreen->screen); - - return &rscreen->screen; -} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b4c21d9e126..1c1978f8abb 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "r600d.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -123,6 +124,21 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) return 0; } +static INLINE uint32_t r600_translate_fill(uint32_t func) +{ + switch(func) { + case PIPE_POLYGON_MODE_FILL: + return 2; + case PIPE_POLYGON_MODE_LINE: + return 1; + case PIPE_POLYGON_MODE_POINT: + return 0; + default: + assert(0); + return 0; + } +} + /* translates straight */ static INLINE uint32_t r600_translate_ds_func(int func) { @@ -136,17 +152,17 @@ static inline unsigned r600_tex_wrap(unsigned wrap) case PIPE_TEX_WRAP_REPEAT: return V_03C000_SQ_TEX_WRAP; case PIPE_TEX_WRAP_CLAMP: - return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_CLAMP_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return V_03C000_SQ_TEX_MIRROR; case PIPE_TEX_WRAP_MIRROR_CLAMP: - return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; } @@ -283,6 +299,17 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_SWAP_STD; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,22 +337,29 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_SWAP_STD; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_0280A0_COLOR_16_16_16_16; + // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_0280A0_COLOR_16_16_16_16_FLOAT; + // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + // return FMT_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); @@ -357,6 +391,16 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return V_0280A0_COLOR_8_8; + + case PIPE_FORMAT_R16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,18 +427,41 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_COLOR_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + return V_0280A0_COLOR_16_16_FLOAT; + + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_UNORM: + return V_0280A0_COLOR_16_16; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_0280A0_COLOR_16_16_16_16; + + case PIPE_FORMAT_R16G16B16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: return V_0280A0_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_SSCALED: + return V_0280A0_COLOR_32_32; + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_0280A0_COLOR_32_32_32_FLOAT; @@ -405,7 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - R600_ERR("unsupported color format %d\n", format); + R600_ERR("unsupported color format %d %s\n", format, util_format_name(format)); return ~0; /* Unsupported. */ } } @@ -431,4 +498,139 @@ static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) return r600_translate_colorformat(format) != ~0; } +static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) +{ + uint32_t result = 0; + const struct util_format_description *desc; + unsigned i; + + desc = util_format_description(format); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16_FLOAT; + break; + case 2: + result = FMT_16_16_FLOAT; + break; + case 3: + result = FMT_16_16_16_FLOAT; + break; + case 4: + result = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32_FLOAT; + break; + case 2: + result = FMT_32_32_FLOAT; + break; + case 3: + result = FMT_32_32_32_FLOAT; + break; + case 4: + result = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + result = FMT_8; + break; + case 2: + result = FMT_8_8; + break; + case 3: + // result = FMT_8_8_8; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + result = FMT_16; + break; + case 2: + result = FMT_16_16; + break; + case 3: + // result = FMT_16_16_16; /* fails piglit draw-vertices test */ + // break; + case 4: + result = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + break; + case 2: + result = FMT_32_32; + break; + case 3: + result = FMT_32_32_32; + break; + case 4: + result = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + result = S_038008_DATA_FORMAT(result); + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= S_038008_FORMAT_COMP_ALL(1); + } + if (desc->channel[i].normalized) { + result |= S_038008_NUM_FORMAT_ALL(0); + } else { + result |= S_038008_NUM_FORMAT_ALL(2); + } + return result; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(format)); + return ~0; +} + #endif diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h index de717f35368..1c8075ebdb5 100644 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ b/src/gallium/drivers/r600/r600_states_inc.h @@ -15,35 +15,34 @@ #define R600_CONFIG__DB_WATERMARKS 10 #define R600_CONFIG__SX_MISC 11 #define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__CB_SHADER_CONTROL 13 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 19 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 21 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 23 -#define R600_CONFIG__VGT_HOS_CNTL 24 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 26 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 27 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 28 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 29 -#define R600_CONFIG__VGT_GROUP_DECR 30 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 33 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 34 -#define R600_CONFIG__VGT_GS_MODE 35 -#define R600_CONFIG__PA_SC_MODE_CNTL 36 -#define R600_CONFIG__VGT_STRMOUT_EN 37 -#define R600_CONFIG__VGT_REUSE_OFF 38 -#define R600_CONFIG__VGT_VTX_CNT_EN 39 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 40 -#define R600_CONFIG_SIZE 41 +#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 +#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 +#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 +#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 +#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 +#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 +#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 +#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 +#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 +#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 +#define R600_CONFIG__VGT_HOS_CNTL 23 +#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 +#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 +#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 +#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 +#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 +#define R600_CONFIG__VGT_GROUP_DECR 29 +#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 +#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 +#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 +#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 +#define R600_CONFIG__VGT_GS_MODE 34 +#define R600_CONFIG__PA_SC_MODE_CNTL 35 +#define R600_CONFIG__VGT_STRMOUT_EN 36 +#define R600_CONFIG__VGT_REUSE_OFF 37 +#define R600_CONFIG__VGT_VTX_CNT_EN 38 +#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 +#define R600_CONFIG_SIZE 40 #define R600_CONFIG_PM4 128 /* R600_CB_CNTL */ @@ -65,7 +64,8 @@ #define R600_CB_CNTL__CB_CLRCMP_DST 15 #define R600_CB_CNTL__CB_CLRCMP_MSK 16 #define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL_SIZE 18 +#define R600_CB_CNTL__CB_SHADER_CONTROL 18 +#define R600_CB_CNTL_SIZE 19 #define R600_CB_CNTL_PM4 128 /* R600_RASTERIZER */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 274679d1274..152cd9210fd 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,11 +31,11 @@ #include <util/u_inlines.h> #include <util/u_memory.h> #include "state_tracker/drm_driver.h" -#include "r600_screen.h" -#include "r600_context.h" +#include "r600_pipe.h" #include "r600_resource.h" #include "r600_state_inlines.h" #include "r600d.h" +#include "r600_formats.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -54,11 +54,30 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t transfer->box.width, transfer->box.height); } -static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + +/* Copy from a detiled texture to a tiled one. */ +static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subsrc; + + subsrc.face = 0; + subsrc.level = 0; + ctx->resource_copy_region(ctx, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + rtransfer->linear_texture, subsrc, + 0, 0, 0, + transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); +} + +static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) { - unsigned long offset = rtex->offset[level]; + unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: @@ -73,73 +92,128 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex) +static unsigned r600_texture_get_stride(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned width, stride; + + if (rtex->pitch_override) + return rtex->pitch_override; + + width = u_minify(ptex->width0, level); + + stride = util_format_get_stride(ptex->format, align(width, 64)); + if (chipc == EVERGREEN) + stride = align(stride, 512); + else + stride = align(stride, 256); + return stride; +} + +static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + unsigned height; + + height = u_minify(ptex->height0, level); + height = util_next_power_of_two(height); + return util_format_get_nblocksy(ptex->format, height); +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned pitch_to_width(enum pipe_format format, + unsigned pitch_in_bytes) +{ + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r600_setup_miptree(struct pipe_screen *screen, + struct r600_resource_texture *rtex) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, pitch, size, layer_size, i, offset; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned pitch, size, layer_size, i, offset; + unsigned nblocksy; - rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { - w = u_minify(ptex->width0, i); - h = u_minify(ptex->height0, i); - h = util_next_power_of_two(h); - pitch = util_format_get_stride(ptex->format, align(w, 64)); - pitch = align(pitch, 256); - layer_size = pitch * h; - if (ptex->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; + pitch = r600_texture_get_stride(screen, rtex, i); + nblocksy = r600_texture_get_nblocksy(screen, rtex, i); + + layer_size = pitch * nblocksy; + + if (ptex->target == PIPE_TEXTURE_CUBE) { + if (chipc >= R700) + size = layer_size * 8; + else + size = layer_size * 6; + } else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = pitch; - rtex->width[i] = w; - rtex->height[i] = h; + rtex->pitch_in_bytes[i] = pitch; + rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); offset += size; } rtex->size = offset; } -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct r600_resource_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + unsigned array_mode, + unsigned pitch_in_bytes_override, + unsigned max_buffer_size, + struct r600_bo *bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; struct radeon *radeon = (struct radeon *)screen->winsys; rtex = CALLOC_STRUCT(r600_resource_texture); - if (!rtex) { + if (rtex == NULL) return NULL; - } + resource = &rtex->resource; - resource->base.b = *templ; + resource->base.b = *base; resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex); - - /* FIXME alignment 4096 enought ? too much ? */ + resource->bo = bo; resource->domain = r600_domain_from_usage(resource->base.b.bind); + rtex->pitch_override = pitch_in_bytes_override; + rtex->array_mode = array_mode; + + r600_setup_miptree(screen, rtex); + resource->size = rtex->size; - resource->bo = radeon_ws_bo(radeon, rtex->size, 4096, 0); - if (resource->bo == NULL) { - FREE(rtex); - return NULL; + + if (!resource->bo) { + resource->bo = r600_bo(radeon, rtex->size, 4096, 0); + if (!resource->bo) { + FREE(rtex); + return NULL; + } } - return &resource->base.b; + return rtex; } -static void r600_texture_destroy_state(struct pipe_resource *ptexture) +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) { - struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture; + unsigned array_mode = 0; + + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + 0, 0, NULL); - for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) { - radeon_state_fini(&rtexture->scissor[i]); - radeon_state_fini(&rtexture->db[i]); - for (int j = 0; j < 8; j++) { - radeon_state_fini(&rtexture->cb[j][i]); - } - } } static void r600_texture_destroy(struct pipe_screen *screen, @@ -149,13 +223,12 @@ static void r600_texture_destroy(struct pipe_screen *screen, struct r600_resource *resource = &rtex->resource; struct radeon *radeon = (struct radeon *)screen->winsys; + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + if (resource->bo) { - radeon_ws_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(radeon, &resource->bo, NULL); } - if (rtex->uncompressed) { - radeon_ws_bo_reference(radeon, &rtex->uncompressed, NULL); - } - r600_texture_destroy_state(ptex); FREE(rtex); } @@ -166,7 +239,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned long offset; + unsigned offset; if (surface == NULL) return NULL; @@ -191,46 +264,28 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) FREE(surface); } + struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource_texture *rtex; - struct r600_resource *resource; - struct radeon_ws_bo *bo = NULL; + struct r600_bo *bo = NULL; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_resource_texture); - if (rtex == NULL) - return NULL; - - bo = radeon_ws_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle); if (bo == NULL) { - FREE(rtex); return NULL; } - resource = &rtex->resource; - resource->base.b = *templ; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; - resource->bo = bo; - rtex->depth = 0; - rtex->pitch_override = whandle->stride; - rtex->bpt = util_format_get_blocksize(templ->format); - rtex->pitch[0] = whandle->stride; - rtex->width[0] = templ->width0; - rtex->height[0] = templ->height0; - rtex->offset[0] = 0; - rtex->size = align(rtex->pitch[0] * templ->height0, 64); - - return &resource->base.b; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, + whandle->stride, + 0, + bo); } static unsigned int r600_texture_is_referenced(struct pipe_context *context, @@ -241,6 +296,41 @@ static unsigned int r600_texture_is_referenced(struct pipe_context *context, return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } +int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); + +int r600_texture_depth_flush(struct pipe_context *ctx, + struct pipe_resource *texture) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; + struct pipe_resource resource; + + if (rtex->flushed_depth_texture) + goto out; + + resource.target = PIPE_TEXTURE_2D; + resource.format = texture->format; + resource.width0 = texture->width0; + resource.height0 = texture->height0; + resource.depth0 = 1; + resource.last_level = 0; + resource.nr_samples = 0; + resource.usage = PIPE_USAGE_DYNAMIC; + resource.bind = 0; + resource.flags = 0; + + resource.bind |= PIPE_BIND_RENDER_TARGET; + + rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource); + if (rtex->flushed_depth_texture == NULL) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + return -ENOMEM; + } + +out: + r600_blit_uncompress_depth_ptr(ctx, rtex); + return 0; +} + struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, struct pipe_subresource sr, @@ -250,6 +340,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; struct r600_transfer *trans; + int r; trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) @@ -258,14 +349,22 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch[sr.level]; + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); - if (rtex->tilled && !rtex->depth) { + if (rtex->depth) { + r = r600_texture_depth_flush(ctx, texture); + if (r < 0) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + pipe_resource_reference(&trans->transfer.resource, NULL); + FREE(trans); + return NULL; + } + } else if (rtex->tiled) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; resource.width0 = box->width; resource.height0 = box->height; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; @@ -289,6 +388,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + + trans->transfer.stride = + ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ @@ -304,10 +406,17 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->linear_texture) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { + r600_copy_into_tiled_texture(ctx, rtransfer); + } pipe_resource_reference(&rtransfer->linear_texture, NULL); } + if (rtex->flushed_depth_texture) { + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + } pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -315,40 +424,31 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { - struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon_ws_bo *bo; + struct r600_bo *bo; enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - struct r600_resource_texture *rtex; - unsigned long offset = 0; + unsigned offset = 0; char *map; - int r; - ctx->flush(ctx, 0, NULL); if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { - rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth && rscreen->chip_class != EVERGREEN) { - r = r600_texture_from_depth(ctx, rtex, transfer->sr.level); - if (r) { - return NULL; - } - r600_flush(ctx, 0, NULL); - bo = rtex->uncompressed; - } else { + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + + if (rtex->flushed_depth_texture) + bo = ((struct r600_resource *)rtex->flushed_depth_texture)->bo; + else bo = ((struct r600_resource *)transfer->resource)->bo; - } + offset = rtransfer->offset + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - map = radeon_ws_bo_map(radeon, bo, 0, r600_context(ctx)); + map = r600_bo_map(radeon, bo, 0, ctx); if (!map) { return NULL; } - radeon_ws_bo_wait(radeon, bo); return map + offset; } @@ -358,20 +458,20 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - struct r600_resource_texture *rtex; - struct radeon_ws_bo *bo; + struct r600_bo *bo; if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { - rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth) { - bo = rtex->uncompressed; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + + if (rtex->flushed_depth_texture) { + bo = ((struct r600_resource *)rtex->flushed_depth_texture)->bo; } else { bo = ((struct r600_resource *)transfer->resource)->bo; } } - radeon_ws_bo_unmap(radeon, bo); + r600_bo_unmap(radeon, bo); } struct u_resource_vtbl r600_texture_vtbl = @@ -466,13 +566,23 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; + case PIPE_FORMAT_X24S8_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: - result = V_0280A0_COLOR_8_24; - goto out_word4; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_0280A0_COLOR_8_24; + result = FMT_8_24; + goto out_word4; + case PIPE_FORMAT_S8X24_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + result = FMT_24_8; + goto out_word4; + case PIPE_FORMAT_S8_USCALED: + result = V_0280A0_COLOR_8; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; default: goto out_unknown; @@ -526,7 +636,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { - result = V_0280A0_COLOR_5_6_5; + result = FMT_5_6_5; goto out_word4; } goto out_unknown; @@ -535,14 +645,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 5 && desc->channel[2].size == 5 && desc->channel[3].size == 1) { - result = V_0280A0_COLOR_1_5_5_5; + result = FMT_1_5_5_5; goto out_word4; } if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = V_0280A0_COLOR_10_10_10_2; + result = FMT_10_10_10_2; goto out_word4; } goto out_unknown; @@ -550,79 +660,89 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; } + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + goto out_unknown; + /* uniform formats */ - switch (desc->channel[0].type) { + switch (desc->channel[i].type) { case UTIL_FORMAT_TYPE_UNSIGNED: case UTIL_FORMAT_TYPE_SIGNED: - if (!desc->channel[0].normalized && + if (!desc->channel[i].normalized && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { goto out_unknown; } - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 4: switch (desc->nr_channels) { case 2: - result = V_0280A0_COLOR_4_4; + result = FMT_4_4; goto out_word4; case 4: - result = V_0280A0_COLOR_4_4_4_4; + result = FMT_4_4_4_4; goto out_word4; } goto out_unknown; case 8: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_8; + result = FMT_8; goto out_word4; case 2: - result = V_0280A0_COLOR_8_8; + result = FMT_8_8; goto out_word4; case 4: - result = V_0280A0_COLOR_8_8_8_8; + result = FMT_8_8_8_8; goto out_word4; } goto out_unknown; case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16; + result = FMT_16_16; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16; + result = FMT_16_16_16_16; goto out_word4; } } goto out_unknown; case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[0].size) { + switch (desc->channel[i].size) { case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16_FLOAT; + result = FMT_16_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16_FLOAT; + result = FMT_16_16_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; goto out_word4; } goto out_unknown; case 32: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_32_FLOAT; + result = FMT_32_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_32_32_FLOAT; + result = FMT_32_32_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; goto out_word4; } } @@ -638,81 +758,3 @@ out_unknown: // R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); return ~0; } - -int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - int r; - - if (!rtexture->depth) { - /* This shouldn't happen maybe print a warning */ - return 0; - } - if (rtexture->uncompressed && !rtexture->dirty) { - /* Uncompressed bo already in good state */ - return 0; - } - - /* allocate uncompressed texture */ - if (rtexture->uncompressed == NULL) { - rtexture->uncompressed = radeon_ws_bo(rscreen->rw, rtexture->size, 4096, 0); - if (rtexture->uncompressed == NULL) { - return -ENOMEM; - } - } - - /* render a rectangle covering whole buffer to uncompress depth */ - r = r600_blit_uncompress_depth(ctx, rtexture, level); - if (r) { - return r; - } - - rtexture->dirty = 0; - return 0; -} - - - -int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->scissor[level].cpm4) { - rctx->vtbl->texture_state_scissor(rscreen, rtexture, level); - } - return 0; -} - -int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->cb[cb][level].cpm4) { - rctx->vtbl->texture_state_cb(rscreen, rtexture, cb, level); - } - return 0; -} - -int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->db[level].cpm4) { - rctx->vtbl->texture_state_db(rscreen, rtexture, level); - } - return 0; -} - -int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - - if (!rtexture->viewport[level].cpm4) { - rctx->vtbl->texture_state_viewport(rscreen, rtexture, level); - } - return 0; -} diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 07bfc0593e9..a3cb5b86004 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -607,8 +607,15 @@ #define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) #define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 #define R_028D0C_DB_RENDER_CONTROL 0x028D0C +#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) +#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) +#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) +#define S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) +#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) #define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) #define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) +#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) +#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) #define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) #define R_028D10_DB_RENDER_OVERRIDE 0x028D10 #define V_028D10_FORCE_OFF 0 @@ -660,6 +667,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF @@ -894,6 +904,10 @@ #define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) #define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) #define C_038000_TILE_MODE 0xFFFFFF87 +#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004 #define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) #define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) #define C_038000_TILE_TYPE 0xFFFFFF7F @@ -1018,43 +1032,13 @@ #define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_038008_DATA_FORMAT 0xFC0FFFFF -#define V_038008_COLOR_INVALID 0x00000000 -#define V_038008_COLOR_8 0x00000001 -#define V_038008_COLOR_4_4 0x00000002 -#define V_038008_COLOR_3_3_2 0x00000003 -#define V_038008_COLOR_16 0x00000005 -#define V_038008_COLOR_16_FLOAT 0x00000006 -#define V_038008_COLOR_8_8 0x00000007 -#define V_038008_COLOR_5_6_5 0x00000008 -#define V_038008_COLOR_6_5_5 0x00000009 -#define V_038008_COLOR_1_5_5_5 0x0000000A -#define V_038008_COLOR_4_4_4_4 0x0000000B -#define V_038008_COLOR_5_5_5_1 0x0000000C -#define V_038008_COLOR_32 0x0000000D -#define V_038008_COLOR_32_FLOAT 0x0000000E -#define V_038008_COLOR_16_16 0x0000000F -#define V_038008_COLOR_16_16_FLOAT 0x00000010 -#define V_038008_COLOR_8_24 0x00000011 -#define V_038008_COLOR_8_24_FLOAT 0x00000012 -#define V_038008_COLOR_24_8 0x00000013 -#define V_038008_COLOR_24_8_FLOAT 0x00000014 -#define V_038008_COLOR_10_11_11 0x00000015 -#define V_038008_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038008_COLOR_11_11_10 0x00000017 -#define V_038008_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038008_COLOR_2_10_10_10 0x00000019 -#define V_038008_COLOR_8_8_8_8 0x0000001A -#define V_038008_COLOR_10_10_10_2 0x0000001B -#define V_038008_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_038008_COLOR_32_32 0x0000001D -#define V_038008_COLOR_32_32_FLOAT 0x0000001E -#define V_038008_COLOR_16_16_16_16 0x0000001F -#define V_038008_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038008_COLOR_32_32_32_32 0x00000022 -#define V_038008_COLOR_32_32_32_32_FLOAT 0x00000023 + #define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF +#define V_038008_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_038008_SQ_NUM_FORMAT_INT 0x00000001 +#define V_038008_SQ_NUM_FORMAT_SCALED 0x00000002 #define S_038008_FORMAT_COMP_ALL(x) (((x) & 0x1) << 28) #define G_038008_FORMAT_COMP_ALL(x) (((x) >> 28) & 0x1) #define C_038008_FORMAT_COMP_ALL 0xEFFFFFFF @@ -2112,6 +2096,9 @@ #define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 #define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 #define R_028C00_PA_SC_LINE_CNTL 0x028C00 +#define S_028C00_LAST_PIXEL(x) (((x) & 0x1) << 10) +#define G_028C00_LAST_PIXEL(x) (((x) >> 10) & 0x1) +#define C_028C00_LAST_PIXEL 0xFFFFFBFF #define R_028C04_PA_SC_AA_CONFIG 0x028C04 #define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C #define R_028C48_PA_SC_AA_MASK 0x028C48 @@ -2125,7 +2112,16 @@ #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define R_028A04_PA_SU_POINT_MINMAX 0x028A04 +#define S_028A04_MIN_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028A04_MIN_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028A04_MIN_SIZE 0xFFFF0000 +#define S_028A04_MAX_SIZE(x) (((x) & 0xFFFF) << 16) +#define G_028A04_MAX_SIZE(x) (((x) >> 16) & 0xFFFF) +#define C_028A04_MAX_SIZE 0x0000FFFF #define R_028A08_PA_SU_LINE_CNTL 0x028A08 +#define S_028A08_WIDTH(x) (((x) & 0xFFFF) << 0) +#define G_028A08_WIDTH(x) (((x) >> 0) & 0xFFFF) +#define C_028A08_WIDTH 0xFFFF0000 #define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 #define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC @@ -2134,6 +2130,27 @@ #define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 #define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C #define R_028818_PA_CL_VTE_CNTL 0x028818 +#define S_028818_VPORT_X_SCALE_ENA(x) (((x) & 0x1) << 0) +#define G_028818_VPORT_X_SCALE_ENA(x) (((x) >> 0 & 0x1) +#define C_028818_VPORT_X_SCALE_ENA 0xFFFFFFFE +#define S_028818_VPORT_X_OFFSET_ENA(x) (((x) & 0x1) << 1) +#define G_028818_VPORT_X_OFFSET_ENA(x) (((x) >> 1 & 0x1) +#define C_028818_VPORT_X_OFFSET_ENA 0xFFFFFFFD +#define S_028818_VPORT_Y_SCALE_ENA(x) (((x) & 0x1) << 2) +#define G_028818_VPORT_Y_SCALE_ENA(x) (((x) >> 2 & 0x1) +#define C_028818_VPORT_Y_SCALE_ENA 0xFFFFFFFB +#define S_028818_VPORT_Y_OFFSET_ENA(x) (((x) & 0x1) << 3) +#define G_028818_VPORT_Y_OFFSET_ENA(x) (((x) >> 3 & 0x1) +#define C_028818_VPORT_Y_OFFSET_ENA 0xFFFFFFF7 +#define S_028818_VPORT_Z_SCALE_ENA(x) (((x) & 0x1) << 4) +#define G_028818_VPORT_Z_SCALE_ENA(x) (((x) >> 4 & 0x1) +#define C_028818_VPORT_Z_SCALE_ENA 0xFFFFFFEF +#define S_028818_VPORT_Z_OFFSET_ENA(x) (((x) & 0x1) << 5) +#define G_028818_VPORT_Z_OFFSET_ENA(x) (((x) >> 5 & 0x1) +#define C_028818_VPORT_Z_OFFSET_ENA 0xFFFFFFDF +#define S_028818_VTX_W0_FMT(x) (((x) & 0x1) << 10) +#define G_028818_VTX_W0_FMT(x) (((x) >> 10) & 0x1) +#define C_028818_VTX_W0_FMT 0xFFFFFBFF #define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C #define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 #define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C @@ -2199,6 +2216,12 @@ #define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850 #define R_028854_SQ_PGM_EXPORTS_PS 0x028854 +#define S_028854_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_028854_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_028854_EXPORT_COLORS 0xFFFFFFE1 +#define S_028854_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_028854_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_028854_EXPORT_Z 0xFFFFFFFE #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C #define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 @@ -3435,6 +3458,16 @@ #define R_038014_RESOURCE0_WORD5 0x038014 #define R_038018_RESOURCE0_WORD6 0x038018 +#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 +#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 +#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 +#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 + +#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 +#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 + +#define R_03E200_SQ_LOOP_CONST_0 0x3E200 + #define SQ_TEX_INST_LD 0x03 #define SQ_TEX_INST_GET_GRADIENTS_H 0x7 #define SQ_TEX_INST_GET_GRADIENTS_V 0x8 diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index a78cf0ce950..892dee86baf 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -20,12 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "radeon.h" -#include "r600_context.h" -#include "r600_asm.h" +#include <stdio.h> #include "util/u_memory.h" +#include "r600_pipe.h" +#include "r600_asm.h" #include "r700_sq.h" -#include <stdio.h> int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) @@ -37,7 +36,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC0_REL(alu->src[1].rel) | + S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | S_SQ_ALU_WORD0_LAST(alu->last); diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h deleted file mode 100644 index 5f9f21db1b4..00000000000 --- a/src/gallium/drivers/r600/radeon.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright © 2009 Jerome Glisse <[email protected]> - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#ifndef RADEON_H -#define RADEON_H - -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) - -#include <stdint.h> - -#include <pipe/p_compiler.h> - -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; - -struct radeon; - -enum radeon_family { - CHIP_UNKNOWN, - CHIP_R100, - CHIP_RV100, - CHIP_RS100, - CHIP_RV200, - CHIP_RS200, - CHIP_R200, - CHIP_RV250, - CHIP_RS300, - CHIP_RV280, - CHIP_R300, - CHIP_R350, - CHIP_RV350, - CHIP_RV380, - CHIP_R420, - CHIP_R423, - CHIP_RV410, - CHIP_RS400, - CHIP_RS480, - CHIP_RS600, - CHIP_RS690, - CHIP_RS740, - CHIP_RV515, - CHIP_R520, - CHIP_RV530, - CHIP_RV560, - CHIP_RV570, - CHIP_R580, - CHIP_R600, - CHIP_RV610, - CHIP_RV630, - CHIP_RV670, - CHIP_RV620, - CHIP_RV635, - CHIP_RS780, - CHIP_RS880, - CHIP_RV770, - CHIP_RV730, - CHIP_RV710, - CHIP_RV740, - CHIP_CEDAR, - CHIP_REDWOOD, - CHIP_JUNIPER, - CHIP_CYPRESS, - CHIP_HEMLOCK, - CHIP_LAST, -}; - -enum { - R600_SHADER_PS = 1, - R600_SHADER_VS, - R600_SHADER_GS, - R600_SHADER_FS, - R600_SHADER_MAX = R600_SHADER_FS, -}; - -enum radeon_family radeon_get_family(struct radeon *rw); -void radeon_set_mem_constant(struct radeon *radeon, boolean state); - -/* lowlevel WS bo */ -struct radeon_ws_bo; -struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, - unsigned size, unsigned alignment, unsigned usage); -struct radeon_ws_bo *radeon_ws_bo_handle(struct radeon *radeon, - unsigned handle); -void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned usage, void *ctx); -void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); -void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, - struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); - -struct radeon_stype_info; -/* - * states functions - */ -struct radeon_state { - struct radeon *radeon; - unsigned refcount; - struct radeon_stype_info *stype; - unsigned state_id; - unsigned id; - unsigned shader_index; - unsigned nstates; - u32 states[64]; - unsigned npm4; - unsigned cpm4; - u32 pm4_crc; - u32 pm4[128]; - unsigned nbo; - struct radeon_ws_bo *bo[4]; - unsigned nreloc; - unsigned reloc_pm4_id[8]; - unsigned reloc_bo_id[8]; - u32 placement[8]; - unsigned bo_dirty[4]; -}; - -int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class); -void radeon_state_fini(struct radeon_state *state); -int radeon_state_pm4(struct radeon_state *state); -int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type); - -/* - * draw functions - */ -struct radeon_draw { - struct radeon *radeon; - struct radeon_state **state; -}; - -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); -void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state); -void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state); - -/* - * radeon context functions - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx; - -struct radeon_ctx *radeon_ctx_init(struct radeon *radeon); -void radeon_ctx_fini(struct radeon_ctx *ctx); -void radeon_ctx_clear(struct radeon_ctx *ctx); -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_submit(struct radeon_ctx *ctx); -void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); -int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state); - -/* - * R600/R700 - */ - -enum r600_stype { - R600_STATE_CONFIG, - R600_STATE_CB_CNTL, - R600_STATE_RASTERIZER, - R600_STATE_VIEWPORT, - R600_STATE_SCISSOR, - R600_STATE_BLEND, - R600_STATE_DSA, - R600_STATE_SHADER, /* has PS,VS,GS,FS variants */ - R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */ - R600_STATE_CBUF, /* has PS,VS,GS,FS variants */ - R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */ - R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */ - R600_STATE_CB0, - R600_STATE_CB1, - R600_STATE_CB2, - R600_STATE_CB3, - R600_STATE_CB4, - R600_STATE_CB5, - R600_STATE_CB6, - R600_STATE_CB7, - R600_STATE_DB, - R600_STATE_QUERY_BEGIN, - R600_STATE_QUERY_END, - R600_STATE_UCP, - R600_STATE_VGT, - R600_STATE_DRAW, - R600_STATE_CB_FLUSH, - R600_STATE_DB_FLUSH, - R600_STATE_MAX, -}; - -#include "r600_states_inc.h" -#include "eg_states_inc.h" - -/* R600 QUERY BEGIN/END */ -#define R600_QUERY__OFFSET 0 -#define R600_QUERY_SIZE 1 -#define R600_QUERY_PM4 128 - -void r600_flush_ctx(void *data); -#endif diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 3ffda87520f..413da59e559 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -43,6 +43,7 @@ rbug_destroy(struct pipe_context *_pipe) struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + remove_from_list(&rb_pipe->list); pipe->destroy(pipe); FREE(rb_pipe); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 35d426aa3ea..28953582f0a 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -23,8 +23,8 @@ C_SOURCES = \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ - sp_state_fs.c \ sp_state_sampler.c \ + sp_state_shader.c \ sp_state_so.c \ sp_state_rasterizer.c \ sp_state_surface.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index be5917a6886..d5f4d28aeff 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -24,9 +24,9 @@ softpipe = env.ConvenienceLibrary( 'sp_state_blend.c', 'sp_state_clip.c', 'sp_state_derived.c', - 'sp_state_fs.c', 'sp_state_rasterizer.c', 'sp_state_sampler.c', + 'sp_state_shader.c', 'sp_state_so.c', 'sp_state_surface.c', 'sp_state_vertex.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index a7c9959b3e1..b5d30bc6fc9 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -228,61 +228,17 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.priv = priv; /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; - softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; - softpipe->pipe.bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.create_gs_state = softpipe_create_gs_state; - softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; - softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; - - softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state; - softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state; - softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state; - - softpipe->pipe.create_stream_output_state = softpipe_create_stream_output_state; - softpipe->pipe.bind_stream_output_state = softpipe_bind_stream_output_state; - softpipe->pipe.delete_stream_output_state = softpipe_delete_stream_output_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_sample_mask = softpipe_set_sample_mask; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe_init_blend_funcs(&softpipe->pipe); + softpipe_init_clip_funcs(&softpipe->pipe); + softpipe_init_query_funcs( softpipe ); + softpipe_init_rasterizer_funcs(&softpipe->pipe); + softpipe_init_sampler_funcs(&softpipe->pipe); + softpipe_init_shader_funcs(&softpipe->pipe); + softpipe_init_streamout_funcs(&softpipe->pipe); + softpipe_init_texture_funcs( &softpipe->pipe ); + softpipe_init_vertex_funcs(&softpipe->pipe); + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_fragment_sampler_views = softpipe_set_sampler_views; - softpipe->pipe.set_vertex_sampler_views = softpipe_set_vertex_sampler_views; - softpipe->pipe.set_geometry_sampler_views = softpipe_set_geometry_sampler_views; - softpipe->pipe.create_sampler_view = softpipe_create_sampler_view; - softpipe->pipe.sampler_view_destroy = softpipe_sampler_view_destroy; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; - softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; - softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; @@ -292,9 +248,6 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.is_resource_referenced = softpipe_is_resource_referenced; - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( &softpipe->pipe ); - softpipe->pipe.render_condition = softpipe_render_condition; /* diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 67e2c8f8bc4..346e1b402ba 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { + + for (j = 0; j < 4; j++) quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; - } + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + + for (j = 0; j < 4; j++) + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index daa158df7c4..5b18cd035e3 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } + for (j = 0; j < 4; j++) + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + for (j = 0; j < 4; j++) + quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index a3236bd1169..e745aa80619 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -85,6 +85,7 @@ struct quad_header_output /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; + uint8_t stencil[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 5590d408929..c8f5f89568a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -47,6 +47,8 @@ struct depth_data { unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ ubyte stencilVals[QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[QUAD_SIZE]; struct softpipe_cached_tile *tile; }; @@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + } + break; + default: + assert(0); + } +} /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. @@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: @@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data, break; case PIPE_FUNC_LESS: for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data, { unsigned j; ubyte newstencil[QUAD_SIZE]; + ubyte refs[QUAD_SIZE]; for (j = 0; j < QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data, case PIPE_STENCIL_OP_REPLACE: for (j = 0; j < QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; @@ -688,15 +728,18 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; boolean interp_depth = !fs->info.writes_z; + boolean shader_stencil_ref = fs->info.writes_stencil; struct depth_data data; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); } - if (qs->softpipe->depth_stencil->depth.enabled || - qs->softpipe->depth_stencil->stencil[0].enabled) { + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { data.ps = qs->softpipe->framebuffer.zsbuf; data.format = data.ps->format; @@ -715,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } @@ -805,6 +851,9 @@ choose_depth_test(struct quad_stage *qs, boolean occlusion = qs->softpipe->active_query_count; + if(!qs->softpipe->framebuffer.zsbuf) + depth = depthwrite = stencil = FALSE; + /* default */ qs->run = depth_test_quads_fallback; diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 43b8e88e334..2cfd02a22c6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + !sp->fs->info.writes_z && + !sp->fs->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 2053d02f628..37557d11940 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -114,6 +114,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 39d204de8a9..525bf23734a 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -70,6 +70,8 @@ struct sp_fragment_shader { struct tgsi_shader_info info; + struct draw_fragment_shader *draw_shader; + boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ @@ -113,126 +115,40 @@ struct sp_so_state { }; -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); void -softpipe_bind_vertex_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void -softpipe_bind_geometry_sampler_states(struct pipe_context *, - unsigned num_samplers, - void **samplers); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_sample_mask( struct pipe_context *, - unsigned sample_mask ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - struct pipe_resource *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); -void *softpipe_create_gs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_gs_state(struct pipe_context *, void *); -void softpipe_delete_gs_state(struct pipe_context *, void *); - -void *softpipe_create_vertex_elements_state(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); -void softpipe_bind_vertex_elements_state(struct pipe_context *, void *); -void softpipe_delete_vertex_elements_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_views( struct pipe_context *, - unsigned num, - struct pipe_sampler_view ** ); +softpipe_init_blend_funcs(struct pipe_context *pipe); void -softpipe_set_vertex_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); +softpipe_init_clip_funcs(struct pipe_context *pipe); void -softpipe_set_geometry_sampler_views(struct pipe_context *, - unsigned num, - struct pipe_sampler_view **); - -struct pipe_sampler_view * -softpipe_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ); +softpipe_init_sampler_funcs(struct pipe_context *pipe); void -softpipe_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view); +softpipe_init_rasterizer_funcs(struct pipe_context *pipe); -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); +void +softpipe_init_shader_funcs(struct pipe_context *pipe); -void softpipe_set_index_buffer(struct pipe_context *, - const struct pipe_index_buffer *); +void +softpipe_init_streamout_funcs(struct pipe_context *pipe); +void +softpipe_init_vertex_funcs(struct pipe_context *pipe); -void softpipe_update_derived( struct softpipe_context *softpipe ); +void +softpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); +void +softpipe_update_derived( struct softpipe_context *softpipe ); void softpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); -void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); +void +softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); void softpipe_map_transfers(struct softpipe_context *sp); @@ -253,20 +169,5 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe); struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); -void * -softpipe_create_stream_output_state( - struct pipe_context *pipe, - const struct pipe_stream_output_state *templ); -void -softpipe_bind_stream_output_state(struct pipe_context *pipe, - void *so); -void -softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so); - -void -softpipe_set_stream_output_buffers(struct pipe_context *pipe, - struct pipe_resource **buffers, - int *offsets, - int num_buffers); #endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 2a203f44e50..12863824b8e 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -34,15 +34,17 @@ #include "sp_state.h" -void * +static void * softpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { return mem_dup(blend, sizeof(*blend)); } -void softpipe_bind_blend_state( struct pipe_context *pipe, - void *blend ) + +static void +softpipe_bind_blend_state(struct pipe_context *pipe, + void *blend) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -53,15 +55,18 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, softpipe->dirty |= SP_NEW_BLEND; } -void softpipe_delete_blend_state(struct pipe_context *pipe, - void *blend) + +static void +softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) { FREE( blend ); } -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) +static void +softpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -73,19 +78,15 @@ void softpipe_set_blend_color( struct pipe_context *pipe, } -/** XXX move someday? Or consolidate all these simple state setters - * into one file. - */ - - -void * +static void * softpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { return mem_dup(depth_stencil, sizeof(*depth_stencil)); } -void + +static void softpipe_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { @@ -96,14 +97,17 @@ softpipe_bind_depth_stencil_state(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) { FREE( depth ); } -void softpipe_set_stencil_ref( struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref ) + +static void +softpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -112,9 +116,28 @@ void softpipe_set_stencil_ref( struct pipe_context *pipe, softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } -void + +static void softpipe_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { } + +void +softpipe_init_blend_funcs(struct pipe_context *pipe) +{ + pipe->create_blend_state = softpipe_create_blend_state; + pipe->bind_blend_state = softpipe_bind_blend_state; + pipe->delete_blend_state = softpipe_delete_blend_state; + + pipe->set_blend_color = softpipe_set_blend_color; + + pipe->create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + pipe->bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + pipe->delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + pipe->set_stencil_ref = softpipe_set_stencil_ref; + + pipe->set_sample_mask = softpipe_set_sample_mask; +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 4946c776e3e..f3a4c234e27 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -32,8 +32,9 @@ #include "draw/draw_context.h" -void softpipe_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +softpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -42,8 +43,9 @@ void softpipe_set_clip_state( struct pipe_context *pipe, } -void softpipe_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) +static void +softpipe_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -55,8 +57,9 @@ void softpipe_set_viewport_state( struct pipe_context *pipe, } -void softpipe_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) +static void +softpipe_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *scissor) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -67,8 +70,9 @@ void softpipe_set_scissor_state( struct pipe_context *pipe, } -void softpipe_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) +static void +softpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -77,3 +81,13 @@ void softpipe_set_polygon_stipple( struct pipe_context *pipe, softpipe->poly_stipple = *stipple; /* struct copy */ softpipe->dirty |= SP_NEW_STIPPLE; } + + +void +softpipe_init_clip_funcs(struct pipe_context *pipe) +{ + pipe->set_clip_state = softpipe_set_clip_state; + pipe->set_viewport_state = softpipe_set_viewport_state; + pipe->set_scissor_state = softpipe_set_scissor_state; + pipe->set_polygon_stipple = softpipe_set_polygon_stipple; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index c9ede09f268..3cd4acd7432 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -33,15 +33,17 @@ -void * +static void * softpipe_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *rast) { return mem_dup(rast, sizeof(*rast)); } -void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) + +static void +softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -56,10 +58,19 @@ void softpipe_bind_rasterizer_state(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_RASTERIZER; } -void softpipe_delete_rasterizer_state(struct pipe_context *pipe, - void *rasterizer) + +static void +softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) { FREE( rasterizer ); } +void +softpipe_init_rasterizer_funcs(struct pipe_context *pipe) +{ + pipe->create_rasterizer_state = softpipe_create_rasterizer_state; + pipe->bind_rasterizer_state = softpipe_bind_rasterizer_state; + pipe->delete_rasterizer_state = softpipe_delete_rasterizer_state; +} diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 79d9516ad9c..b59fbc33ed6 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -33,7 +33,6 @@ #include "util/u_inlines.h" #include "draw/draw_context.h" -#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -54,7 +53,7 @@ static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) } -void * +static void * softpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { @@ -67,7 +66,7 @@ softpipe_create_sampler_state(struct pipe_context *pipe, } -void +static void softpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -94,7 +93,7 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, } -void +static void softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -118,10 +117,14 @@ softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, softpipe->num_vertex_samplers = num_samplers; + draw_set_samplers(softpipe->draw, + softpipe->vertex_samplers, + softpipe->num_vertex_samplers); + softpipe->dirty |= SP_NEW_SAMPLER; } -void +static void softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, unsigned num_samplers, void **samplers) @@ -149,7 +152,7 @@ softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, } -struct pipe_sampler_view * +static struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *resource, const struct pipe_sampler_view *templ) @@ -168,7 +171,7 @@ softpipe_create_sampler_view(struct pipe_context *pipe, } -void +static void softpipe_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { @@ -177,7 +180,7 @@ softpipe_sampler_view_destroy(struct pipe_context *pipe, } -void +static void softpipe_set_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -207,7 +210,7 @@ softpipe_set_sampler_views(struct pipe_context *pipe, } -void +static void softpipe_set_vertex_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -234,10 +237,15 @@ softpipe_set_vertex_sampler_views(struct pipe_context *pipe, softpipe->num_vertex_sampler_views = num; + draw_set_sampler_views(softpipe->draw, + softpipe->vertex_sampler_views, + softpipe->num_vertex_sampler_views); + softpipe->dirty |= SP_NEW_TEXTURE; } -void + +static void softpipe_set_geometry_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -319,8 +327,6 @@ get_sampler_varient( unsigned unit, } - - void softpipe_reset_sampler_varients(struct softpipe_context *softpipe) { @@ -395,9 +401,7 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } } - - -void +static void softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -413,4 +417,20 @@ softpipe_delete_sampler_state(struct pipe_context *pipe, } +void +softpipe_init_sampler_funcs(struct pipe_context *pipe) +{ + pipe->create_sampler_state = softpipe_create_sampler_state; + pipe->bind_fragment_sampler_states = softpipe_bind_sampler_states; + pipe->bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; + pipe->bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; + pipe->delete_sampler_state = softpipe_delete_sampler_state; + + pipe->set_fragment_sampler_views = softpipe_set_sampler_views; + pipe->set_vertex_sampler_views = softpipe_set_vertex_sampler_views; + pipe->set_geometry_sampler_views = softpipe_set_geometry_sampler_views; + + pipe->create_sampler_view = softpipe_create_sampler_view; + pipe->sampler_view_destroy = softpipe_sampler_view_destroy; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_shader.c index ded242d3dc5..7fff338ccea 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -42,7 +42,7 @@ #include "tgsi/tgsi_parse.h" -void * +static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -60,7 +60,15 @@ softpipe_create_fs_state(struct pipe_context *pipe, state = softpipe_create_fs_exec( softpipe, templ ); } - assert(state); + if (!state) + return NULL; + + /* draw's fs state */ + state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + if (!state->draw_shader) { + state->delete( state ); + return NULL; + } /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); @@ -76,7 +84,7 @@ softpipe_create_fs_state(struct pipe_context *pipe, } -void +static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -90,11 +98,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; + draw_bind_fragment_shader(softpipe->draw, + (softpipe->fs ? softpipe->fs->draw_shader : NULL)); + softpipe->dirty |= SP_NEW_FS; } -void +static void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -109,11 +120,13 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); + state->delete( state ); } -void * +static void * softpipe_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -148,7 +161,7 @@ fail: } -void +static void softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -162,7 +175,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) } -void +static void softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -174,34 +187,8 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( state ); } -void -softpipe_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - struct pipe_resource *constants) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? softpipe_resource(constants)->data : NULL; - - assert(shader < PIPE_SHADER_TYPES); - draw_flush(softpipe->draw); - - /* note: reference counting */ - pipe_resource_reference(&softpipe->constants[shader][index], constants); - - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { - draw_set_mapped_constant_buffer(softpipe->draw, shader, index, data, size); - } - - softpipe->mapped_constants[shader][index] = data; - softpipe->const_buffer_size[shader][index] = size; - - softpipe->dirty |= SP_NEW_CONSTANTS; -} - - -void * +static void * softpipe_create_gs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -240,7 +227,7 @@ fail: } -void +static void softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -254,7 +241,7 @@ softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) } -void +static void softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -266,3 +253,49 @@ softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) (state) ? state->draw_data : 0); FREE(state); } + + +static void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + struct pipe_resource *constants) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned size = constants ? constants->width0 : 0; + const void *data = constants ? softpipe_resource(constants)->data : NULL; + + assert(shader < PIPE_SHADER_TYPES); + + draw_flush(softpipe->draw); + + /* note: reference counting */ + pipe_resource_reference(&softpipe->constants[shader][index], constants); + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + draw_set_mapped_constant_buffer(softpipe->draw, shader, index, data, size); + } + + softpipe->mapped_constants[shader][index] = data; + softpipe->const_buffer_size[shader][index] = size; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} + + +void +softpipe_init_shader_funcs(struct pipe_context *pipe) +{ + pipe->create_fs_state = softpipe_create_fs_state; + pipe->bind_fs_state = softpipe_bind_fs_state; + pipe->delete_fs_state = softpipe_delete_fs_state; + + pipe->create_vs_state = softpipe_create_vs_state; + pipe->bind_vs_state = softpipe_bind_vs_state; + pipe->delete_vs_state = softpipe_delete_vs_state; + + pipe->create_gs_state = softpipe_create_gs_state; + pipe->bind_gs_state = softpipe_bind_gs_state; + pipe->delete_gs_state = softpipe_delete_gs_state; + + pipe->set_constant_buffer = softpipe_set_constant_buffer; +} diff --git a/src/gallium/drivers/softpipe/sp_state_so.c b/src/gallium/drivers/softpipe/sp_state_so.c index cfe23f9e846..ddfa3ef765a 100644 --- a/src/gallium/drivers/softpipe/sp_state_so.c +++ b/src/gallium/drivers/softpipe/sp_state_so.c @@ -34,7 +34,7 @@ #include "draw/draw_context.h" -void * +static void * softpipe_create_stream_output_state(struct pipe_context *pipe, const struct pipe_stream_output_state *templ) { @@ -57,7 +57,8 @@ softpipe_create_stream_output_state(struct pipe_context *pipe, return so; } -void + +static void softpipe_bind_stream_output_state(struct pipe_context *pipe, void *so) { @@ -72,13 +73,15 @@ softpipe_bind_stream_output_state(struct pipe_context *pipe, draw_set_so_state(softpipe->draw, &sp_so->base); } -void + +static void softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) { FREE( so ); } -void + +static void softpipe_set_stream_output_buffers(struct pipe_context *pipe, struct pipe_resource **buffers, int *offsets, @@ -122,3 +125,16 @@ softpipe_set_stream_output_buffers(struct pipe_context *pipe, draw_set_mapped_so_buffers(softpipe->draw, map_buffers, num_buffers); } + + + +void +softpipe_init_streamout_funcs(struct pipe_context *pipe) +{ + pipe->create_stream_output_state = softpipe_create_stream_output_state; + pipe->bind_stream_output_state = softpipe_bind_stream_output_state; + pipe->delete_stream_output_state = softpipe_delete_stream_output_state; + + pipe->set_stream_output_buffers = softpipe_set_stream_output_buffers; +} + diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index b650fcaea5c..7d8055f2baf 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -36,7 +36,7 @@ #include "draw/draw_context.h" -void * +static void * softpipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -51,7 +51,8 @@ softpipe_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void + +static void softpipe_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -66,13 +67,15 @@ softpipe_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem); } -void + +static void softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); } -void + +static void softpipe_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -89,7 +92,8 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(softpipe->draw, count, buffers); } -void + +static void softpipe_set_index_buffer(struct pipe_context *pipe, const struct pipe_index_buffer *ib) { @@ -102,3 +106,15 @@ softpipe_set_index_buffer(struct pipe_context *pipe, draw_set_index_buffer(softpipe->draw, ib); } + + +void +softpipe_init_vertex_funcs(struct pipe_context *pipe) +{ + pipe->create_vertex_elements_state = softpipe_create_vertex_elements_state; + pipe->bind_vertex_elements_state = softpipe_bind_vertex_elements_state; + pipe->delete_vertex_elements_state = softpipe_delete_vertex_elements_state; + + pipe->set_vertex_buffers = softpipe_set_vertex_buffers; + pipe->set_index_buffer = softpipe_set_index_buffer; +} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e654bb77c29..088e48f81fe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -105,14 +105,14 @@ lerp_3d(float a, float b, float c, /** * Compute coord % size for repeat wrap modes. - * Note that if coord is a signed integer, coord % size doesn't give - * the right value for coord < 0 (in terms of texture repeat). Just - * casting to unsigned fixes that. + * Note that if coord is negative, coord % size doesn't give the right + * value. To avoid that problem we add a large multiple of the size + * (rather than using a conditional). */ static INLINE int repeat(int coord, unsigned size) { - return (int) ((unsigned) coord % size); + return (coord + size * 1024) % size; } @@ -656,7 +656,8 @@ get_texel_2d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { - return samp->sampler->border_color; + return sp_tex_tile_cache_border_color(samp->cache, + samp->sampler->border_color); } else { return get_texel_2d_no_border( samp, addr, x, y ); @@ -750,7 +751,8 @@ get_texel_3d(const struct sp_sampler_varient *samp, if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level) || z < 0 || z >= (int) u_minify(texture->depth0, level)) { - return samp->sampler->border_color; + return sp_tex_tile_cache_border_color(samp->cache, + samp->sampler->border_color); } else { return get_texel_3d_no_border( samp, addr, x, y, z ); diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index eb74f14a7be..e817c0c8cf5 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -298,3 +298,23 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, +/** + * Return the swizzled border color. + */ +const float * +sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, + const float border_color[4]) +{ + float rgba01[6]; + + COPY_4V(rgba01, border_color); + rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; + rgba01[PIPE_SWIZZLE_ONE] = 1.0f; + + tc->swz_border_color[0] = rgba01[tc->swizzle_r]; + tc->swz_border_color[1] = rgba01[tc->swizzle_g]; + tc->swz_border_color[2] = rgba01[tc->swizzle_b]; + tc->swz_border_color[3] = rgba01[tc->swizzle_a]; + + return tc->swz_border_color; +} diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 0794ffa0c53..05f25133daa 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -90,6 +90,8 @@ struct softpipe_tex_tile_cache unsigned format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ + + float swz_border_color[4]; /**< swizzled border color */ }; @@ -154,7 +156,9 @@ sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } - +const float * +sp_tex_tile_cache_border_color(struct softpipe_tex_tile_cache *tc, + const float border_color[4]); #endif /* SP_TEX_TILE_CACHE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index bf33fd94173..aa76b8aa1ec 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -38,6 +38,8 @@ #include "util/u_tile.h" #include "sp_tile_cache.h" +static struct softpipe_cached_tile * +sp_alloc_tile(struct softpipe_tile_cache *tc); /** @@ -94,9 +96,19 @@ sp_create_tile_cache( struct pipe_context *pipe ) if (tc) { tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; + tc->tile_addrs[pos].bits.invalid = 1; + } + tc->last_tile_addr.bits.invalid = 1; + + /* this allocation allows us to guarantee that allocation + * failures are never fatal later + */ + tc->tile = MALLOC_STRUCT( softpipe_cached_tile ); + if (!tc->tile) + { + FREE(tc); + return NULL; } - tc->last_tile = &tc->entries[0]; /* any tile */ /* XXX this code prevents valgrind warnings about use of uninitialized * memory in programs that don't clear the surface before rendering. @@ -120,7 +132,10 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ + FREE( tc->entries[pos] ); } + FREE( tc->tile ); + if (tc->transfer) { tc->pipe->transfer_destroy(tc->pipe, tc->transfer); } @@ -285,11 +300,14 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) uint numCleared = 0; assert(pt->resource); + if (!tc->tile) + tc->tile = sp_alloc_tile(tc); + /* clear the scratch tile to the clear value */ if (tc->depth_stencil) { - clear_tile(&tc->tile, pt->resource->format, tc->clear_val); + clear_tile(tc->tile, pt->resource->format, tc->clear_val); } else { - clear_tile_rgba(&tc->tile, pt->resource->format, tc->clear_color); + clear_tile_rgba(tc->tile, pt->resource->format, tc->clear_color); } /* push the tile to all positions marked as clear */ @@ -303,12 +321,12 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) pipe_put_tile_raw(tc->pipe, pt, x, y, TILE_SIZE, TILE_SIZE, - tc->tile.data.any, 0/*STRIDE*/); + tc->tile->data.any, 0/*STRIDE*/); } else { pipe_put_tile_rgba(tc->pipe, pt, x, y, TILE_SIZE, TILE_SIZE, - (float *) tc->tile.data.color); + (float *) tc->tile->data.color); } numCleared++; } @@ -323,6 +341,27 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) #endif } +static void +sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos) +{ + if (!tc->tile_addrs[pos].bits.invalid) { + if (tc->depth_stencil) { + pipe_put_tile_raw(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->entries[pos]->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + (float *) tc->entries[pos]->data.color); + } + tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */ + } +} /** * Flush the tile cache: write all dirty tiles back to the transfer. @@ -337,28 +376,21 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) if (pt) { /* caching a drawing transfer */ for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - if (!tile->addr.bits.invalid) { - if (tc->depth_stencil) { - pipe_put_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); - } - else { - pipe_put_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); - } - tile->addr.bits.invalid = 1; /* mark as empty */ - inuse++; + struct softpipe_cached_tile *tile = tc->entries[pos]; + if (!tile) + { + assert(tc->tile_addrs[pos].bits.invalid); + continue; } + + sp_flush_tile(tc, pos); + ++inuse; } sp_tile_cache_flush_clear(tc); + + + tc->last_tile_addr.bits.invalid = 1; } #if 0 @@ -366,6 +398,38 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) #endif } +static struct softpipe_cached_tile * +sp_alloc_tile(struct softpipe_tile_cache *tc) +{ + struct softpipe_cached_tile * tile = MALLOC_STRUCT(softpipe_cached_tile); + if (!tile) + { + /* in this case, steal an existing tile */ + if (!tc->tile) + { + unsigned pos; + for (pos = 0; pos < NUM_ENTRIES; ++pos) { + if (!tc->entries[pos]) + continue; + + sp_flush_tile(tc, pos); + tc->tile = tc->entries[pos]; + tc->entries[pos] = NULL; + break; + } + + /* this should never happen */ + if (!tc->tile) + abort(); + } + + tile = tc->tile; + tc->tile = NULL; + + tc->last_tile_addr.bits.invalid = 1; + } + return tile; +} /** * Get a tile from the cache. @@ -380,30 +444,35 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, /* cache pos/entry: */ const int pos = CACHE_POS(addr.bits.x, addr.bits.y); - struct softpipe_cached_tile *tile = tc->entries + pos; + struct softpipe_cached_tile *tile = tc->entries[pos]; + + if (!tile) { + tile = sp_alloc_tile(tc); + tc->entries[pos] = tile; + } - if (addr.value != tile->addr.value) { + if (addr.value != tc->tile_addrs[pos].value) { assert(pt->resource); - if (tile->addr.bits.invalid == 0) { + if (tc->tile_addrs[pos].bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } - tile->addr = addr; + tc->tile_addrs[pos] = addr; if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ @@ -419,15 +488,15 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, /* get new tile data from transfer */ if (tc->depth_stencil) { pipe_get_tile_raw(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_get_tile_rgba(tc->pipe, pt, - tile->addr.bits.x * TILE_SIZE, - tile->addr.bits.y * TILE_SIZE, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -435,6 +504,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, } tc->last_tile = tile; + tc->last_tile_addr = addr; return tile; } @@ -464,7 +534,7 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - tile->addr.bits.invalid = 1; + tc->tile_addrs[pos].bits.invalid = 1; } + tc->last_tile_addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index e03d53eb24e..031c7c1ea5c 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -57,7 +57,6 @@ union tile_address { struct softpipe_cached_tile { - union tile_address addr; union { float color[TILE_SIZE][TILE_SIZE][4]; uint color32[TILE_SIZE][TILE_SIZE]; @@ -83,14 +82,16 @@ struct softpipe_tile_cache struct pipe_transfer *transfer; void *transfer_map; - struct softpipe_cached_tile entries[NUM_ENTRIES]; + union tile_address tile_addrs[NUM_ENTRIES]; + struct softpipe_cached_tile *entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ uint clear_val; /**< for z+stencil, or packed color clear value */ boolean depth_stencil; /**< Is the surface a depth/stencil format? */ - struct softpipe_cached_tile tile; /**< scratch tile for clears */ + struct softpipe_cached_tile *tile; /**< scratch tile for clears */ + union tile_address last_tile_addr; struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */ }; @@ -147,7 +148,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, { union tile_address addr = tile_address( x, y ); - if (tc->last_tile->addr.value == addr.value) + if (tc->last_tile_addr.value == addr.value) return tc->last_tile; return sp_find_cached_tile( tc, addr ); diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 0cd620189b7..781fe6334a5 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -203,7 +203,7 @@ svga_tgsi_translate( const struct svga_shader *shader, emit.imm_start += key.vkey.num_zero_stride_vertex_elements; } - emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); + emit.nr_hw_float_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1; emit.in_main_func = TRUE; diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index b4e90a957d0..63ef7f867a6 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -62,7 +62,8 @@ struct svga_shader_emitter int imm_start; - int nr_hw_const; + int nr_hw_float_const; + int nr_hw_int_const; int nr_hw_temp; int insn_offset; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 72dccdf1502..f2591c5721a 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -197,22 +197,37 @@ translate_src_register( const struct svga_shader_emitter *emit, break; } - /* Indirect addressing (for coninstant buffer lookups only) + /* Indirect addressing. */ - if (reg->Register.Indirect) - { - /* we shift the offset towards the minimum */ - if (svga_arl_needs_adjustment( emit )) { - src.base.num -= svga_arl_adjustment( emit ); + if (reg->Register.Indirect) { + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* Pixel shaders have only loop registers for relative + * addressing into inputs. Ignore the redundant address + * register, the contents of aL should be in sync with it. + */ + if (reg->Register.File == TGSI_FILE_INPUT) { + src.base.relAddr = 1; + src.indirect = src_token(SVGA3DREG_LOOP, 0); + } + } + else { + /* Constant buffers only. + */ + if (reg->Register.File == TGSI_FILE_CONSTANT) { + /* we shift the offset towards the minimum */ + if (svga_arl_needs_adjustment( emit )) { + src.base.num -= svga_arl_adjustment( emit ); + } + src.base.relAddr = 1; + + /* Not really sure what should go in the second token: + */ + src.indirect = src_token( SVGA3DREG_ADDR, + reg->Indirect.Index ); + + src.indirect.swizzle = SWIZZLE_XXXX; + } } - src.base.relAddr = 1; - - /* Not really sure what should go in the second token: - */ - src.indirect = src_token( SVGA3DREG_ADDR, - reg->Indirect.Index ); - - src.indirect.swizzle = SWIZZLE_XXXX; } src = swizzle( src, @@ -538,7 +553,7 @@ static boolean emit_def_const( struct svga_shader_emitter *emit, static INLINE boolean create_zero_immediate( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0, 0, 0, 1 )) @@ -553,7 +568,7 @@ create_zero_immediate( struct svga_shader_emitter *emit ) static INLINE boolean create_loop_const( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_int_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 255, /* iteration count */ @@ -571,7 +586,7 @@ create_loop_const( struct svga_shader_emitter *emit ) static INLINE boolean create_sincos_consts( struct svga_shader_emitter *emit ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -1.5500992e-006f, @@ -581,7 +596,7 @@ create_sincos_consts( struct svga_shader_emitter *emit ) return FALSE; emit->sincos_consts_idx = idx; - idx = emit->nr_hw_const++; + idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, -0.020833334f, @@ -602,7 +617,7 @@ create_arl_consts( struct svga_shader_emitter *emit ) for (i = 0; i < emit->num_arl_consts; i += 4) { int j; - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; float vals[4]; for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { vals[j] = emit->arl_consts[i + j].number; @@ -1593,6 +1608,14 @@ static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { ++emit->current_arl; + if (emit->unit == PIPE_SHADER_FRAGMENT) { + /* MOVA not present in pixel shader instruction set. + * Ignore this instruction altogether since it is + * only used for loop counters -- and for that + * we reference aL directly. + */ + return TRUE; + } if (svga_arl_needs_adjustment( emit )) { return emit_fake_arl( emit, insn ); } else { @@ -2384,7 +2407,7 @@ static boolean make_immediate( struct svga_shader_emitter *emit, float d, struct src_register *out ) { - unsigned idx = emit->nr_hw_const++; + unsigned idx = emit->nr_hw_float_const++; if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, a, b, c, d )) diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 271cd4aff5e..04f30f82c3d 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -92,15 +92,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "draw_vbo"); trace_dump_arg(ptr, pipe); - trace_dump_arg(bool, info->indexed); - trace_dump_arg(uint, info->mode); - trace_dump_arg(uint, info->start); - trace_dump_arg(uint, info->count); - trace_dump_arg(uint, info->start_instance); - trace_dump_arg(uint, info->instance_count); - trace_dump_arg(int, info->index_bias); - trace_dump_arg(uint, info->min_index); - trace_dump_arg(uint, info->max_index); + trace_dump_arg(draw_info, info); pipe->draw_vbo(pipe, info); @@ -987,24 +979,24 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, static INLINE void trace_context_set_index_buffer(struct pipe_context *_pipe, - const struct pipe_index_buffer *_ib) + const struct pipe_index_buffer *ib) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_index_buffer unwrapped_ib, *ib = NULL; - - if (_ib) { - unwrapped_ib = *_ib; - unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); - ib = &unwrapped_ib; - } trace_dump_call_begin("pipe_context", "set_index_buffer"); trace_dump_arg(ptr, pipe); - trace_dump_arg(index_buffer, _ib); + trace_dump_arg(index_buffer, ib); - pipe->set_index_buffer(pipe, ib); + if (ib) { + struct pipe_index_buffer _ib; + _ib = *ib; + _ib.buffer = trace_resource_unwrap(tr_ctx, ib->buffer); + pipe->set_index_buffer(pipe, &_ib); + } else { + pipe->set_index_buffer(pipe, NULL); + } trace_dump_call_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bd9a9bfaf16..8f816060324 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -573,3 +573,32 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_struct_end(); } + + +void trace_dump_draw_info(const struct pipe_draw_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_draw_info"); + + trace_dump_member(bool, state, indexed); + + trace_dump_member(uint, state, mode); + trace_dump_member(uint, state, start); + trace_dump_member(uint, state, count); + + trace_dump_member(uint, state, start_instance); + trace_dump_member(uint, state, instance_count); + + trace_dump_member(int, state, index_bias); + trace_dump_member(uint, state, min_index); + trace_dump_member(uint, state, max_index); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 2e70f4e1c74..078d2086109 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -79,5 +79,7 @@ void trace_dump_index_buffer(const struct pipe_index_buffer *state); void trace_dump_vertex_element(const struct pipe_vertex_element *state); +void trace_dump_draw_info(const struct pipe_draw_info *state); + #endif /* TR_STATE_H */ |