diff options
Diffstat (limited to 'src/gallium/drivers')
115 files changed, 2099 insertions, 1303 deletions
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 813a21e2ee3..f73d8a5d32e 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -685,6 +685,12 @@ galahad_resource_copy_region(struct pipe_context *_pipe, util_format_short_name(_dst->format)); } + if ((_src->target == PIPE_BUFFER && _dst->target != PIPE_BUFFER) || + (_src->target != PIPE_BUFFER && _dst->target == PIPE_BUFFER)) { + glhd_warn("Resource target mismatch: Source is %i, destination is %i", + _src->target, _dst->target); + } + pipe->resource_copy_region(pipe, dst, dst_level, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index e62b609eb5a..0f4327fdc81 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -27,6 +27,7 @@ #include "draw/draw_context.h" +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" @@ -285,6 +286,9 @@ i915_is_format_supported(struct pipe_screen *screen, const enum pipe_format *list; uint i; + if (!util_format_is_supported(format, tex_usage)) + return FALSE; + if (sample_count > 1) return FALSE; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index d02c420f6c2..ac6e94500c8 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -37,6 +37,7 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_surface.h" /* * surface functions using the render engine @@ -51,6 +52,13 @@ i915_surface_copy_render(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); + /* Fallback for buffers. 
*/ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + util_blitter_save_blend(i915->blitter, (void *)i915->blend); util_blitter_save_depth_stencil_alpha(i915->blitter, (void *)i915->depth_stencil); util_blitter_save_stencil_ref(i915->blitter, &i915->stencil_ref); @@ -158,6 +166,13 @@ i915_surface_copy_blitter(struct pipe_context *pipe, struct pipe_resource *spt = &src_tex->b.b; unsigned dst_offset, src_offset; /* in bytes */ + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + /* XXX cannot copy 3d regions at this time */ assert(src_box->depth == 1); if (dst->target != PIPE_TEXTURE_CUBE && diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 25204fd088d..9178dfa8f69 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" @@ -333,6 +334,9 @@ brw_is_format_supported(struct pipe_screen *screen, const enum pipe_format *list; uint i; + if (!util_format_is_supported(format, tex_usage)) + return FALSE; + if (sample_count > 1) return FALSE; diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README deleted file mode 100644 index 1fc7746a834..00000000000 --- a/src/gallium/drivers/llvmpipe/README +++ /dev/null @@ -1,134 +0,0 @@ -LLVMPIPE -- a fork of softpipe that employs LLVM for code generation. - - -Requirements -============ - - - A x86 or amd64 processor. 64bit mode is preferred. - - Support for sse2 is strongly encouraged. 
Support for ssse3 and sse4.1 will - yield the most efficient code. The fewer features the CPU has, the more - likely it is that you will run into underperforming, buggy, or incomplete code. - - See /proc/cpuinfo to know what your CPU supports. - - - LLVM. Version 2.8 recommended. 2.6 or later required. - - For Linux, on a recent Debian-based distribution do: - - aptitude install llvm-dev - - For Windows download pre-built MSVC 9.0 or MinGW binaries from - http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment - variable to the extracted path. - - For MSVC there are two sets of binaries: llvm-x.x-msvc32mt.7z and - llvm-x.x-msvc32mtd.7z . - - You have to set the LLVM=/path/to/llvm-x.x-msvc32mtd env var when passing - debug=yes to scons, and LLVM=/path/to/llvm-x.x-msvc32mt when building with - debug=no. This is necessary as LLVM builds as a static library so the chosen - MS CRT must match. - - - scons (optional) - - -Building -======== - -To build everything on Linux invoke scons as: - - scons build=debug libgl-xlib - -Alternatively, you can build it with GNU make, if you prefer, by invoking it as - - make linux-llvm - -but the rest of these instructions assume that scons is used. - -For Windows everything is the same except the winsys: - - scons build=debug libgl-gdi - -Using -===== - -On Linux, building will create a drop-in alternative for libGL.so into - - build/foo/gallium/targets/libgl-xlib/libGL.so - -To use it set the LD_LIBRARY_PATH environment variable accordingly. - -For performance evaluation pass debug=no to scons, and use the corresponding -lib directory without the "-debug" suffix. - -On Windows, building will create a drop-in alternative for opengl32.dll. To use -it put it in the same directory as the application. It can also be used by -replacing the native ICD driver, but it's quite an advanced usage, so if you -need to ask, don't even try it.
- - -Profiling -========= - -To profile llvmpipe you should pass the options - - scons build=profile <same-as-before> - -This will ensure that frame pointers are used both in C and JIT functions, and -that no tail call optimizations are done by gcc. - - -To better profile JIT code you'll need to build LLVM with oprofile integration. - - ./configure \ - --prefix=$install_dir \ - --enable-optimized \ - --disable-profiling \ - --enable-targets=host-only \ - --with-oprofile - - make -C "$build_dir" - make -C "$build_dir" install - - find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug - -Then you should define - - export LLVM=/path/to/llvm-2.6-profile - -and rebuild. - - -Unit testing -============ - -Building will also create several unit tests in -build/linux-???-debug/gallium/drivers/llvmpipe: - - - lp_test_blend: blending - - lp_test_conv: SIMD vector conversion - - lp_test_format: pixel unpacking/packing - -Some of these tests can output results and benchmarks to a tab-separated file -for later analysis, e.g.: - - build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv - - -Development Notes -================= - -- When looking at this code for the first time, start in lp_state_fs.c, and - then skim through the lp_bld_* functions called in there, and the comments - at the top of the lp_bld_*.c files. - -- The driver-independent parts of the LLVM / Gallium code are found in - src/gallium/auxiliary/gallivm/. The filenames and function prefixes - need to be renamed from "lp_bld_" to something else though. - -- We use LLVM-C bindings for now. They are not documented, but follow the C++ - interfaces very closely, and appear to be complete enough for code - generation. See - http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a stand-alone example. See the llvm-c/Core.h file for reference.
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 482a902dd23..268f0fa034b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -54,6 +54,7 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_TEXTURE_WIDTH] = elem_types[LP_JIT_TEXTURE_HEIGHT] = elem_types[LP_JIT_TEXTURE_DEPTH] = + elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] = elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc); elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = elem_types[LP_JIT_TEXTURE_IMG_STRIDE] = @@ -81,6 +82,9 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth, gallivm->target, texture_type, LP_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, first_level, + gallivm->target, texture_type, + LP_JIT_TEXTURE_FIRST_LEVEL); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, gallivm->target, texture_type, LP_JIT_TEXTURE_LAST_LEVEL); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a6763dce17a..04e8dd5267b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -50,6 +50,7 @@ struct lp_jit_texture uint32_t width; uint32_t height; uint32_t depth; + uint32_t first_level; uint32_t last_level; uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; uint32_t img_stride[LP_MAX_TEXTURE_LEVELS]; @@ -66,6 +67,7 @@ enum { LP_JIT_TEXTURE_WIDTH = 0, LP_JIT_TEXTURE_HEIGHT, LP_JIT_TEXTURE_DEPTH, + LP_JIT_TEXTURE_FIRST_LEVEL, LP_JIT_TEXTURE_LAST_LEVEL, LP_JIT_TEXTURE_ROW_STRIDE, LP_JIT_TEXTURE_IMG_STRIDE, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 521a52ad3ae..036a6e0c379 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -166,6 +166,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case 
PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; default: return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3813e0ed972..c82ab821c7e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -653,6 +653,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->width = tex->width0; jit_tex->height = tex->height0; jit_tex->depth = tex->depth0; + jit_tex->first_level = view->u.tex.first_level; jit_tex->last_level = tex->last_level; /* We're referencing the texture's internal data, so save a @@ -663,7 +664,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, if (!lp_tex->dt) { /* regular texture - setup array of mipmap level pointers */ int j; - for (j = 0; j <= tex->last_level; j++) { + for (j = view->u.tex.first_level; j <= tex->last_level; j++) { jit_tex->data[j] = llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ, LP_TEX_LAYOUT_LINEAR); @@ -677,6 +678,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->width = TILE_SIZE/8; jit_tex->height = TILE_SIZE/8; jit_tex->depth = 1; + jit_tex->first_level = 0; jit_tex->last_level = 0; jit_tex->row_stride[j] = 0; jit_tex->img_stride[j] = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 1dd866195d3..df9fb89cc8e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -269,7 +269,7 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, if (!lp_tex->dt) { /* regular texture - setup array of mipmap level pointers */ int j; - for (j = 0; j <= tex->last_level; j++) { + for (j = view->u.tex.first_level; j <= tex->last_level; j++) { data[j] = llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ, LP_TEX_LAYOUT_LINEAR); @@ 
-293,7 +293,7 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, draw_set_mapped_texture(lp->draw, i, tex->width0, tex->height0, tex->depth0, - tex->last_level, + view->u.tex.first_level, tex->last_level, row_stride, img_stride, data); } } diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index f49638acf08..1b5e9d055f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -67,6 +67,13 @@ lp_resource_copy(struct pipe_context *pipe, unsigned height = src_box->height; assert(src_box->depth == 1); + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + llvmpipe_flush_resource(pipe, dst, dst_level, dstz, FALSE, /* read_only */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index ed4282937f8..ccc139603ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -149,6 +149,7 @@ lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE) LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE) LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(first_level, LP_JIT_TEXTURE_FIRST_LEVEL, TRUE) LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE) @@ -219,6 +220,7 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->dynamic_state.base.width = lp_llvm_texture_width; sampler->dynamic_state.base.height = lp_llvm_texture_height; sampler->dynamic_state.base.depth = lp_llvm_texture_depth; + 
sampler->dynamic_state.base.first_level = lp_llvm_texture_first_level; sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level; sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride; diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index 496b7f5ec6d..18308b796f3 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -337,6 +337,7 @@ static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; case PIPE_CAP_DUAL_SOURCE_BLEND: diff --git a/src/gallium/drivers/nv50/nv50_3d.xml.h b/src/gallium/drivers/nv50/nv50_3d.xml.h index 9bb3211728c..41a380ec2ec 100644 --- a/src/gallium/drivers/nv50/nv50_3d.xml.h +++ b/src/gallium/drivers/nv50/nv50_3d.xml.h @@ -558,7 +558,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_UNK0F8C 0x00000f8c -#define NV50_3D_UNK0F90 0x00000f90 +#define NV50_3D_COLOR_MASK_COMMON 0x00000f90 #define NV50_3D_UNK0F94 0x00000f94 @@ -1007,7 +1007,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 #define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 -#define NV50_3D_UNK133C 0x0000133c +#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c #define NV50_3D_BLEND_EQUATION_RGB 0x00001340 #define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 @@ -1033,7 +1033,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358 -#define NV50_3D_UNK135C 0x0000135c +#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c #define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) #define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004 @@ -1673,7 +1673,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_MAP_SEMANTIC_0_BFC0_ID__SHIFT 8 #define NV50_3D_MAP_SEMANTIC_0_COLR_NR__MASK 0x00ff0000 #define NV50_3D_MAP_SEMANTIC_0_COLR_NR__SHIFT 16 -#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0xff000000 +#define NV50_3D_MAP_SEMANTIC_0_CLMP_EN 0x01000000 #define NV50_3D_MAP_SEMANTIC_1 0x00001908 #define NV50_3D_MAP_SEMANTIC_1_CLIP_START__MASK 0x000000ff @@ -1706,7 +1706,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_3D_CULL_FACE_BACK 0x00000405 #define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV50_3D_UNK1924 0x00001924 +#define NV50_3D_LINE_LAST_PIXEL 0x00001924 #define NVA3_3D_FP_MULTISAMPLE 0x00001928 #define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001 diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 930cee7c1e7..632ca4daf74 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -46,6 +46,17 @@ nv50_flush(struct pipe_context *pipe, FIRE_RING(screen->channel); } +static void +nv50_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_channel *chan = nv50_context(pipe)->screen->base.channel; + + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, 0x20); +} + void nv50_default_flush_notify(struct nouveau_channel *chan) { @@ -125,6 +136,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) pipe->clear = nv50_clear; pipe->flush = nv50_flush; + pipe->texture_barrier = nv50_texture_barrier; if (!screen->cur_ctx) screen->cur_ctx = nv50; diff --git a/src/gallium/drivers/nv50/nv50_context.h 
b/src/gallium/drivers/nv50/nv50_context.h index 46e6c2250af..3f031994f0a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -1,7 +1,6 @@ #ifndef __NV50_CONTEXT_H__ #define __NV50_CONTEXT_H__ -#include <stdio.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -13,6 +12,7 @@ #include "draw/draw_vertex.h" +#include "nv50_debug.h" #include "nv50_winsys.h" #include "nv50_stateobj.h" #include "nv50_screen.h" @@ -26,15 +26,6 @@ #include "nv50_3d.xml.h" #include "nv50_2d.xml.h" -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); - -#ifdef NOUVEAU_DEBUG -# define NOUVEAU_DBG(args...) printf(args); -#else -# define NOUVEAU_DBG(args...) -#endif - #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_RASTERIZER (1 << 1) #define NV50_NEW_ZSA (1 << 2) @@ -80,6 +71,8 @@ struct nv50_context { uint32_t instance_elts; /* bitmask of per-instance elements */ uint32_t instance_base; uint32_t interpolant_ctrl; + uint32_t semantic_color; + uint32_t semantic_psize; int32_t index_bias; boolean prim_restart; boolean point_sprite; @@ -183,7 +176,7 @@ void nv50_fragprog_validate(struct nv50_context *); void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); -void nv50_sprite_coords_validate(struct nv50_context *); +void nv50_validate_derived_rs(struct nv50_context *); /* nv50_state.c */ extern void nv50_init_state_functions(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_debug.h b/src/gallium/drivers/nv50/nv50_debug.h new file mode 100644 index 00000000000..f3dee621519 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_debug.h @@ -0,0 +1,25 @@ + +#ifndef __NV50_DEBUG_H__ +#define __NV50_DEBUG_H__ + +#include <stdio.h> + +#include "util/u_debug.h" + +#define NV50_DEBUG_MISC 0x0001 +#define NV50_DEBUG_SHADER 0x0100 +#define NV50_DEBUG_PROG_IR 
0x0200 +#define NV50_DEBUG_PROG_RA 0x0400 +#define NV50_DEBUG_PROG_CFLOW 0x0800 +#define NV50_DEBUG_PROG_ALL 0x1f00 + +#define NV50_DEBUG 0 + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args) + +#define NV50_DBGMSG(ch, args...) \ + if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \ + debug_printf(args) + +#endif /* __NV50_DEBUG_H__ */ diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index 7946117cf30..c65189d0671 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -93,6 +93,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), SAMPLER_VIEW }, + [PIPE_FORMAT_B4G4R4X4_UNORM] = { 0, + B_(C2, C1, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1), + SAMPLER_VIEW }, + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM, A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0), SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT }, @@ -138,14 +142,62 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, - A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + [PIPE_FORMAT_L16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + B_(C0, C0, C0, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_L16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_L32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_I8_UNORM] = { 0, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_UNORM] = { 
NV50_SURFACE_FORMAT_R16_UNORM, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + B_(C0, C0, C0, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_A8_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_UNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A32_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_L8A8_UNORM] = { 0, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -154,6 +206,26 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_L16A16_UNORM] = { 0, + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_SNORM] = { 0, + A_(C0, C0, C0, C1, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L32A32_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + SAMPLER_VIEW }, + + 
[PIPE_FORMAT_L4A4_UNORM] = { 0, + B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0), + SAMPLER_VIEW }, + /* DXT, RGTC */ [PIPE_FORMAT_DXT1_RGB] = { 0, @@ -172,6 +244,22 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_DXT1_SRGB] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT1_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT3_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_DXT5_SRGBA] = { 0, + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_RGTC1_UNORM] = { 0, B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, RGTC1, 0), SAMPLER_VIEW }, @@ -237,7 +325,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* SNORM 32 */ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0, - A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_SNORM] = { 0, @@ -255,7 +343,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = /* UNORM 32 */ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0, - A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0), + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0), VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R32G32B32_UNORM] = { 0, @@ -281,7 +369,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, - A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, @@ -299,7 
+387,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, @@ -457,4 +545,30 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R8_USCALED] = { 0, A_(C0, ZERO, ZERO, ONE_FLOAT, USCALED, USCALED, USCALED, USCALED, 8, 0), VERTEX_BUFFER }, + + /* OTHER FORMATS */ + + [PIPE_FORMAT_R8G8_B8G8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C1_C2_C1_C0, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_G8R8_G8B8_UNORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, C2_C1_C0_C1, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R5SG5SB6U_NORM] = { 0, + B_(C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 6_5_5, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_R1_UNORM] = { 0, + B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0, + B_(C3, C2, C1, C0, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, }; diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 82f1b846527..7900bf811df 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50PC_DEBUG */ - #include "nv50_pc.h" #include "nv50_program.h" @@ -180,6 +178,7 @@ nv50_op_can_write_flags(uint opcode) switch (opcode) { /* obvious ones like KIL, CALL, etc. 
not included */ case NV_OP_PHI: case NV_OP_MOV: + case NV_OP_SELECT: case NV_OP_LINTERP: case NV_OP_PINTERP: case NV_OP_LDA: @@ -367,7 +366,7 @@ nv_print_program(struct nv_pc *pc) nv_print_function(pc->root[i]); } -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW static void nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) { @@ -425,7 +424,7 @@ nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr) fclose(f); } -#endif +#endif /* NV50_DEBUG_PROG_CFLOW */ static INLINE void nvcg_show_bincode(struct nv_pc *pc) @@ -446,7 +445,7 @@ nv50_emit_program(struct nv_pc *pc) uint32_t *code = pc->emit; int n; - NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size); + NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size); for (n = 0; n < pc->num_blocks; ++n) { struct nv_instruction *i; @@ -472,7 +471,7 @@ nv50_emit_program(struct nv_pc *pc) pc->emit = code; code[pc->bin_size / 4 - 1] |= 1; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER nvcg_show_bincode(pc); #endif @@ -500,7 +499,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv50_tgsi_to_nc(pc, ti); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nv_print_program(pc); #endif @@ -510,7 +509,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv_pc_exec_pass0(pc); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nv_print_program(pc); #endif @@ -518,7 +517,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ret = nv_pc_exec_pass1(pc); if (ret) goto out; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW nv_print_program(pc); nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0); #endif @@ -552,7 +551,7 @@ nv50_generate_code(struct nv50_translation_info *ti) ti->p->uses_lmem = ti->store_to_memory; - NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success"); + NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? 
"failed" : "success"); out: nv_pc_free_refs(pc); @@ -624,6 +623,9 @@ nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->is_terminator) + nv_nvi_permute(i->prev, i); } void @@ -669,7 +671,7 @@ nv_nvi_delete(struct nv_instruction *nvi) if (nvi == b->phi) { if (nvi->opcode != NV_OP_PHI) - NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n"); + NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n"); assert(!nvi->prev); if (!nvi->next || nvi->next->opcode != NV_OP_PHI) diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index e6f3815bafe..5bb0e1296bb 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -23,13 +23,7 @@ #ifndef __NV50_COMPILER_H__ #define __NV50_COMPILER_H__ -#define NV50PC_DEBUG - -#ifdef NV50PC_DEBUG -# define NV50_DBGMSG(args...) debug_printf(args) -#else -# define NV50_DBGMSG(args...) -#endif +#include "nv50_debug.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" @@ -228,6 +222,8 @@ struct nv_ref { ubyte flags; /* not used yet */ }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { @@ -263,6 +259,15 @@ struct nv_instruction { ubyte quadop; }; +static INLINE int +nvi_vector_size(struct nv_instruction *nvi) +{ + int i; + assert(nvi); + for (i = 0; i < 4 && nvi->def[i]; ++i); + return i; +} + #define CFG_EDGE_FORWARD 0 #define CFG_EDGE_BACK 1 #define CFG_EDGE_LOOP_ENTER 2 diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 281ccf7ac61..d72b23c137a 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -20,8 +20,6 @@ * SOFTWARE. 
*/ -/* #define NV50PC_DEBUG */ - #include "nv50_pc.h" #define DESCEND_ARBITRARY(j, f) \ @@ -116,7 +114,7 @@ nvi_isnop(struct nv_instruction *nvi) return FALSE; if (nvi->src[0]->value->join->reg.id < 0) { - NV50_DBGMSG("nvi_isnop: orphaned value detected\n"); + NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n"); return TRUE; } @@ -201,7 +199,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) } if (!b->entry) { - NV50_DBGMSG("block %p is now empty\n", b); + NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b); } else if (!b->exit->is_long) { assert(n32); @@ -240,7 +238,7 @@ nv_pc_exec_pass2(struct nv_pc *pc) { int i, ret; - NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks); + NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); pc->num_blocks = 0; /* will reorder bb_list */ @@ -966,7 +964,8 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) if (bb_is_if_else_endif(b)) { - NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); + NV50_DBGMSG(PROG_IR, + "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) if (!nv50_nvi_can_predicate(nvi)) @@ -975,7 +974,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) if (!nv50_nvi_can_predicate(nvi)) break; -#ifdef NV50PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR if (nvi) { debug_printf("cannot predicate: "); nv_print_instruction(nvi); } diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index 39ae36681c0..e79fd594cea 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -20,11 +20,11 @@ * SOFTWARE. 
*/ -/* #define NV50PC_DEBUG */ - -/* #define NV50_RA_DEBUG_LIVEI */ -/* #define NV50_RA_DEBUG_LIVE_SETS */ -/* #define NV50_RA_DEBUG_JOIN */ +#if NV50_DEBUG & NV50_DEBUG_PROG_RA +# define NV50_RA_DEBUG_LIVEI +# define NV50_RA_DEBUG_LIVE_SETS +# define NV50_RA_DEBUG_JOIN +#endif #include "nv50_context.h" #include "nv50_pc.h" @@ -32,14 +32,39 @@ #include "util/u_simple_list.h" #define NUM_REGISTER_FILES 4 +#define MAX_REGISTER_COUNT 256 struct register_set { struct nv_pc *pc; uint32_t last[NUM_REGISTER_FILES]; - uint32_t bits[NUM_REGISTER_FILES][8]; + uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32]; }; +/* using OR because a set bit means occupied/unavailable, aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i, j; + + for (i = 0; i < NUM_REGISTER_FILES; ++i) { + for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) + dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i, j; + + for (i = 0; i < NUM_REGISTER_FILES; ++i) { + for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j) + set->bits[i][j] = (set->bits[i][j] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; @@ -61,11 +86,15 @@ ranges_coalesce(struct nv_range *range) } } +/* @return: TRUE if @new_range can be freed (i.e. 
was not reused) */ static boolean add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ @@ -251,6 +280,8 @@ reg_occupy(struct register_set *set, struct nv_value *val) id <<= s; m = (1 << (1 << s)) - 1; + assert(s >= 0); /* XXX: remove me */ + set->bits[f][id / 32] |= m << (id % 32); if (set->pc->max_reg[f] < id) @@ -286,15 +317,12 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) if (a->join->reg.id == b->join->reg.id) return TRUE; -#if 1 /* either a or b or both have been assigned */ if (a->join->reg.id >= 0 && b->join->reg.id >= 0) return FALSE; else if (b->join->reg.id >= 0) { - if (a->join->reg.id >= 0) - return FALSE; val = a; a = b; b = val; @@ -309,8 +337,6 @@ join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) return FALSE; } return TRUE; -#endif - return FALSE; } static INLINE void @@ -336,14 +362,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NV50_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NV50_RA_DEBUG_JOIN @@ -351,10 +377,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + 
do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -369,20 +412,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return (b->num_in > 1) && (n == 2); } +/* Look for the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; - for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) { - if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvbb_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -429,16 +484,21 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - val = val->insn->src[0]->value; - while (j < 4 && i->src[j]) - ++j; - assert(j < 4); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = 
val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? */ + for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 5); ni = new_instruction(ctx->pc, NV_OP_MOV); @@ -452,11 +512,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) nv_reference(ctx->pc, &i->src[j], ni->def[0]); - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { - ctx->pc->current_block = b->in[n ? 0 : 1]; + assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator); + /* insert terminator (branch to ENDIF) in new else block */ + ctx->pc->current_block = pn; ni = new_instruction(ctx->pc, NV_OP_BRA); ni->target = b; ni->is_terminator = 1; @@ -470,45 +532,50 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_TEX (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; for (n = 0; n < ctx->num_insns; ++n) { - struct nv_instruction *i = ctx->insns[n]; + struct nv_instruction *nvi, *i = ctx->insns[n]; switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; - for (c = 0; c < 4 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + for (c = 0; c < 5 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + nvi = i->src[0]->value->join->insn; + if (nvi && !nv_is_vector_op(nvi->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 4 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], 
i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } + for (c = 0; c < 5 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_TEX: case NV_OP_TXB: case NV_OP_TXL: case NV_OP_TXQ: - if (iter) + if (!(mask & JOIN_MASK_TEX)) break; - for (c = 0; c < 4; ++c) { - if (!i->src[c]) - break; - do_join_values(ctx, i->def[c], i->src[c]->value); - } + /* This should work without conflicts because we always generate + * extra MOVs for the sources of a TEX. + */ + for (c = 0; c < 4 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; default: break; @@ -643,15 +710,15 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -770,8 +837,8 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) struct nv_value *elem; for (elem = list->prev; - elem != list && elem->livei->bgn > nval->livei->bgn; - elem = elem->prev); + elem != list && elem->livei->bgn > nval->livei->bgn; + elem = elem->prev); /* now elem begins before or at the same time as val */ nval->prev = elem; @@ -780,44 +847,49 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; 
+ struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); + make_empty_list(head); - nv50_ctor_register_set(ctx->pc, &free); - - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 4; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if (!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } if (i->flags_def && i->flags_def->livei) - insert_ordered_tail(&unhandled, i->flags_def); + if (!assigned_only || i->flags_def->reg.id >= 0) + insert_ordered_tail(head, i->flags_def); } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx, int iter) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nv50_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -854,13 +926,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - - if (nv_is_vector_op(cur->insn->opcode)) - mem = !reg_assign(&f, 
&cur->insn->def[0], 4); - else - if (iter) - mem = !reg_assign(&f, &cur, 1); + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -874,13 +940,80 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. + */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, &regvals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + /* Compute registers available for this "vector" of consecutive registers. + * Each value (component) has its own independent live interval. + */ + for (c = 0; c < vsize; ++c) { + nv50_ctor_register_set(ctx->pc, &regs[c]); + + foreach(val, &regvals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(&regs[c], val); + } + /* Only 32 bit GPRs will be allocated here, but register set + * granularity for GPRs is 16 bit. 
+ */ + mask = 0x03030303; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x03030303 << 4; + mask_register_set(&regs[c], 0, mask << (c * 2)); + + if (defs[c]->livei) + insert_ordered_tail(&regvals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(&regs[0], &regs[0], &regs[c]); + + mem = !reg_assign(&regs[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pc_pass *ctx; int i, ret; - NV50_DBGMSG("REGISTER ALLOCATION - entering\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n"); ctx = CALLOC_STRUCT(nv_pc_pass); if (!ctx) @@ -923,16 +1056,16 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; ret = pass_linear_scan(ctx, 1); @@ -942,7 +1075,7 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) for (i = 0; i < pc->num_values; ++i) livei_release(&pc->values[i]); - NV50_DBGMSG("REGISTER ALLOCATION - leaving\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n"); out: FREE(ctx->insns); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a63f9d8a6d5..41d3e14dc0f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -20,8 +20,6 @@ * SOFTWARE. 
*/ -/* #define NV50_PROGRAM_DEBUG */ - #include "nv50_program.h" #include "nv50_pc.h" #include "nv50_context.h" @@ -486,7 +484,7 @@ nv50_fragprog_prepare(struct nv50_translation_info *ti) ++nintp; } - p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */ + p->fp.colors = 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT; /* after HPOS */ for (i = 0; i < p->in_nr; ++i) { int j = p->in[i].id; @@ -564,7 +562,7 @@ nv50_prog_scan(struct nv50_translation_info *ti) tgsi_scan_shader(p->pipe.tokens, &ti->scan); -#ifdef NV50_PROGRAM_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER tgsi_dump(p->pipe.tokens, 0); #endif diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7690c80eef0..641ad7e2780 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -20,6 +20,7 @@ * SOFTWARE. */ +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" @@ -45,17 +46,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, if (sample_count > 1) return FALSE; - if (!util_format_s3tc_enabled) { - switch (format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - break; - } - } + if (!util_format_is_supported(format, bindings)) + return FALSE; switch (format) { case PIPE_FORMAT_Z16_UNORM: @@ -108,6 +100,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + return 1; case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_OCCLUSION_QUERY: return 1; @@ -129,6 +123,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); @@ -417,6 +412,10 @@ 
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, NV50_3D_MULTISAMPLE_MODE_MS1); BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LINE_LAST_PIXEL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(BLEND_SEPARATE_ALPHA), 1); + OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index bea9c095bb3..82c346cb5ea 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -226,7 +226,7 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) OUT_RING (chan, gp->code_base); } -void +static void nv50_sprite_coords_validate(struct nv50_context *nv50) { struct nouveau_channel *chan = nv50->screen->base.channel; @@ -282,6 +282,39 @@ nv50_sprite_coords_validate(struct nv50_context *nv50) OUT_RINGp (chan, pntc, 8); } +/* Validate state derived from shaders and the rasterizer cso. 
*/ +void +nv50_validate_derived_rs(struct nv50_context *nv50) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + uint32_t color, psize; + + nv50_sprite_coords_validate(nv50); + + if (nv50->dirty & NV50_NEW_FRAGPROG) + return; + psize = nv50->state.semantic_psize & ~NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; + color = nv50->state.semantic_color & ~NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + + if (nv50->rast->pipe.clamp_vertex_color) + color |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + + if (color != nv50->state.semantic_color) { + nv50->state.semantic_color = color; + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 1); + OUT_RING (chan, color); + } + + if (nv50->rast->pipe.point_size_per_vertex) + psize |= NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; + + if (psize != nv50->state.semantic_psize) { + nv50->state.semantic_psize = psize; + BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_3), 1); + OUT_RING (chan, psize); + } +} + static int nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4], struct nv50_varying *in, struct nv50_varying *out) @@ -372,6 +405,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) map[m++] = vp->vp.psiz; } + if (nv50->rast->pipe.clamp_vertex_color) + colors |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; + n = (m + 3) / 4; assert(m <= 64); @@ -404,6 +440,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) nv50->state.interpolant_ctrl = interp; + nv50->state.semantic_color = colors; + nv50->state.semantic_psize = psiz; + BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4); OUT_RINGp (chan, lin, 4); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index db257159698..799f49619d2 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -97,8 +97,14 @@ nv50_blend_state_create(struct pipe_context *pipe, so->pipe = *cso; - SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); + SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1); + SB_DATA (so, !cso->independent_blend_enable); + + SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1); + 
SB_DATA (so, !cso->independent_blend_enable); + if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); for (i = 0; i < 8; ++i) { SB_DATA(so, cso->rt[i].blend_enable); if (cso->rt[i].blend_enable) @@ -121,8 +127,8 @@ nv50_blend_state_create(struct pipe_context *pipe, } } } else { - for (i = 0; i < 8; ++i) - SB_DATA(so, cso->rt[0].blend_enable); + SB_BEGIN_3D(so, BLEND_ENABLE(0), 1); + SB_DATA (so, cso->rt[0].blend_enable); } if (emit_common_func) { @@ -145,17 +151,16 @@ nv50_blend_state_create(struct pipe_context *pipe, SB_DATA (so, 0); } - SB_BEGIN_3D(so, COLOR_MASK(0), 8); if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, COLOR_MASK(0), 8); for (i = 0; i < 8; ++i) SB_DATA(so, nv50_colormask(cso->rt[i].colormask)); } else { - uint32_t cmask = nv50_colormask(cso->rt[0].colormask); - for (i = 0; i < 8; ++i) - SB_DATA(so, cmask); + SB_BEGIN_3D(so, COLOR_MASK(0), 1); + SB_DATA (so, nv50_colormask(cso->rt[0].colormask)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -174,6 +179,7 @@ nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(hwcso); } +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) @@ -198,6 +204,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1); SB_DATA (so, cso->light_twoside); + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 
0x11111111 : 0x00000000); + SB_BEGIN_3D(so, LINE_WIDTH, 1); SB_DATA (so, fui(cso->line_width)); SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1); @@ -258,7 +267,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, SB_DATA (so, fui(cso->offset_units * 2.0f)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } @@ -337,7 +346,7 @@ nv50_zsa_state_create(struct pipe_context *pipe, SB_DATA (so, 0); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f3d45eb95e0..cdf1a982fcc 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -309,7 +309,7 @@ static struct state_validate { { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, - { nv50_sprite_coords_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | + { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, { nv50_validate_textures, NV50_NEW_TEXTURES }, diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index 515e3e78d42..4c98c7e46fc 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -21,13 +21,13 @@ struct nv50_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[78]; + uint32_t state[82]; // TODO: allocate less if !independent_blend_enable }; struct nv50_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[40]; + uint32_t state[42]; }; struct nv50_zsa_stateobj { diff --git 
a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index dc9e2880f0f..3d7e880ccce 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "util/u_pack_color.h" #include "util/u_format.h" +#include "util/u_surface.h" #include "nv50_context.h" #include "nv50_resource.h" @@ -198,6 +199,13 @@ nv50_resource_copy_region(struct pipe_context *pipe, int ret; unsigned dst_layer = dstz, src_layer = src_box->z; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + assert((src->format == dst->format) || (nv50_2d_format_faithful(src->format) && nv50_2d_format_faithful(dst->format))); diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 1449cb04c69..25dcaaea14f 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -20,8 +20,6 @@ * SOFTWARE. 
*/ -/* #define NV50_TGSI2NC_DEBUG */ - #include <unistd.h> #include "nv50_context.h" @@ -213,7 +211,7 @@ static INLINE void bld_warn_uninitialized(struct bld_context *bld, int kind, struct bld_value_stack *stk, struct nv_basic_block *b) { -#ifdef NV50_TGSI2NC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR long i = (stk - &bld->tvs[0][0]) / 4; long c = (stk - &bld->tvs[0][0]) & 3; @@ -273,6 +271,12 @@ fetch_by_bb(struct bld_value_stack *stack, fetch_by_bb(stack, vals, n, b->in[i]); } +static INLINE boolean +nvbb_is_terminated(struct nv_basic_block *bb) +{ + return bb->exit && bb->exit->is_terminator; +} + static INLINE struct nv_value * bld_load_imm_u32(struct bld_context *bld, uint32_t u); @@ -1556,7 +1560,7 @@ bld_instruction(struct bld_context *bld, int c; uint opcode = translate_opcode(insn->Instruction.Opcode); -#ifdef NV50_TGSI2NC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); #endif @@ -1727,8 +1731,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); - if (bld->pc->current_block->exit && - !bld->pc->current_block->exit->is_terminator) + if (!nvbb_is_terminated(bld->pc->current_block)) bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE); --bld->cond_lvl; @@ -1800,7 +1803,8 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); + if (!nvbb_is_terminated(bld->pc->current_block)) + bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 94fa081ad7e..2ca0bc23836 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -82,6 +82,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_SERIALIZE 0x00000110 +#define NVC0_3D_LINE_WIDTH_SEPARATE 0x0000020c + #define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 #define NVC0_3D_MEM_BARRIER 0x0000021c @@ -342,10 +344,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 +#define NVC0_3D_COLOR_MASK_COMMON 0x00000f90 + #define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) #define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 #define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 +#define NVC0_3D_RT_SEPARATE_FRAG_DATA 0x00000fac + #define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) #define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 #define NVC0_3D_MSAA_MASK__LEN 0x00000004 @@ -561,6 +567,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 #define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 +#define NVC0_3D_BLEND_SEPARATE_ALPHA 0x0000133c + #define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 #define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 #define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 @@ -583,6 +591,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 +#define NVC0_3D_BLEND_ENABLE_COMMON 0x0000135c + #define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) #define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 #define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 @@ -651,7 +661,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 #define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 -#define NVC0_3D_LINE_WIDTH 0x000013b0 +#define NVC0_3D_LINE_WIDTH_SMOOTH 0x000013b0 + +#define NVC0_3D_LINE_WIDTH_ALIASED 0x000013b4 #define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 #define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 @@ -977,6 +989,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_CULL_FACE_BACK 0x00000405 #define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NVC0_3D_LINE_LAST_PIXEL 0x00001924 + #define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c @@ -1220,8 +1234,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 #define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 -#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 - #define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820 #define NVC0_3D_BLEND_ENABLES 0x00003858 diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 5d2168e600b..2f2a3da7c44 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -47,6 +47,15 @@ nvc0_flush(struct pipe_context *pipe, } static void +nvc0_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; + + IMMED_RING(chan, RING_3D(SERIALIZE), 0); + IMMED_RING(chan, RING_3D(TEX_CACHE_CTL), 0); +} + +static void nvc0_context_unreference_resources(struct nvc0_context *nvc0) { unsigned s, i; @@ -128,6 +137,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) pipe->clear = nvc0_clear; pipe->flush = nvc0_flush; + pipe->texture_barrier = nvc0_texture_barrier; if (!screen->cur_ctx) screen->cur_ctx = nvc0; diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 102997e4fcb..f97141dd46e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -1,7 +1,6 @@ #ifndef __NVC0_CONTEXT_H__ #define __NVC0_CONTEXT_H__ -#include <stdio.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -13,6 +12,7 @@ #include "draw/draw_vertex.h" +#include "nv50/nv50_debug.h" #include "nvc0_winsys.h" #include "nvc0_stateobj.h" #include "nvc0_screen.h" @@ -26,15 +26,6 @@ #include "nvc0_2d.xml.h" #include 
"nvc0_m2mf.xml.h" -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); - -#ifdef NOUVEAU_DEBUG -# define NOUVEAU_DBG(args...) printf(args); -#else -# define NOUVEAU_DBG(args...) -#endif - #define NVC0_NEW_BLEND (1 << 0) #define NVC0_NEW_RASTERIZER (1 << 1) #define NVC0_NEW_ZSA (1 << 2) diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c index 454c7440631..678e9b563ee 100644 --- a/src/gallium/drivers/nvc0/nvc0_formats.c +++ b/src/gallium/drivers/nvc0/nvc0_formats.c @@ -145,24 +145,64 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM, A_(C0, C0, C0, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 8, 0), - SAMPLER_VIEW }, + SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM, - A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + [PIPE_FORMAT_L16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, + B_(C0, C0, C0, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16, 0), SAMPLER_VIEW | RENDER_TARGET }, - [PIPE_FORMAT_I16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, - A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + [PIPE_FORMAT_L16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_L32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT, + B_(C0, C0, C0, ONE_FLOAT, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW | RENDER_TARGET }, + + [PIPE_FORMAT_I8_UNORM] = { 0, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_UNORM] = { 0, + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_SNORM] = { 0, + B_(C0, C0, C0, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I16_FLOAT] = { 0, + B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_I32_FLOAT] = { 0, + 
B_(C0, C0, C0, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM, A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0), SAMPLER_VIEW | RENDER_TARGET }, + [PIPE_FORMAT_A8_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 8, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_A16_UNORM] = { 0, A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 16, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_A16_SNORM] = { 0, + A_(ZERO, ZERO, ZERO, C0, SNORM, SNORM, SNORM, SNORM, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A16_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_A32_FLOAT] = { 0, + B_(ZERO, ZERO, ZERO, C0, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_L8A8_UNORM] = { 0, A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0), SAMPLER_VIEW }, @@ -175,6 +215,18 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 16_16, 0), SAMPLER_VIEW }, + [PIPE_FORMAT_L16A16_SNORM] = { 0, + A_(C0, C0, C0, C1, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L16A16_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0), + SAMPLER_VIEW }, + + [PIPE_FORMAT_L32A32_FLOAT] = { 0, + B_(C0, C0, C0, C1, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0), + SAMPLER_VIEW }, + [PIPE_FORMAT_L4A4_UNORM] = { 0, B_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 4_4, 0), SAMPLER_VIEW }, @@ -322,7 +374,7 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM, - A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, SNORM, SNORM, SNORM, SNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM, @@ -340,7 +392,7 @@ const struct nvc0_format 
nvc0_format_table[PIPE_FORMAT_COUNT] = VERTEX_BUFFER | SAMPLER_VIEW }, [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM, - A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0), + A_(C0, C1, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, 16_16, 0), VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET }, [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM, @@ -520,4 +572,8 @@ const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = [PIPE_FORMAT_R1_UNORM] = { 0, B_(C0, ZERO, ZERO, ONE_FLOAT, UNORM, UNORM, UNORM, UNORM, BITMAP_8X8, 0), SAMPLER_VIEW }, + + [PIPE_FORMAT_A8B8G8R8_UNORM] = { 0, + B_(C3, C2, C1, C0, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0), + SAMPLER_VIEW }, }; diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h index b7d0d3eafa4..a0a875fe627 100644 --- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -68,19 +68,6 @@ static const uint32_t nvc0_9097_vertex_array_select[] = 0x00001841, /* 0x0b: send $r3 */ }; -static const uint32_t nvc0_9097_color_mask_brdc[] = -{ - 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */ - 0x00000841, /* send $r1 */ - 0x00000841, /* send $r1 */ - 0x00000841, /* send $r1 */ - 0x00000841, /* send $r1 */ - 0x00000841, /* send $r1 */ - 0x00000841, /* send $r1 */ - 0x000008c1, /* exit send $r1 */ - 0x00000841, /* send $r1 */ -}; - /* * [GL_POLYGON_MODE_FRONT] = arg; * diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index bd85a7f1ffd..2c3b8555f32 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -20,8 +20,6 @@ * SOFTWARE. 
*/ -#define NOUVEAU_DEBUG 1 - #include "nvc0_pc.h" #include "nvc0_program.h" @@ -262,7 +260,7 @@ nvc0_print_program(struct nv_pc *pc) nvc0_print_function(pc->root[i]); } -#if NOUVEAU_DEBUG > 1 +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW static void nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b) { @@ -327,7 +325,7 @@ nvc0_pc_print_binary(struct nv_pc *pc) { unsigned i; - NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); + NV50_DBGMSG(SHADER, "nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8); for (i = 0; i < pc->emit_size / 4; i += 2) { debug_printf("0x%08x ", pc->emit[i + 0]); @@ -344,7 +342,7 @@ nvc0_emit_program(struct nv_pc *pc) uint32_t *code = pc->emit; int n; - NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size); + NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->emit_size); pc->emit_pos = 0; for (n = 0; n < pc->num_blocks; ++n) { @@ -365,11 +363,10 @@ nvc0_emit_program(struct nv_pc *pc) pc->emit = code; -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER nvc0_pc_print_binary(pc); -#else - debug_printf("not printing binary\n"); #endif + return 0; } @@ -396,7 +393,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) ret = nvc0_tgsi_to_nc(pc, ti); if (ret) goto out; -#if NOUVEAU_DEBUG > 1 +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nvc0_print_program(pc); #endif @@ -406,7 +403,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) ret = nvc0_pc_exec_pass0(pc); if (ret) goto out; -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR nvc0_print_program(pc); #endif @@ -414,7 +411,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) ret = nvc0_pc_exec_pass1(pc); if (ret) goto out; -#if NOUVEAU_DEBUG > 1 +#if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW nvc0_print_program(pc); nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0); #endif @@ -444,7 +441,7 @@ nvc0_generate_code(struct nvc0_translation_info *ti) ti->prog->relocs = pc->reloc_entries; ti->prog->num_relocs = pc->num_relocs; - NOUVEAU_DBG("SHADER 
TRANSLATION - %s\n", ret ? "failure" : "success"); + NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success"); out: nv_pc_free_refs(pc); @@ -573,7 +570,7 @@ nvc0_insn_delete(struct nv_instruction *nvi) if (nvi == b->phi) { if (nvi->opcode != NV_OP_PHI) - NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n"); + NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n"); assert(!nvi->prev); if (!nvi->next || nvi->next->opcode != NV_OP_PHI) diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 3a5612a5fac..441692d766c 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -23,20 +23,7 @@ #ifndef __NVC0_COMPILER_H__ #define __NVC0_COMPILER_H__ -#include <stdio.h> - -#ifndef NOUVEAU_DBG -#ifdef NOUVEAU_DEBUG -# define NOUVEAU_DBG(args...) debug_printf(args); -#else -# define NOUVEAU_DBG(args...) -#endif -#endif - -#ifndef NOUVEAU_ERR -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); -#endif +#include "nv50/nv50_debug.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 76ad40dbcf8..e35653280a1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -715,6 +715,10 @@ emit_interp(struct nv_pc *pc, struct nv_instruction *i) if (i->opcode == NV_OP_PINTERP) { pc->emit[0] |= 0x040; SID(pc, i->src[1], 26); + + if (i->src[0]->value->reg.address >= 0x280 && + i->src[0]->value->reg.address <= 0x29c) + pc->emit[0] |= 0x080; /* XXX: ? 
*/ } else { SID(pc, NULL, 26); } @@ -875,7 +879,9 @@ emit_st(struct nv_pc *pc, struct nv_instruction *i) void nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i) { +#if NV50_DEBUG & NV50_DEBUG_SHADER debug_printf("EMIT: "); nvc0_print_instruction(i); +#endif switch (i->opcode) { case NV_OP_VFETCH: diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index f7bf1680d09..7f5fbaff690 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -115,7 +115,7 @@ inst_is_noop(struct nv_instruction *nvi) return FALSE; if (nvi->src[0]->value->join->reg.id < 0) { - NOUVEAU_DBG("inst_is_noop: orphaned value detected\n"); + NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n"); return TRUE; } @@ -178,7 +178,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) } pc->emit_size += b->emit_size; -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR if (!b->entry) debug_printf("BB:%i is now empty\n", b->id); else @@ -206,7 +206,7 @@ nvc0_pc_exec_pass2(struct nv_pc *pc) { int i, ret; - NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks); + NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); pc->num_blocks = 0; /* will reorder bb_list */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index f4afe083e2d..15bebb2134a 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -20,11 +20,11 @@ * SOFTWARE. 
*/ -#define NOUVEAU_DEBUG 1 - -/* #define NVC0_RA_DEBUG_LIVEI */ -/* #define NVC0_RA_DEBUG_LIVE_SETS */ -/* #define NVC0_RA_DEBUG_JOIN */ +#if NV50_DEBUG & NV50_DEBUG_PROG_RA +# define NVC0_RA_DEBUG_LIVEI +# define NVC0_RA_DEBUG_LIVE_SETS +# define NVC0_RA_DEBUG_JOIN +#endif #include "nvc0_pc.h" #include "util/u_simple_list.h" @@ -504,7 +504,9 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) } if (pn != p && pn->exit) { - ctx->pc->current_block = b->in[n ? 0 : 1]; + assert(!b->in[!n]->exit || b->in[!n]->exit->terminator); + /* insert terminator (branch to ENDIF) in new else block */ + ctx->pc->current_block = pn; ni = new_instruction(ctx->pc, NV_OP_BRA); ni->target = b; ni->terminator = 1; @@ -965,7 +967,7 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) struct nv_pc_pass *ctx; int i, ret; - NOUVEAU_DBG("REGISTER ALLOCATION - entering\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n"); ctx = CALLOC_STRUCT(nv_pc_pass); if (!ctx) @@ -1031,7 +1033,7 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) for (i = 0; i < pc->num_values; ++i) livei_release(&pc->values[i]); - NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n"); + NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n"); out: FREE(ctx->insns); diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 3c59213176e..bcee027917e 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -23,8 +23,6 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_defines.h" -#define NOUVEAU_DEBUG - #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" @@ -225,6 +223,9 @@ nvc0_interp_mode(const struct tgsi_full_declaration *decl) if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) mode = NVC0_INTERP_PERSPECTIVE; else + if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR) + mode = NVC0_INTERP_PERSPECTIVE; + else mode 
= NVC0_INTERP_LINEAR; if (decl->Declaration.Centroid) @@ -574,7 +575,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) int ret; unsigned i; -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER tgsi_dump(prog->pipe.tokens, 0); #endif @@ -691,12 +692,12 @@ nvc0_program_translate(struct nvc0_program *prog) if (ret) NOUVEAU_ERR("shader translation failed\n"); - { - unsigned i; - for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) - debug_printf("HDR[%02lx] = 0x%08x\n", - i * sizeof(prog->hdr[0]), prog->hdr[i]); - } +#if NV50_DEBUG & NV50_DEBUG_SHADER + unsigned i; + for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) + debug_printf("HDR[%02lx] = 0x%08x\n", + i * sizeof(prog->hdr[0]), prog->hdr[i]); +#endif out: if (ti->immd32) diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 1047ba3c337..ca0691d2aee 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -20,6 +20,7 @@ * SOFTWARE. 
*/ +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" @@ -39,17 +40,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, if (sample_count > 1) return FALSE; - if (!util_format_s3tc_enabled) { - switch (format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - break; - } - } + if (!util_format_is_supported(format, bindings)) + return FALSE; /* transfers & shared are always supported */ bindings &= ~(PIPE_BIND_TRANSFER_READ | @@ -93,6 +85,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + return 1; case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_OCCLUSION_QUERY: return 1; @@ -113,6 +107,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); @@ -291,8 +286,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) OUT_RING (chan, 1 << 12); BEGIN_RING(chan, RING_3D_(0x151c), 1); OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x020c), 1); - OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D_(0x030c), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D_(0x0300), 1); @@ -309,11 +302,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D_(0x075c), 1); OUT_RING (chan, 3); - - BEGIN_RING(chan, RING_3D_(0x0fac), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D_(0x0f90), 1); - OUT_RING (chan, 0); } static void @@ -445,6 +433,14 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X); BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1); OUT_RING (chan, 0); + BEGIN_RING(chan, 
RING_3D(LINE_WIDTH_SEPARATE), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(LINE_LAST_PIXEL), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(BLEND_SEPARATE_ALPHA), 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(BLEND_ENABLE_COMMON), 1); + OUT_RING (chan, 0); nvc0_magic_3d_init(chan); @@ -452,7 +448,10 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) if (ret) goto fail; - nouveau_resource_init(&screen->text_heap, 0, 1 << 20); + /* XXX: getting a page fault at the end of the code buffer every few + * launches, don't use the last 256 bytes to work around them - prefetch ? + */ + nouveau_resource_init(&screen->text_heap, 0, (1 << 20) - 0x100); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, &screen->uniforms); @@ -557,17 +556,6 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, 8192 << 16); OUT_RING (chan, 8192 << 16); - BEGIN_RING(chan, RING_3D_(0x0fac), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D_(0x3484), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D_(0x0dbc), 1); - OUT_RING (chan, 0x00010000); - BEGIN_RING(chan, RING_3D_(0x0dd8), 1); - OUT_RING (chan, 0xff800006); - BEGIN_RING(chan, RING_3D_(0x3488), 1); - OUT_RING (chan, 0); - #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); i = 0; @@ -577,10 +565,11 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select); MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); - MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc); BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(RT_SEPARATE_FRAG_DATA), 1); + OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x40); BEGIN_RING(chan, RING_3D(LAYER), 1); @@ -599,8 +588,6 @@ nvc0_screen_create(struct pipe_winsys *ws, struct 
nouveau_device *dev) BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1); OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL); - BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); - OUT_RING (chan, 0x11111111); BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 7294eaa222e..287160e0b2a 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -97,8 +97,6 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) // BEGIN_RING(chan, RING_3D_(0x163c), 1); // OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1); - OUT_RING (chan, 1); } void diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index ab68abcfb5a..b0b2065167e 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -93,9 +93,18 @@ nvc0_blend_state_create(struct pipe_context *pipe, SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable); + if (!cso->logicop_enable) + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + + SB_IMMED_3D(so, BLEND_ENABLES, 0); + } else if (!cso->independent_blend_enable) { - SB_BEGIN_3D(so, BLEND_ENABLES, 1); - SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0); + SB_IMMED_3D(so, BLEND_ENABLES, cso->rt[0].blend_enable ? 
0xff : 0); if (cso->rt[0].blend_enable) { SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); @@ -108,7 +117,8 @@ nvc0_blend_state_create(struct pipe_context *pipe, SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor)); } - SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1); + SB_IMMED_3D(so, COLOR_MASK_COMMON, 1); + SB_BEGIN_3D(so, COLOR_MASK(0), 1); SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); } else { uint8_t en = 0; @@ -126,23 +136,15 @@ nvc0_blend_state_create(struct pipe_context *pipe, SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); } - SB_BEGIN_3D(so, BLEND_ENABLES, 1); - SB_DATA (so, en); + SB_IMMED_3D(so, BLEND_ENABLES, en); + SB_IMMED_3D(so, COLOR_MASK_COMMON, 0); SB_BEGIN_3D(so, COLOR_MASK(0), 8); for (i = 0; i < 8; ++i) SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); } - if (cso->logicop_enable) { - SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); - SB_DATA (so, 1); - SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); - } else { - SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); - } - - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return so; } @@ -161,6 +163,7 @@ nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(hwcso); } +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ static void * nvc0_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) @@ -183,19 +186,23 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); - SB_BEGIN_3D(so, LINE_WIDTH, 1); - SB_DATA (so, fui(cso->line_width)); + SB_IMMED_3D(so, VERT_COLOR_CLAMP_EN, cso->clamp_vertex_color); + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 
0x11111111 : 0x00000000); + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); + if (cso->line_smooth) + SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1); + else + SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1); + SB_DATA (so, fui(cso->line_width)); - SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); + SB_IMMED_3D(so, LINE_STIPPLE_ENABLE, cso->line_stipple_enable); if (cso->line_stipple_enable) { - SB_DATA (so, 1); SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); SB_DATA (so, (cso->line_stipple_pattern << 8) | cso->line_stipple_factor); - } else { - SB_DATA (so, 0); } SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); @@ -249,7 +256,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_DATA (so, fui(cso->offset_units * 2.0f)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } @@ -320,7 +327,7 @@ nvc0_zsa_state_create(struct pipe_context *pipe, SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); } - assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); return (void *)so; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index bb81480bab9..9b2a28150b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -1,6 +1,7 @@ +#include "util/u_math.h" + #include "nvc0_context.h" -#include "os/os_time.h" static void nvc0_validate_zcull(struct nvc0_context *nvc0) @@ -156,11 +157,10 @@ static void nvc0_validate_stencil_ref(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; + const ubyte *ref = &nvc0->stencil_ref.ref_value[0]; - BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1); - OUT_RING (chan, nvc0->stencil_ref.ref_value[0]); - BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1); - OUT_RING (chan, nvc0->stencil_ref.ref_value[1]); + IMMED_RING(chan, 
RING_3D(STENCIL_FRONT_FUNC_REF), ref[0]); + IMMED_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), ref[1]); } static void @@ -214,10 +214,11 @@ nvc0_validate_viewport(struct nvc0_context *nvc0) /* now set the viewport rectangle to viewport dimensions for clipping */ - x = (int)(vp->translate[0] - fabsf(vp->scale[0])); - y = (int)(vp->translate[1] - fabsf(vp->scale[1])); - w = (int)fabsf(2.0f * vp->scale[0]); - h = (int)fabsf(2.0f * vp->scale[1]); + x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0]))); + y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1]))); + w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x; + h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y; + zmin = vp->translate[2] - fabsf(vp->scale[2]); zmax = vp->translate[2] + fabsf(vp->scale[2]); diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 8222f9375ee..e0fe9df25d7 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -19,19 +19,19 @@ struct nvc0_blend_stateobj { struct pipe_blend_state pipe; int size; - uint32_t state[72]; + uint32_t state[70]; }; struct nvc0_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[36]; + uint32_t state[38]; }; struct nvc0_zsa_stateobj { struct pipe_depth_stencil_alpha_state pipe; int size; - uint32_t state[29]; + uint32_t state[26]; }; struct nvc0_vertex_element { diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index fc5f45ea25d..6be3702bddc 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "util/u_pack_color.h" #include "util/u_format.h" +#include "util/u_surface.h" #include "nvc0_context.h" #include "nvc0_resource.h" @@ -234,6 +235,13 @@ nvc0_resource_copy_region(struct pipe_context *pipe, int ret; unsigned dst_layer = dstz, src_layer = 
src_box->z; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; if (src->format == dst->format) { diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index a44d330c731..d57be916696 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -22,8 +22,6 @@ #include <unistd.h> -#define NOUVEAU_DEBUG 1 - #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -200,7 +198,7 @@ static INLINE void bld_warn_uninitialized(struct bld_context *bld, int kind, struct bld_register *reg, struct nv_basic_block *b) { -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_SHADER long i = (reg - &bld->tvs[0][0]) / 4; long c = (reg - &bld->tvs[0][0]) & 3; @@ -259,6 +257,12 @@ fetch_by_bb(struct bld_register *reg, fetch_by_bb(reg, vals, n, b->in[i]); } +static INLINE boolean +nvc0_bblock_is_terminated(struct nv_basic_block *bb) +{ + return bb->exit && bb->exit->terminator; +} + static INLINE struct nv_value * bld_load_imm_u32(struct bld_context *bld, uint32_t u); @@ -1465,7 +1469,7 @@ bld_instruction(struct bld_context *bld, uint opcode = translate_opcode(insn->Instruction.Opcode); uint8_t mask = insn->Dst[0].Register.WriteMask; -#ifdef NOUVEAU_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); #endif @@ -1637,8 +1641,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); - if (bld->pc->current_block->exit && - !bld->pc->current_block->exit->terminator) + if (!nvc0_bblock_is_terminated(bld->pc->current_block)) bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); --bld->cond_lvl; diff --git a/src/gallium/drivers/nvfx/nvfx_context.c 
b/src/gallium/drivers/nvfx/nvfx_context.c index 2bcb93d93e3..2b1510264a1 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -13,7 +13,7 @@ nvfx_flush(struct pipe_context *pipe, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; + /*struct nouveau_grobj *eng3d = screen->eng3d;*/ /* XXX: we need to actually be intelligent here */ /* XXX This flag wasn't set by the state tracker anyway. */ diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 8742f60c163..4a97dfb9c25 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -1,5 +1,6 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "util/u_format.h" #include "util/u_format_s3tc.h" #include "util/u_simple_screen.h" @@ -82,6 +83,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; // TODO: implement primitive restart case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 0; default: NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param); return 0; @@ -207,6 +210,9 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen, { struct nvfx_screen *screen = nvfx_screen(pscreen); + if (!util_format_is_supported(format, bind)) + return FALSE; + if (sample_count > 1) return FALSE; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index be31853d717..ced26494e15 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" #include "util/u_blitter.h" +#include "util/u_surface.h" #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_screen.h" @@ -252,6 +253,13 @@ nvfx_resource_copy_region(struct 
pipe_context *pipe, if(!w || !h) return; + /* Fallback for buffers. */ + if (dstr->target == PIPE_BUFFER && srcr->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dstr, dst_level, dstx, dsty, dstz, + srcr, src_level, src_box); + return; + } + if(copy_threshold < 0) copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4); diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 66d900ebb5f..c9401b99f1c 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -29,8 +29,7 @@ C_SOURCES = \ r300_transfer.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/src/gallium/winsys/drm/radeon/core + -I$(TOP)/src/mesa/drivers/dri/r300/compiler COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index b49db937994..3af157a7956 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -6,7 +6,6 @@ env = env.Clone() # add the paths for r300compiler env.Append(CPPPATH = [ '#/src/mesa/drivers/dri/r300/compiler', - '#/src/gallium/winsys/drm/radeon/core', '#/include', '#/src/mesa', ]) diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 37b635fd120..1217488bac7 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -23,10 +23,10 @@ #include "r300_context.h" #include "r300_emit.h" #include "r300_texture.h" -#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" +#include "util/u_surface.h" enum r300_blitter_op /* bitmask */ { @@ -206,7 +206,7 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean can_hyperz = 
r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; /* Enable fast Z clear. @@ -253,17 +253,15 @@ static void r300_clear(struct pipe_context* pipe, } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ - unsigned dwords; - /* Calculate zmask_clear and hiz_clear atom sizes. */ - r300_update_hyperz_state(r300); - dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + - (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + - r300_get_num_cs_end_dwords(r300); + unsigned dwords = + (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + + r300_get_num_cs_end_dwords(r300); /* Reserve CS space. */ - if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); + if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { + r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); } /* Emit clear packets. */ @@ -306,16 +304,10 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - r300->hyperz_locked = TRUE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); - - r300->hyperz_locked = FALSE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. */ @@ -334,21 +326,14 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == dst->texture) { r300_decompress_zmask(r300); - } else { - r300->hyperz_locked = TRUE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); } } + /* XXX Do not decompress ZMask of the currently-set zbuffer. 
*/ r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); - - if (r300->hyperz_locked) { - r300->hyperz_locked = FALSE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); - } } void r300_decompress_zmask(struct r300_context *r300) @@ -431,13 +416,17 @@ static void r300_resource_copy_region(struct pipe_context *pipe, util_format_description(dst->format); struct pipe_box box; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == src || fb->zsbuf->texture == dst) { r300_decompress_zmask(r300); - } else { - r300->hyperz_locked = TRUE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -513,11 +502,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, r300_resource_set_properties(pipe->screen, src, 0, &old_src); if (old_dst.format != new_dst.format) r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); - - if (r300->hyperz_locked) { - r300->hyperz_locked = FALSE; - r300_mark_atom_dirty(r300, &r300->hyperz_state); - } } void r300_init_blit_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 990acea9f44..4949703120d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -419,4 +419,5 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; caps->z_compress = caps->is_rv350 ? 
R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; + caps->has_us_format = caps->family == CHIP_FAMILY_R520; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 68943d561ba..d0050bed2e8 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -90,6 +90,8 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; + /* Whether R500_US_FORMAT0_0 exists (R520-only and depends on DRM). */ + boolean has_us_format; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 720d666d98c..15d1278c3bb 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -32,7 +32,6 @@ #include "r300_emit.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_winsys.h" static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) @@ -167,8 +166,8 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); + boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. 
@@ -379,7 +378,7 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500 || (r300->screen->caps.is_rv350 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) { + r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; @@ -391,7 +390,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); - struct r300_winsys_screen *rws = r300screen->rws; + struct radeon_winsys *rws = r300screen->rws; if (!r300) return NULL; @@ -514,19 +513,19 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", - rws->get_value(rws, R300_VID_DRM_MAJOR), - rws->get_value(rws, R300_VID_DRM_MINOR), - rws->get_value(rws, R300_VID_DRM_PATCHLEVEL), + rws->get_value(rws, RADEON_VID_DRM_MAJOR), + rws->get_value(rws, RADEON_VID_DRM_MINOR), + rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), screen->get_name(screen), - rws->get_value(rws, R300_VID_PCI_ID), - rws->get_value(rws, R300_VID_GB_PIPES), - rws->get_value(rws, R300_VID_Z_PIPES), - rws->get_value(rws, R300_VID_GART_SIZE) >> 20, - rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, - rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", - rws->get_value(rws, R300_CAN_HYPERZ) && + rws->get_value(rws, RADEON_VID_PCI_ID), + rws->get_value(rws, RADEON_VID_R300_GB_PIPES), + rws->get_value(rws, RADEON_VID_R300_Z_PIPES), + rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, + rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, + rws->get_value(rws, RADEON_VID_CAN_AACOMPRESS) ? "YES" : "NO", + rws->get_value(rws, RADEON_VID_CAN_HYPERZ) && r300->screen->caps.zmask_ram ? 
"YES" : "NO", - rws->get_value(rws, R300_CAN_HYPERZ) && + rws->get_value(rws, RADEON_VID_CAN_HYPERZ) && r300->screen->caps.hiz_ram ? "YES" : "NO"); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e395f41290e..8a0a54cf1e9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -34,7 +34,7 @@ #include "r300_defines.h" #include "r300_screen.h" -#include "r300_winsys.h" +#include "../../winsys/radeon/drm/radeon_winsys.h" struct u_upload_mgr; struct r300_context; @@ -190,6 +190,7 @@ struct r300_texture_format_state { uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */ + uint32_t us_format0; /* R500_US_FORMAT0_0: 0x4640 (through 15) */ }; struct r300_sampler_view { @@ -211,7 +212,7 @@ struct r300_texture_sampler_state { struct r300_texture_format_state format; uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ - uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ }; struct r300_textures_state { @@ -290,12 +291,12 @@ struct r300_query { boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_bo *buf; - struct r300_winsys_cs_handle *cs_buf; + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ - enum r300_buffer_domain domain; + enum radeon_bo_domain domain; /* Linked list members. */ struct r300_query* prev; @@ -306,10 +307,10 @@ struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. 
*/ - struct r300_winsys_bo *buf; - struct r300_winsys_cs_handle *cs_buf; + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; - enum r300_buffer_domain domain; + enum radeon_bo_domain domain; uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ @@ -339,8 +340,8 @@ struct r300_texture_desc { /* Buffer tiling. * Macrotiling is specified per-level because small mipmaps cannot * be macrotiled. */ - enum r300_buffer_tiling microtile; - enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; + enum radeon_bo_layout microtile; + enum radeon_bo_layout macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. */ unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; @@ -396,9 +397,9 @@ struct r300_resource struct u_vbuf_resource b; /* Winsys buffer backing this resource. */ - struct r300_winsys_bo *buf; - struct r300_winsys_cs_handle *cs_buf; - enum r300_buffer_domain domain; + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; + enum radeon_bo_domain domain; unsigned buf_size; /* Constant buffers are in user memory. */ @@ -447,14 +448,21 @@ enum r300_hiz_func { HIZ_FUNC_MIN, }; +/* For deferred fragment shader state validation. */ +enum r300_fs_validity_status { + FRAGMENT_SHADER_VALID, /* No need to change/validate the FS. */ + FRAGMENT_SHADER_MAYBE_DIRTY,/* Validate the FS if external state was changed. */ + FRAGMENT_SHADER_DIRTY /* Always validate the FS (if the FS was changed) */ +}; + struct r300_context { /* Parent class */ struct pipe_context context; /* The interface to the windowing system, etc. */ - struct r300_winsys_screen *rws; + struct radeon_winsys *rws; /* The command stream. */ - struct r300_winsys_cs *cs; + struct radeon_winsys_cs *cs; /* Screen. */ struct r300_screen *screen; @@ -580,6 +588,8 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). 
*/ boolean two_sided_color; + /* Whether fragment color clamping is enabled. */ + boolean frag_clamp; /* Whether fast color clear is enabled. */ boolean cbzb_clear; /* Whether ZMASK is enabled. */ @@ -596,6 +606,10 @@ struct r300_context { enum r300_hiz_func hiz_func; /* HiZ clear value. */ uint32_t hiz_clear_value; + /* Whether fragment shader needs to be validated. */ + enum r300_fs_validity_status fs_status; + /* Framebuffer multi-write. */ + boolean fb_multiwrite; void *dsa_decompress_zmask; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 2e52dfa43c6..c208e672f42 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -29,7 +29,6 @@ #include "r300_reg.h" #include "r300_context.h" -#include "r300_winsys.h" /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so @@ -40,14 +39,14 @@ */ #define CS_LOCALS(context) \ - struct r300_winsys_cs *cs_copy = (context)->cs; \ - struct r300_winsys_screen *cs_winsys = (context)->rws; \ + struct radeon_winsys_cs *cs_copy = (context)->cs; \ + struct radeon_winsys *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; #ifdef DEBUG #define BEGIN_CS(size) do { \ - assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ + assert(size <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ cs_count = size; \ } while (0) diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index 2d111f9158d..30e9befad21 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -32,19 +32,4 @@ #define R300_INVALID_FORMAT 0xffff -/* Tiling flags. 
*/ -enum r300_buffer_tiling { - R300_BUFFER_LINEAR = 0, - R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED, - - R300_BUFFER_UNKNOWN, - R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN -}; - -enum r300_buffer_domain { /* bitfield */ - R300_DOMAIN_GTT = 2, - R300_DOMAIN_VRAM = 4 -}; - #endif diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e17a907e77e..62435c5e2e2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -375,7 +375,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); uint32_t rb3d_cctl = 0; CS_LOCALS(r300); @@ -387,8 +387,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) if (r300->screen->caps.is_r500) { rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } - if (fb->nr_cbufs && - r300_fragment_shader_writes_all(r300_fs(r300))) { + if (fb->nr_cbufs && r300->fb_multiwrite) { rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); } @@ -483,7 +482,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be * marked as UNUSED in the US block. 
*/ - if (r300_fragment_shader_writes_all(r300_fs(r300))) { + if (r300->fb_multiwrite) { num_cbufs = MIN2(num_cbufs, 1); } @@ -495,6 +494,11 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(fb->cbufs[i])->format); } + for (; i < 1; i++) { + OUT_CS(R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_B | R300_C1_SEL_G | + R300_C2_SEL_R | R300_C3_SEL_A); + } for (; i < 4; i++) { OUT_CS(R300_US_OUT_FMT_UNUSED); } @@ -771,6 +775,7 @@ void r300_emit_textures_state(struct r300_context *r300, struct r300_texture_sampler_state *texstate; struct r300_resource *tex; unsigned i; + boolean has_us_format = r300->screen->caps.has_us_format; CS_LOCALS(r300); BEGIN_CS(size); @@ -792,6 +797,11 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); OUT_CS_RELOC(tex); + + if (has_us_format) { + OUT_CS_REG(R500_US_FORMAT0_0 + (i * 4), + texstate->format.us_format0); + } } } END_CS; @@ -1221,7 +1231,7 @@ validate: if (flushed) return FALSE; - r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); + r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index b3d0d344ec4..de7d77d608b 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -38,7 +38,7 @@ void r300_flush(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_atom *atom; - struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence; + struct pb_buffer **rfence = (struct pb_buffer**)fence; if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); @@ -48,11 +48,11 @@ void r300_flush(struct pipe_context *pipe, *rfence = r300->rws->buffer_create(r300->rws, 1, 1, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STATIC, - R300_DOMAIN_GTT); + RADEON_DOMAIN_GTT); /* Add the fence as a dummy relocation. 
*/ r300->rws->cs_add_reloc(r300->cs, r300->rws->buffer_get_cs_handle(*rfence), - R300_DOMAIN_GTT, R300_DOMAIN_GTT); + RADEON_DOMAIN_GTT, RADEON_DOMAIN_GTT); } if (r300->dirty_hw) { diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4c502fefb3f..e3a1bc4a0f4 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -22,6 +22,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -34,6 +35,7 @@ #include "r300_screen.h" #include "r300_fs.h" #include "r300_reg.h" +#include "r300_texture.h" #include "r300_tgsi_to_rc.h" #include "radeon_code.h" @@ -146,10 +148,10 @@ static void get_external_state( struct r300_fragment_program_external_state* state) { struct r300_textures_state *texstate = r300->textures_state.state; + struct r300_rs_state *rs = r300->rs_state.state; unsigned i; - unsigned char *swizzle; - state->frag_clamp = 0; + state->frag_clamp = rs ? rs->rs.clamp_fragment_color : 0; for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; @@ -160,27 +162,37 @@ static void get_external_state( continue; } - t = r300_resource(texstate->sampler_views[i]->base.texture); + t = r300_resource(v->base.texture); if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; - /* Pass depth texture swizzling to the compiler. */ - if (texstate->sampler_views[i]) { - swizzle = texstate->sampler_views[i]->swizzle; - - state->unit[i].depth_texture_swizzle = - RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); - } else { - state->unit[i].depth_texture_swizzle = RC_SWIZZLE_XYZW; - } - /* Fortunately, no need to translate this. 
*/ state->unit[i].texture_compare_func = s->state.compare_func; } state->unit[i].non_normalized_coords = !s->state.normalized_coords; + state->unit[i].convert_unorm_to_snorm = + v->base.format == PIPE_FORMAT_RGTC1_SNORM || + v->base.format == PIPE_FORMAT_LATC1_SNORM; + + /* Pass texture swizzling to the compiler, some lowering passes need it. */ + if (v->base.format == PIPE_FORMAT_RGTC1_SNORM || + v->base.format == PIPE_FORMAT_LATC1_SNORM) { + unsigned char swizzle[4]; + + util_format_combine_swizzles(swizzle, + util_format_description(v->base.format)->swizzle, + v->swizzle); + + state->unit[i].texture_swizzle = + RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], + swizzle[2], swizzle[3]); + } else if (state->unit[i].compare_mode_enabled) { + state->unit[i].texture_swizzle = + RC_MAKE_SWIZZLE(v->swizzle[0], v->swizzle[1], + v->swizzle[2], v->swizzle[3]); + } /* XXX this should probably take into account STR, not just S. */ if (t->tex.is_npot) { diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index ecaadf4af8e..ef330f34c9e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -24,7 +24,6 @@ #include "r300_context.h" #include "r300_reg.h" #include "r300_fs.h" -#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_mm.h" @@ -153,7 +152,7 @@ static void r300_update_hyperz(struct r300_context* r300) } if (!zstex || - !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + !r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ)) return; /* Zbuffer compression. 
*/ diff --git a/src/gallium/drivers/r300/r300_public.h b/src/gallium/drivers/r300/r300_public.h index 8e7a963c55d..b6059203ff1 100644 --- a/src/gallium/drivers/r300/r300_public.h +++ b/src/gallium/drivers/r300/r300_public.h @@ -2,8 +2,8 @@ #ifndef R300_PUBLIC_H #define R300_PUBLIC_H -struct r300_winsys_screen; +struct radeon_winsys; -struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws); +struct pipe_screen* r300_screen_create(struct radeon_winsys *rws); #endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 717485f43cb..9752a519491 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -26,7 +26,6 @@ #include "r300_context.h" #include "r300_screen.h" #include "r300_emit.h" -#include "r300_winsys.h" #include <stdio.h> @@ -46,7 +45,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, return NULL; q->type = query_type; - q->domain = R300_DOMAIN_GTT; + q->domain = RADEON_DOMAIN_GTT; q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) @@ -70,7 +69,7 @@ static void r300_destroy_query(struct pipe_context* pipe, { struct r300_query* q = r300_query(query); - r300_winsys_bo_reference(&q->buf, NULL); + pb_reference(&q->buf, NULL); remove_from_list(q); FREE(query); } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 26594dabe42..429b85545f7 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -218,8 +218,8 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. 
*/ - if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); + if (cs_dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { + r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); flushed = TRUE; } @@ -343,7 +343,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) { + if ((r300_resource(buf)->domain != RADEON_DOMAIN_GTT)) { return FALSE; } @@ -1080,7 +1080,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { - free_dwords = R300_MAX_CMDBUF_DWORDS - r300->cs->cdw; + free_dwords = RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw; short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8a69628c53e..9ec16c6562f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -30,7 +30,6 @@ #include "r300_texture.h" #include "r300_screen_buffer.h" #include "r300_state_inlines.h" -#include "r300_winsys.h" #include "r300_public.h" #include "draw/draw_context.h" @@ -114,9 +113,12 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + return is_r500 ? 1 : 0; case PIPE_CAP_TEXTURE_SWIZZLE: return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return is_r500 ? 1 : 0; /* Unsupported features (boolean caps). 
*/ case PIPE_CAP_TIMER_QUERY: @@ -127,12 +129,12 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_ARRAY_TEXTURES: + case PIPE_CAP_TGSI_INSTANCEID: return 0; /* SWTCL-only features. */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_TGSI_INSTANCEID: return !r300screen->caps.has_tcl; /* Texturing. */ @@ -209,7 +211,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; + return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: @@ -247,7 +249,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; + return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: @@ -308,9 +310,9 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned sample_count, unsigned usage) { - struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct radeon_winsys *rws = r300_screen(screen)->rws; uint32_t retval = 0; - boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0); + boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0); boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -325,11 +327,20 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_RGTC2_SNORM || format == PIPE_FORMAT_LATC2_UNORM || format == PIPE_FORMAT_LATC2_SNORM; + boolean is_x16f_xy16f = format == PIPE_FORMAT_R16_FLOAT 
|| + format == PIPE_FORMAT_R16G16_FLOAT || + format == PIPE_FORMAT_A16_FLOAT || + format == PIPE_FORMAT_L16_FLOAT || + format == PIPE_FORMAT_L16A16_FLOAT || + format == PIPE_FORMAT_I16_FLOAT; boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || format == PIPE_FORMAT_R16G16_FLOAT || format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; + if (!util_format_is_supported(format, usage)) + return FALSE; + /* Check multisampling support. */ switch (sample_count) { case 0: @@ -358,6 +369,8 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. */ (is_r400 || is_r500 || !is_ati2n) && + /* R16F and RG16F texture support was added in as late as DRM 2.8.0 */ + (drm_2_8_0 || !is_x16f_xy16f) && r300_is_sampler_format_supported(format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } @@ -403,7 +416,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, static void r300_destroy_screen(struct pipe_screen* pscreen) { struct r300_screen* r300screen = r300_screen(pscreen); - struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + struct radeon_winsys *rws = radeon_winsys(pscreen); util_slab_destroy(&r300screen->pool_buffers); pipe_mutex_destroy(r300screen->num_contexts_mutex); @@ -418,15 +431,15 @@ static void r300_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - r300_winsys_bo_reference((struct r300_winsys_bo**)ptr, - (struct r300_winsys_bo*)fence); + pb_reference((struct pb_buffer**)ptr, + (struct pb_buffer*)fence); } static boolean r300_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence) { - struct r300_winsys_screen *rws = r300_screen(screen)->rws; - struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; + struct radeon_winsys *rws = r300_screen(screen)->rws; + struct pb_buffer *rfence = (struct pb_buffer*)fence; return !rws->buffer_is_busy(rfence); } @@ 
-435,8 +448,8 @@ static boolean r300_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, uint64_t timeout) { - struct r300_winsys_screen *rws = r300_screen(screen)->rws; - struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; + struct radeon_winsys *rws = r300_screen(screen)->rws; + struct pb_buffer *rfence = (struct pb_buffer*)fence; if (timeout != PIPE_TIMEOUT_INFINITE) { int64_t start_time = os_time_get(); @@ -458,7 +471,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, return TRUE; } -struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) +struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) { struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); @@ -467,9 +480,9 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) return NULL; } - r300screen->caps.pci_id = rws->get_value(rws, R300_VID_PCI_ID); - r300screen->caps.num_frag_pipes = rws->get_value(rws, R300_VID_GB_PIPES); - r300screen->caps.num_z_pipes = rws->get_value(rws, R300_VID_Z_PIPES); + r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID); + r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES); + r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES); r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); @@ -479,6 +492,9 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; + if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0)) + r300screen->caps.has_us_format = FALSE; + pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 576f9c1f4a9..bca86edb1d7 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -32,13 +32,13 @@ #include <stdio.h> -struct r300_winsys_screen; 
+struct radeon_winsys; struct r300_screen { /* Parent class */ struct pipe_screen screen; - struct r300_winsys_screen *rws; + struct radeon_winsys *rws; /* Chipset capabilities */ struct r300_capabilities caps; @@ -61,8 +61,8 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } -static INLINE struct r300_winsys_screen * -r300_winsys_screen(struct pipe_screen *screen) { +static INLINE struct radeon_winsys * +radeon_winsys(struct pipe_screen *screen) { return r300_screen(screen)->rws; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 986ae384fbf..4154c81512e 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -31,7 +31,6 @@ #include "util/u_math.h" #include "r300_screen_buffer.h" -#include "r300_winsys.h" void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, @@ -62,7 +61,7 @@ static void r300_buffer_destroy(struct pipe_screen *screen, FREE(rbuf->constant_buffer); if (rbuf->buf) - r300_winsys_bo_reference(&rbuf->buf, NULL); + pb_reference(&rbuf->buf, NULL); util_slab_free(&r300screen->pool_buffers, rbuf); } @@ -105,7 +104,7 @@ r300_buffer_transfer_map( struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); - struct r300_winsys_screen *rws = r300screen->rws; + struct radeon_winsys *rws = r300screen->rws; struct r300_resource *rbuf = r300_resource(transfer->resource); uint8_t *map; @@ -126,7 +125,7 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { struct r300_screen *r300screen = r300_screen(pipe->screen); - struct r300_winsys_screen *rws = r300screen->rws; + struct radeon_winsys *rws = r300screen->rws; struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { @@ -144,7 +143,7 @@ static 
void r300_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned layer_stride) { struct r300_context *r300 = r300_context(pipe); - struct r300_winsys_screen *rws = r300->screen->rws; + struct radeon_winsys *rws = r300->screen->rws; struct r300_resource *rbuf = r300_resource(resource); uint8_t *map = NULL; @@ -188,7 +187,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, pipe_reference_init(&rbuf->b.b.b.reference, 1); rbuf->b.b.b.screen = screen; rbuf->b.user_ptr = NULL; - rbuf->domain = R300_DOMAIN_GTT; + rbuf->domain = RADEON_DOMAIN_GTT; rbuf->buf = NULL; rbuf->buf_size = templ->width0; rbuf->constant_buffer = NULL; @@ -237,7 +236,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, rbuf->b.b.b.flags = 0; rbuf->b.b.vtbl = &r300_buffer_vtbl; rbuf->b.user_ptr = ptr; - rbuf->domain = R300_DOMAIN_GTT; + rbuf->domain = RADEON_DOMAIN_GTT; rbuf->buf = NULL; rbuf->buf_size = size; rbuf->constant_buffer = NULL; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index cdbc4425fcb..360ec509cc5 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -32,7 +32,6 @@ #include "util/u_transfer.h" #include "r300_screen.h" -#include "r300_winsys.h" #include "r300_context.h" /* Functions. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index ecb4fc691cc..24b41d5085d 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -45,7 +45,6 @@ #include "r300_fs.h" #include "r300_texture.h" #include "r300_vs.h" -#include "r300_winsys.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. 
*/ @@ -398,10 +397,6 @@ static void r300_bind_blend_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); UPDATE_STATE(state, r300->blend_state); - - if (r300->fs.state && r300_pick_fragment_shader(r300)) { - r300_mark_fs_code_dirty(r300); - } } /* Free blend state. */ @@ -773,7 +768,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); r300_mark_atom_dirty(r300, &r300->gpu_flush); r300_mark_atom_dirty(r300, &r300->fb_state); @@ -860,6 +855,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } } } + assert(state->zsbuf || r300->hyperz_locked || !r300->zmask_in_use); /* Need to reset clamping or colormask. */ r300_mark_atom_dirty(r300, &r300->blend_state); @@ -973,24 +969,14 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; - struct pipe_framebuffer_state *fb = r300->fb_state.state; - boolean last_multi_write; if (fs == NULL) { r300->fs.state = NULL; return; } - last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300)); - r300->fs.state = fs; - r300_pick_fragment_shader(r300); - r300_mark_fs_code_dirty(r300); - - if (fb->nr_cbufs > 1 && - last_multi_write != r300_fragment_shader_writes_all(fs)) { - r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); - } + r300->fs_status = FRAGMENT_SHADER_DIRTY; r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. 
*/ } @@ -1047,7 +1033,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */ float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ - boolean vclamp = TRUE; + boolean vclamp = state->clamp_vertex_color; CB_LOCALS; /* Copy rasterizer state. */ @@ -1233,6 +1219,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) struct r300_rs_state* rs = (struct r300_rs_state*)state; int last_sprite_coord_enable = r300->sprite_coord_enable; boolean last_two_sided_color = r300->two_sided_color; + boolean last_frag_clamp = r300->frag_clamp; if (r300->draw && rs) { draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); @@ -1242,10 +1229,12 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; + r300->frag_clamp = rs->rs.clamp_fragment_color; } else { r300->polygon_offset_enabled = FALSE; r300->sprite_coord_enable = 0; r300->two_sided_color = FALSE; + r300->frag_clamp = FALSE; } UPDATE_STATE(state, r300->rs_state); @@ -1255,6 +1244,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) last_two_sided_color != r300->two_sided_color) { r300_mark_atom_dirty(r300, &r300->rs_block_state); } + + if (last_frag_clamp != r300->frag_clamp && + r300->fs_status == FRAGMENT_SHADER_VALID) { + r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; + } } /* Free rasterizer state. 
*/ @@ -1551,7 +1545,8 @@ static void r300_set_viewport_state(struct pipe_context* pipe, } r300_mark_atom_dirty(r300, &r300->viewport_state); - if (r300->fs.state && r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { + if (r300->fs.state && r300_fs(r300)->shader && + r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); } } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index ec00e2552ca..afc1451183d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -592,6 +592,13 @@ static void r300_update_rs_block(struct r300_context *r300) } } +static void rgba_to_bgra(float color[4]) +{ + float x = color[0]; + color[0] = color[2]; + color[2] = x; +} + static uint32_t r300_get_border_color(enum pipe_format format, const float border[4], boolean is_r500) @@ -625,13 +632,13 @@ static uint32_t r300_get_border_color(enum pipe_format format, for (i = 0; i < 4; i++) { switch (desc->swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: - border_swizzled[2] = border[i]; + border_swizzled[0] = border[i]; break; case UTIL_FORMAT_SWIZZLE_Y: border_swizzled[1] = border[i]; break; case UTIL_FORMAT_SWIZZLE_Z: - border_swizzled[0] = border[i]; + border_swizzled[2] = border[i]; break; case UTIL_FORMAT_SWIZZLE_W: border_swizzled[3] = border[i]; @@ -643,39 +650,46 @@ static uint32_t r300_get_border_color(enum pipe_format format, if (util_format_is_compressed(format)) { switch (format) { case PIPE_FORMAT_RGTC1_SNORM: - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_LATC1_SNORM: + border_swizzled[0] = border_swizzled[0] < 0 ? + border_swizzled[0]*0.5+1 : + border_swizzled[0]*0.5; + /* Pass through. */ + + case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_LATC1_UNORM: /* Add 1/32 to round the border color instead of truncating. */ /* The Y component is used for the border color. 
*/ - border_swizzled[1] = border_swizzled[2] + 1.0f/32; + border_swizzled[1] = border_swizzled[0] + 1.0f/32; util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); return uc.ui; case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: - border_swizzled[0] = border_swizzled[2]; util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); return uc.ui; case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_LATC2_UNORM: - util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); return uc.ui; default: - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); return uc.ui; } } switch (desc->channel[0].size) { case 2: + rgba_to_bgra(border_swizzled); util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc); break; case 4: + rgba_to_bgra(border_swizzled); util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); break; case 5: + rgba_to_bgra(border_swizzled); if (desc->channel[1].size == 5) { util_pack_color(border_swizzled, PIPE_FORMAT_B5G5R5A1_UNORM, &uc); } else if (desc->channel[1].size == 6) { @@ -687,32 +701,39 @@ static uint32_t r300_get_border_color(enum pipe_format format, default: case 8: - util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + else + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); break; case 10: - util_pack_color(border_swizzled, PIPE_FORMAT_B10G10R10A2_UNORM, &uc); + util_pack_color(border_swizzled, PIPE_FORMAT_R10G10B10A2_UNORM, &uc); break; case 16: if (desc->nr_channels <= 2) { - border_swizzled[0] = border_swizzled[2]; if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); + } else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) 
{ + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_SNORM, &uc); } else { util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); } } else { - util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + } } break; case 32: if (desc->nr_channels == 1) { - border_swizzled[0] = border_swizzled[2]; util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); } else { - util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); } break; } @@ -720,25 +741,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, return uc.ui; } -static boolean util_format_is_float(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - unsigned i; - - if (!format) - return FALSE; - - /* Find the first non-void channel. */ - for (i = 0; i < 4; i++) - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) - break; - - if (i == 4) - return FALSE; - - return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; -} - static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -747,9 +749,10 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_resource *tex; - unsigned min_level, max_level, i, j, size; + unsigned base_level, min_level, level_count, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); + boolean has_us_format = r300->screen->caps.has_us_format; /* The KIL opcode fix, see below. 
*/ if (!count && !r300->screen->caps.is_r500) @@ -779,21 +782,27 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) r300->screen->caps.is_r500); /* determine min/max levels */ - max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, - tex->b.b.b.last_level, view->base.u.tex.last_level); - min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, - max_level); - - if (tex->tex.is_npot && min_level > 0) { - /* Even though we do not implement mipmapping for NPOT - * textures, we should at least honor the minimum level - * which is allowed to be displayed. We do this by setting up - * the i-th mipmap level as the zero level. */ - unsigned offset = tex->tex_offset + - tex->tex.offset_in_bytes[min_level]; + base_level = view->base.u.tex.first_level; + min_level = sampler->min_lod; + level_count = MIN3(sampler->max_lod, + tex->b.b.b.last_level - base_level, + view->base.u.tex.last_level - base_level); + + if (base_level + min_level) { + unsigned offset; + + if (tex->tex.is_npot) { + /* Even though we do not implement mipmapping for NPOT + * textures, we should at least honor the minimum level + * which is allowed to be displayed. We do this by setting up + * an i-th mipmap level as the zero level. 
*/ + base_level += min_level; + } + offset = tex->tex_offset + + tex->tex.offset_in_bytes[base_level]; r300_texture_setup_format_state(r300->screen, tex, - min_level, + base_level, &texstate->format); texstate->format.tile_config |= offset & 0xffffffe0; assert((offset & 0x1f) == 0); @@ -870,7 +879,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } else { /* the MAX_MIP level is the largest (finest) one */ - texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); + texstate->format.format0 |= R300_TX_NUM_LEVELS(level_count); texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); } @@ -902,7 +911,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= i << 28; - size += 16; + size += 16 + (has_us_format ? 2 : 0); state->count = i+1; } else { /* For the KIL opcode to work on r3xx-r4xx, the texture unit @@ -931,7 +940,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->border_color = 0; texstate->filter0 |= i << 28; - size += 16; + size += 16 + (has_us_format ? 2 : 0); state->count = i+1; } } @@ -940,11 +949,10 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) r300->textures_state.size = size; /* Pick a fragment shader based on either the texture compare state - * or the uses_pitch flag. */ - if (r300->fs.state && count) { - if (r300_pick_fragment_shader(r300)) { - r300_mark_fs_code_dirty(r300); - } + * or the uses_pitch flag or some other external state. */ + if (count && + r300->fs_status == FRAGMENT_SHADER_VALID) { + r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; } } @@ -973,6 +981,34 @@ static void r300_decompress_depth_textures(struct r300_context *r300) } } +static void r300_validate_fragment_shader(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = r300->fb_state.state; + + if (r300->fs.state && r300->fs_status != FRAGMENT_SHADER_VALID) { + /* Pick the fragment shader based on external states. 
+ * Then mark the state dirty if the fragment shader is either dirty + * or the function r300_pick_fragment_shader changed the shader. */ + if (r300_pick_fragment_shader(r300) || + r300->fs_status == FRAGMENT_SHADER_DIRTY) { + /* Mark the state atom as dirty. */ + r300_mark_fs_code_dirty(r300); + + /* Does Multiwrite need to be changed? */ + if (fb->nr_cbufs > 1) { + boolean new_multiwrite = + r300_fragment_shader_writes_all(r300_fs(r300)); + + if (r300->fb_multiwrite != new_multiwrite) { + r300->fb_multiwrite = new_multiwrite; + r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); + } + } + } + r300->fs_status = FRAGMENT_SHADER_VALID; + } +} + void r300_update_derived_state(struct r300_context* r300) { if (r300->textures_state.dirty) { @@ -980,6 +1016,8 @@ void r300_update_derived_state(struct r300_context* r300) r300_merge_textures_and_samplers(r300); } + r300_validate_fragment_shader(r300); + if (r300->rs_block_state.dirty) { r300_update_rs_block(r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index c650fb7ed37..38ca9a24e45 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -29,7 +29,6 @@ #include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" -#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" @@ -39,6 +38,18 @@ #include "pipe/p_screen.h" +void util_format_combine_swizzles(unsigned char *dst, + const unsigned char *swz1, + const unsigned char *swz2) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? + swz1[swz2[i]] : swz2[i]; + } +} + unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle) @@ -61,10 +72,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, if (swizzle_view) { /* Combine two sets of swizzles. 
*/ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? - swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } + util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view); } else { memcpy(swizzle, swizzle_format, 4); } @@ -116,10 +124,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, unsigned i; boolean uniform = TRUE; const uint32_t sign_bit[4] = { - R300_TX_FORMAT_SIGNED_X, - R300_TX_FORMAT_SIGNED_Y, - R300_TX_FORMAT_SIGNED_Z, R300_TX_FORMAT_SIGNED_W, + R300_TX_FORMAT_SIGNED_Z, + R300_TX_FORMAT_SIGNED_Y, + R300_TX_FORMAT_SIGNED_X, }; desc = util_format_description(format); @@ -171,17 +179,22 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - if (util_format_is_compressed(format) && - dxtc_swizzle && - format != PIPE_FORMAT_RGTC2_UNORM && - format != PIPE_FORMAT_RGTC2_SNORM && - format != PIPE_FORMAT_LATC2_UNORM && - format != PIPE_FORMAT_LATC2_SNORM) { - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - TRUE); - } else { - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - FALSE); + /* Add swizzling. */ + /* The RGTC1_SNORM and LATC1_SNORM swizzle is done in the shader. */ + if (format != PIPE_FORMAT_RGTC1_SNORM && + format != PIPE_FORMAT_LATC1_SNORM) { + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM && + format != PIPE_FORMAT_LATC2_UNORM && + format != PIPE_FORMAT_LATC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + TRUE); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } } /* S3TC formats. 
*/ @@ -212,14 +225,13 @@ uint32_t r300_translate_texformat(enum pipe_format format, switch (format) { case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: - result |= sign_bit[1]; case PIPE_FORMAT_LATC1_UNORM: case PIPE_FORMAT_RGTC1_UNORM: return R500_TX_FORMAT_ATI1N | result; case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: - result |= sign_bit[2] | sign_bit[3]; + result |= sign_bit[1] | sign_bit[0]; case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_LATC2_UNORM: return R400_TX_FORMAT_ATI2N | result; @@ -390,20 +402,33 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: - /*case PIPE_FORMAT_A8_SNORM:*/ + case PIPE_FORMAT_A8_SNORM: case PIPE_FORMAT_I8_UNORM: - /*case PIPE_FORMAT_I8_SNORM:*/ + case PIPE_FORMAT_I8_SNORM: case PIPE_FORMAT_L8_UNORM: - /*case PIPE_FORMAT_L8_SNORM:*/ + case PIPE_FORMAT_L8_SNORM: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ case PIPE_FORMAT_L8A8_UNORM: - /*case PIPE_FORMAT_L8A8_SNORM:*/ + case PIPE_FORMAT_L8A8_SNORM: case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: + /* These formats work fine with UV88 if US_OUT_FMT is set correctly. */ + case PIPE_FORMAT_A16_UNORM: + case PIPE_FORMAT_A16_SNORM: + case PIPE_FORMAT_A16_FLOAT: + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_L16_SNORM: + case PIPE_FORMAT_L16_FLOAT: + case PIPE_FORMAT_I16_UNORM: + case PIPE_FORMAT_I16_SNORM: + case PIPE_FORMAT_I16_FLOAT: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_FLOAT: return R300_COLOR_FORMAT_UV88; case PIPE_FORMAT_B5G6R5_UNORM: @@ -434,19 +459,33 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + /* These formats work fine with ARGB8888 if US_OUT_FMT is set + * correctly. 
*/ + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_L16A16_UNORM: + case PIPE_FORMAT_L16A16_SNORM: + case PIPE_FORMAT_L16A16_FLOAT: + case PIPE_FORMAT_A32_FLOAT: + case PIPE_FORMAT_L32_FLOAT: + case PIPE_FORMAT_I32_FLOAT: + case PIPE_FORMAT_R32_FLOAT: return R300_COLOR_FORMAT_ARGB8888; case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */ /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + /* These formats work fine with ARGB16161616 if US_OUT_FMT is set + * correctly. */ + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_L32A32_FLOAT: return R300_COLOR_FORMAT_ARGB16161616; /* 128-bit buffers. */ @@ -489,12 +528,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) uint32_t modifier = 0; unsigned i; const struct util_format_description *desc; - static const uint32_t sign_bit[4] = { - R300_OUT_SIGN(0x1), - R300_OUT_SIGN(0x2), - R300_OUT_SIGN(0x4), - R300_OUT_SIGN(0x8), - }; + boolean uniform_sign; desc = util_format_description(format); @@ -509,54 +543,109 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) return ~0; /* Unsupported/unknown. */ /* Specifies how the shader output is written to the fog unit. 
*/ - if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { - if (desc->channel[i].size == 32) { - modifier |= R300_US_OUT_FMT_C4_32_FP; - } else { - modifier |= R300_US_OUT_FMT_C4_16_FP; + switch (desc->channel[i].type) { + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 32: + switch (desc->nr_channels) { + case 1: + modifier |= R300_US_OUT_FMT_C_32_FP; + break; + case 2: + modifier |= R300_US_OUT_FMT_C2_32_FP; + break; + case 4: + modifier |= R300_US_OUT_FMT_C4_32_FP; + break; + } + break; + + case 16: + switch (desc->nr_channels) { + case 1: + modifier |= R300_US_OUT_FMT_C_16_FP; + break; + case 2: + modifier |= R300_US_OUT_FMT_C2_16_FP; + break; + case 4: + modifier |= R300_US_OUT_FMT_C4_16_FP; + break; + } + break; } - } else { - if (desc->channel[i].size == 16) { - modifier |= R300_US_OUT_FMT_C4_16; - } else if (desc->channel[i].size == 10) { + break; + + default: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + modifier |= R300_US_OUT_FMT_C_16; + break; + case 2: + modifier |= R300_US_OUT_FMT_C2_16; + break; + case 4: + modifier |= R300_US_OUT_FMT_C4_16; + break; + } + break; + + case 10: modifier |= R300_US_OUT_FMT_C4_10; - } else { + break; + + default: /* C4_8 seems to be used for the formats whose pixel size * is <= 32 bits. */ modifier |= R300_US_OUT_FMT_C4_8; + break; } } /* Add sign. */ - for (i = 0; i < 4; i++) - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - modifier |= sign_bit[i]; - } + uniform_sign = TRUE; + for (i = 0; i < desc->nr_channels; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_SIGNED) + uniform_sign = FALSE; + + if (uniform_sign) + modifier |= R300_OUT_SIGN(0xf); /* Add swizzles and return. */ switch (format) { - /* 8-bit outputs, one channel. - * COLORFORMAT_I8 stores the C2 component. */ + /*** Special cases (non-standard channel mapping) ***/ + + /* X8 + * COLORFORMAT_I8 stores the Z component (C2). 
*/ case PIPE_FORMAT_A8_UNORM: - /*case PIPE_FORMAT_A8_SNORM:*/ + case PIPE_FORMAT_A8_SNORM: return modifier | R300_C2_SEL_A; case PIPE_FORMAT_I8_UNORM: - /*case PIPE_FORMAT_I8_SNORM:*/ + case PIPE_FORMAT_I8_SNORM: case PIPE_FORMAT_L8_UNORM: - /*case PIPE_FORMAT_L8_SNORM:*/ + case PIPE_FORMAT_L8_SNORM: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; - /* 16-bit outputs, two channels. - * COLORFORMAT_UV88 stores C2 and C0. */ + /* X8Y8 + * COLORFORMAT_UV88 stores ZX (C2 and C0). */ + case PIPE_FORMAT_L8A8_SNORM: case PIPE_FORMAT_L8A8_UNORM: - /*case PIPE_FORMAT_L8A8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C2_SEL_R; - case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_UNORM: return modifier | R300_C0_SEL_G | R300_C2_SEL_R; + /* X32Y32 + * ARGB16161616 stores XZ for RG32F */ + case PIPE_FORMAT_R32G32_FLOAT: + return modifier | R300_C0_SEL_R | R300_C2_SEL_G; + + /*** Generic cases (standard channel mapping) ***/ + /* BGRA outputs. 
*/ case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: @@ -577,6 +666,10 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ + case PIPE_FORMAT_A16_UNORM: + case PIPE_FORMAT_A16_SNORM: + case PIPE_FORMAT_A16_FLOAT: + case PIPE_FORMAT_A32_FLOAT: return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; @@ -595,18 +688,39 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_L16_SNORM: + case PIPE_FORMAT_L16_FLOAT: + case PIPE_FORMAT_L32_FLOAT: + case PIPE_FORMAT_I16_UNORM: + case PIPE_FORMAT_I16_SNORM: + case PIPE_FORMAT_I16_FLOAT: + case PIPE_FORMAT_I32_FLOAT: return modifier | R300_C0_SEL_R | R300_C1_SEL_G | R300_C2_SEL_B | R300_C3_SEL_A; + /* LA outputs. */ + case PIPE_FORMAT_L16A16_UNORM: + case PIPE_FORMAT_L16A16_SNORM: + case PIPE_FORMAT_L16A16_FLOAT: + case PIPE_FORMAT_L32A32_FLOAT: + return modifier | + R300_C0_SEL_R | R300_C1_SEL_A; + default: return ~0; /* Unsupported. 
*/ } @@ -636,6 +750,16 @@ void r300_texture_setup_format_state(struct r300_screen *screen, struct pipe_resource *pt = &tex->b.b.b; struct r300_texture_desc *desc = &tex->tex; boolean is_r500 = screen->caps.is_r500; + unsigned width, height, depth; + unsigned txwidth, txheight, txdepth; + + width = u_minify(desc->width0, level); + height = u_minify(desc->height0, level); + depth = u_minify(desc->depth0, level); + + txwidth = (width - 1) & 0x7ff; + txheight = (height - 1) & 0x7ff; + txdepth = util_logbase2(depth) & 0xf; /* Mask out all the fields we change. */ out->format0 = 0; @@ -645,9 +769,9 @@ void r300_texture_setup_format_state(struct r300_screen *screen, /* Set sampler state. */ out->format0 = - R300_TX_WIDTH((u_minify(desc->width0, level) - 1) & 0x7ff) | - R300_TX_HEIGHT((u_minify(desc->height0, level) - 1) & 0x7ff) | - R300_TX_DEPTH(util_logbase2(u_minify(desc->depth0, level)) & 0xf); + R300_TX_WIDTH(txwidth) | + R300_TX_HEIGHT(txheight) | + R300_TX_DEPTH(txdepth); if (desc->uses_stride_addressing) { /* rectangles love this */ @@ -665,12 +789,32 @@ void r300_texture_setup_format_state(struct r300_screen *screen, /* large textures on r500 */ if (is_r500) { - if (desc->width0 > 2048) { + unsigned us_width = txwidth; + unsigned us_height = txheight; + unsigned us_depth = txdepth; + + if (width > 2048) { out->format2 |= R500_TXWIDTH_BIT11; } - if (desc->height0 > 2048) { + if (height > 2048) { out->format2 |= R500_TXHEIGHT_BIT11; } + + /* The US_FORMAT register fixes an R500 TX addressing bug. + * Don't ask why it must be set like this. I don't know it either. 
*/ + if (width > 2048) { + us_width = (0x000007FF + us_width) >> 1; + us_depth |= 0x0000000D; + } + if (height > 2048) { + us_height = (0x000007FF + us_height) >> 1; + us_depth |= 0x0000000E; + } + + out->us_format0 = + R300_TX_WIDTH(us_width) | + R300_TX_HEIGHT(us_height) | + R300_TX_DEPTH(us_depth); } out->tile_config = R300_TXO_MACRO_TILE(desc->macrotile[level]) | @@ -729,7 +873,7 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_resource* tex = (struct r300_resource*)texture; - r300_winsys_bo_reference(&tex->buf, NULL); + pb_reference(&tex->buf, NULL); FREE(tex); } @@ -737,7 +881,7 @@ boolean r300_resource_get_handle(struct pipe_screen* screen, struct pipe_resource *texture, struct winsys_handle *whandle) { - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct radeon_winsys *rws = (struct radeon_winsys *)screen->winsys; struct r300_resource* tex = (struct r300_resource*)texture; if (!tex) { @@ -764,17 +908,17 @@ static const struct u_resource_vtbl r300_texture_vtbl = static struct r300_resource* r300_texture_create_object(struct r300_screen *rscreen, const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, + enum radeon_bo_layout microtile, + enum radeon_bo_layout macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size, - struct r300_winsys_bo *buffer) + struct pb_buffer *buffer) { - struct r300_winsys_screen *rws = rscreen->rws; + struct radeon_winsys *rws = rscreen->rws; struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) - r300_winsys_bo_reference(&buffer, NULL); + pb_reference(&buffer, NULL); return NULL; } @@ -788,13 +932,13 @@ r300_texture_create_object(struct r300_screen *rscreen, tex->tex.macrotile[0] = macrotile; tex->tex.stride_in_bytes_override = stride_in_bytes_override; tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? 
- R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + RADEON_DOMAIN_GTT : + RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT; tex->buf_size = max_buffer_size; if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) - r300_winsys_bo_reference(&buffer, NULL); + pb_reference(&buffer, NULL); FREE(tex); return NULL; } @@ -827,15 +971,16 @@ struct pipe_resource *r300_texture_create(struct pipe_screen *screen, const struct pipe_resource *base) { struct r300_screen *rscreen = r300_screen(screen); - enum r300_buffer_tiling microtile, macrotile; + enum radeon_bo_layout microtile, macrotile; if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || (base->bind & PIPE_BIND_SCANOUT)) { - microtile = R300_BUFFER_LINEAR; - macrotile = R300_BUFFER_LINEAR; + microtile = RADEON_LAYOUT_LINEAR; + macrotile = RADEON_LAYOUT_LINEAR; } else { - microtile = R300_BUFFER_SELECT_LAYOUT; - macrotile = R300_BUFFER_SELECT_LAYOUT; + /* This will make the texture_create_function select the layout. */ + microtile = RADEON_LAYOUT_UNKNOWN; + macrotile = RADEON_LAYOUT_UNKNOWN; } return (struct pipe_resource*) @@ -847,10 +992,10 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *base, struct winsys_handle *whandle) { - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct radeon_winsys *rws = (struct radeon_winsys*)screen->winsys; struct r300_screen *rscreen = r300_screen(screen); - struct r300_winsys_bo *buffer; - enum r300_buffer_tiling microtile, macrotile; + struct pb_buffer *buffer; + enum radeon_bo_layout microtile, macrotile; unsigned stride, size; /* Support only 2D textures without mipmaps */ @@ -869,14 +1014,14 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, /* Enforce a microtiled zbuffer. 
*/ if (util_format_is_depth_or_stencil(base->format) && - microtile == R300_BUFFER_LINEAR) { + microtile == RADEON_LAYOUT_LINEAR) { switch (util_format_get_blocksize(base->format)) { case 4: - microtile = R300_BUFFER_TILED; + microtile = RADEON_LAYOUT_TILED; break; case 2: - microtile = R300_BUFFER_SQUARETILED; + microtile = RADEON_LAYOUT_SQUARETILED; break; } } @@ -917,8 +1062,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, /* Prefer VRAM if there are multiple domains to choose from. */ surface->domain = tex->domain; - if (surface->domain & R300_DOMAIN_VRAM) - surface->domain &= ~R300_DOMAIN_GTT; + if (surface->domain & RADEON_DOMAIN_VRAM) + surface->domain &= ~RADEON_DOMAIN_GTT; surface->offset = r300_texture_get_offset(tex, level, surf_tmpl->u.tex.first_layer); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 158a387478f..4586bb2e4dc 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,6 +35,10 @@ struct r300_texture_desc; struct r300_resource; struct r300_screen; +void util_format_combine_swizzles(unsigned char *dst, + const unsigned char *swz1, + const unsigned char *swz2); + unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 2910666dd51..da5778be65e 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -22,9 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_texture_desc.h" - #include "r300_context.h" -#include "r300_winsys.h" #include "util/u_format.h" @@ -32,8 +30,8 @@ * in the given dimension. 
*/ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, + enum radeon_bo_layout microtile, + enum radeon_bo_layout macrotile, enum r300_dim dim, boolean is_rs690) { static const unsigned table[2][5][3][2] = @@ -62,8 +60,8 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned tile = 0; unsigned pixsize = util_format_get_blocksize(format); - assert(macrotile <= R300_BUFFER_TILED); - assert(microtile <= R300_BUFFER_SQUARETILED); + assert(macrotile <= RADEON_LAYOUT_TILED); + assert(microtile <= RADEON_LAYOUT_SQUARETILED); assert(pixsize <= 16); assert(dim <= DIM_HEIGHT); @@ -98,7 +96,7 @@ static boolean r300_texture_macro_switch(struct r300_resource *tex, unsigned tile, texdim; tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, - tex->tex.microtile, R300_BUFFER_TILED, dim, 0); + tex->tex.microtile, RADEON_LAYOUT_TILED, dim, 0); if (dim == DIM_WIDTH) { texdim = u_minify(tex->tex.width0, level); } else { @@ -233,10 +231,10 @@ static void r300_setup_miptree(struct r300_screen *screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ tex->tex.macrotile[i] = - (tex->tex.macrotile[0] == R300_BUFFER_TILED && + (tex->tex.macrotile[0] == RADEON_LAYOUT_TILED && r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 
- R300_BUFFER_TILED : R300_BUFFER_LINEAR; + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; stride = r300_texture_get_stride(screen, tex, i); @@ -424,8 +422,8 @@ static void r300_setup_tiling(struct r300_screen *screen, boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); - tex->tex.microtile = R300_BUFFER_LINEAR; - tex->tex.macrotile[0] = R300_BUFFER_LINEAR; + tex->tex.microtile = RADEON_LAYOUT_LINEAR; + tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR; if (!util_format_is_plain(format)) { return; @@ -441,11 +439,11 @@ static void r300_setup_tiling(struct r300_screen *screen, case 1: case 4: case 8: - tex->tex.microtile = R300_BUFFER_TILED; + tex->tex.microtile = RADEON_LAYOUT_TILED; break; case 2: - tex->tex.microtile = R300_BUFFER_SQUARETILED; + tex->tex.microtile = RADEON_LAYOUT_SQUARETILED; break; } @@ -456,7 +454,7 @@ static void r300_setup_tiling(struct r300_screen *screen, /* Set macrotiling. */ if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->tex.macrotile[0] = R300_BUFFER_TILED; + tex->tex.macrotile[0] = RADEON_LAYOUT_TILED; } } @@ -501,7 +499,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } /* Setup tiling. 
*/ - if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) { + if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) { r300_setup_tiling(rscreen, tex); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index ce6e9643ec6..a84d6fae0ea 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -25,7 +25,7 @@ #define R300_TEXTURE_DESC_H #include "pipe/p_format.h" -#include "r300_defines.h" +#include "r300_context.h" struct pipe_resource; struct r300_screen; @@ -39,8 +39,8 @@ enum r300_dim { unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, + enum radeon_bo_layout microtile, + enum radeon_bo_layout macrotile, enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 97ec0a1a1f2..6a000cfe2c6 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -52,8 +52,7 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; - /* case TGSI_OPCODE_CND: return RC_OPCODE_CND; */ - /* case TGSI_OPCODE_CND0: return RC_OPCODE_CND0; */ + case TGSI_OPCODE_CND: return RC_OPCODE_CND; /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 65c5095be6a..e2ea4cbf6c5 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -226,7 +226,7 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r300_context *r300 = 
r300_context(ctx); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct radeon_winsys *rws = (struct radeon_winsys *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_resource *tex = r300_resource(transfer->resource); char *map; @@ -256,7 +256,7 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, void r300_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct radeon_winsys *rws = (struct radeon_winsys *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_resource *tex = r300_resource(transfer->resource); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h deleted file mode 100644 index 3a6798a5423..00000000000 --- a/src/gallium/drivers/r300/r300_winsys.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * Copyright 2010 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_WINSYS_H -#define R300_WINSYS_H - -/* The public winsys interface header for the r300 pipe driver. - * Any winsys hosting this pipe needs to implement r300_winsys_screen and then - * call r300_screen_create to start things. */ - -#include "r300_defines.h" - -#include "pipebuffer/pb_bufmgr.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#define R300_MAX_CMDBUF_DWORDS (16 * 1024) -#define R300_FLUSH_ASYNC (1 << 0) - -struct winsys_handle; -struct r300_winsys_screen; - -#define r300_winsys_bo pb_buffer -#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src) - -struct r300_winsys_cs_handle; /* for write_reloc etc. */ - -struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ -}; - -enum r300_value_id { - R300_VID_PCI_ID, - R300_VID_GB_PIPES, - R300_VID_Z_PIPES, - R300_VID_GART_SIZE, - R300_VID_VRAM_SIZE, - R300_VID_DRM_MAJOR, - R300_VID_DRM_MINOR, - R300_VID_DRM_PATCHLEVEL, - - /* These should probably go away: */ - R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ - - R300_CAN_HYPERZ, /* ZMask + HiZ */ - R300_CAN_AACOMPRESS, /* CMask */ -}; - -struct r300_winsys_screen { - /** - * Destroy this winsys. - * - * \param ws The winsys this function is called from. - */ - void (*destroy)(struct r300_winsys_screen *ws); - - /** - * Query a system value from a winsys. - * - * \param ws The winsys this function is called from. - * \param vid One of the R300_VID_* enums. 
- */ - uint32_t (*get_value)(struct r300_winsys_screen *ws, - enum r300_value_id vid); - - /************************************************************************** - * Buffer management. Buffer attributes are mostly fixed over its lifetime. - * - * Remember that gallium gets to choose the interface it needs, and the - * window systems must then implement that interface (rather than the - * other way around...). - *************************************************************************/ - - /** - * Create a buffer object. - * - * \param ws The winsys this function is called from. - * \param size The size to allocate. - * \param alignment An alignment of the buffer in memory. - * \param bind A bitmask of the PIPE_BIND_* flags. - * \param usage A bitmask of the PIPE_USAGE_* flags. - * \param domain A bitmask of the R300_DOMAIN_* flags. - * \return The created buffer object. - */ - struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws, - unsigned size, - unsigned alignment, - unsigned bind, - unsigned usage, - enum r300_buffer_domain domain); - - struct r300_winsys_cs_handle *(*buffer_get_cs_handle)( - struct r300_winsys_bo *buf); - - /** - * Map the entire data store of a buffer object into the client's address - * space. - * - * \param buf A winsys buffer object to map. - * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* flags. - * \return The pointer at the beginning of the buffer. - */ - void *(*buffer_map)(struct r300_winsys_bo *buf, - struct r300_winsys_cs *cs, - enum pipe_transfer_usage usage); - - /** - * Unmap a buffer object from the client's address space. - * - * \param buf A winsys buffer object to unmap. - */ - void (*buffer_unmap)(struct r300_winsys_bo *buf); - - /** - * Return TRUE if a buffer object is being used by the GPU. - * - * \param buf A winsys buffer object. 
- */ - boolean (*buffer_is_busy)(struct r300_winsys_bo *buf); - - /** - * Wait for a buffer object until it is not used by a GPU. This is - * equivalent to a fence placed after the last command using the buffer, - * and synchronizing to the fence. - * - * \param buf A winsys buffer object to wait for. - */ - void (*buffer_wait)(struct r300_winsys_bo *buf); - - /** - * Return tiling flags describing a memory layout of a buffer object. - * - * \param buf A winsys buffer object to get the flags from. - * \param macrotile A pointer to the return value of the microtile flag. - * \param microtile A pointer to the return value of the macrotile flag. - * - * \note microtile and macrotile are not bitmasks! - */ - void (*buffer_get_tiling)(struct r300_winsys_bo *buf, - enum r300_buffer_tiling *microtile, - enum r300_buffer_tiling *macrotile); - - /** - * Set tiling flags describing a memory layout of a buffer object. - * - * \param buf A winsys buffer object to set the flags for. - * \param cs A command stream to flush if the buffer is referenced by it. - * \param macrotile A macrotile flag. - * \param microtile A microtile flag. - * \param stride A stride of the buffer in bytes, for texturing. - * - * \note microtile and macrotile are not bitmasks! - */ - void (*buffer_set_tiling)(struct r300_winsys_bo *buf, - struct r300_winsys_cs *cs, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride); - - /** - * Get a winsys buffer from a winsys handle. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param ws The winsys this function is called from. - * \param whandle A winsys handle pointer as was received from a state - * tracker. - * \param stride The returned buffer stride in bytes. - * \param size The returned buffer size. 
- */ - struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws, - struct winsys_handle *whandle, - unsigned *stride, - unsigned *size); - - /** - * Get a winsys handle from a winsys buffer. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param buf A winsys buffer object to get the handle from. - * \param whandle A winsys handle pointer. - * \param stride A stride of the buffer in bytes, for texturing. - * \return TRUE on success. - */ - boolean (*buffer_get_handle)(struct r300_winsys_bo *buf, - unsigned stride, - struct winsys_handle *whandle); - - /************************************************************************** - * Command submission. - * - * Each pipe context should create its own command stream and submit - * commands independently of other contexts. - *************************************************************************/ - - /** - * Create a command stream. - * - * \param ws The winsys this function is called from. - */ - struct r300_winsys_cs *(*cs_create)(struct r300_winsys_screen *ws); - - /** - * Destroy a command stream. - * - * \param cs A command stream to destroy. - */ - void (*cs_destroy)(struct r300_winsys_cs *cs); - - /** - * Add a new buffer relocation. Every relocation must first be added - * before it can be written. - * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. - */ - void (*cs_add_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_handle *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); - - /** - * Return TRUE if there is enough memory in VRAM and GTT for the relocs - * added so far. - * - * \param cs A command stream to validate. 
- */ - boolean (*cs_validate)(struct r300_winsys_cs *cs); - - /** - * Write a relocated dword to a command buffer. - * - * \param cs A command stream the relocation is written to. - * \param buf A winsys buffer to write the relocation for. - * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. - */ - void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_handle *buf); - - /** - * Flush a command stream. - * - * \param cs A command stream to flush. - * \param flags, R300_FLUSH_ASYNC or 0. - */ - void (*cs_flush)(struct r300_winsys_cs *cs, unsigned flags); - - /** - * Set a flush callback which is called from winsys when flush is - * required. - * - * \param cs A command stream to set the callback for. - * \param flush A flush callback function associated with the command stream. - * \param user A user pointer that will be passed to the flush callback. - */ - void (*cs_set_flush)(struct r300_winsys_cs *cs, - void (*flush)(void *ctx, unsigned flags), - void *user); - - /** - * Return TRUE if a buffer is referenced by a command stream. - * - * \param cs A command stream. - * \param buf A winsys buffer. - */ - boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_handle *buf); -}; - -#endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 487adddcb72..f20d45f48de 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -367,10 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B10G10R10A2_UNORM: return V_028C70_SWAP_ALT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; /* 64-bit buffers. 
*/ + case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_SSCALED: @@ -504,6 +507,57 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } +static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. 
*/ + } +#else + return ENDIAN_NONE; +#endif +} + static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 77432661b64..a972f82fb1d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -107,15 +107,18 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; + /* state->rt entries > 0 only written if independent blending */ + const int j = state->independent_blend_enable ? 
i : 0; + + unsigned eqRGB = state->rt[j].rgb_func; + unsigned srcRGB = state->rt[j].rgb_src_factor; + unsigned dstRGB = state->rt[j].rgb_dst_factor; + unsigned eqA = state->rt[j].alpha_func; + unsigned srcA = state->rt[j].alpha_src_factor; + unsigned dstA = state->rt[j].alpha_dst_factor; blend_cntl[i] = 0; - if (!state->rt[i].blend_enable) + if (!state->rt[j].blend_enable) continue; blend_cntl[i] |= S_028780_BLEND_CONTROL_ENABLE(1); @@ -354,7 +357,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - unsigned format; + unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; @@ -391,6 +394,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); + if (tmp->force_int_type) { word4 &= C_030010_NUM_FORMAT_ALL; word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); @@ -422,6 +427,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | + S_030010_ENDIAN_SWAP(endian) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->u.tex.last_level) | @@ -652,7 +658,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap, ntype; + unsigned format, swap, ntype, endian; unsigned offset; unsigned tile_type; const struct util_format_description *desc; @@ -677,35 +683,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct 
r600_pipe_state level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - ntype = 0; desc = util_format_description(surf->base.format); + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + ntype = V_028C70_NUMBER_UNORM; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; + else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) + ntype = V_028C70_NUMBER_SNORM; format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + if (rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) { + endian = ENDIAN_NONE; + } else { + endian = r600_colorformat_endian_swap(format); + } /* disable when gallium grows int textures */ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) - ntype = 4; + ntype = V_028C70_NUMBER_UINT; color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | - S_028C70_NUMBER_TYPE(ntype); + S_028C70_NUMBER_TYPE(ntype) | + S_028C70_ENDIAN(endian); - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, if we aren't a float, sint or uint */ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && - ntype != 4 && ntype != 5) + ntype != V_028C70_NUMBER_UINT && ntype != V_028C70_NUMBER_SINT) color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { @@ -808,6 +822,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; + evergreen_context_flush_dest_caches(&rctx->ctx); + rctx->ctx.num_dest_buffers = 
state->nr_cbufs; + /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -819,6 +836,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } if (state->zsbuf) { evergreen_db(rctx, rstate, state); + rctx->ctx.num_dest_buffers++; } target_mask = 0x00000000; @@ -880,6 +898,19 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } +static void evergreen_texture_barrier(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) | + S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) | + S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1)); +} + void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -920,6 +951,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; + rctx->context.texture_barrier = evergreen_texture_barrier; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1542,8 +1574,10 @@ void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(stride), - 0xFFFFFFFF, NULL); +#ifdef PIPE_ARCH_BIG_ENDIAN + S_030008_ENDIAN_SWAP(ENDIAN_8IN32) | +#endif + 
S_030008_STRIDE(stride), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index c51a163bd06..de445b879a1 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -43,6 +43,7 @@ #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 #define EVERGREEN_CTL_CONST_END 0x0003E200 +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 #define EVENT_TYPE(x) ((x) << 0) @@ -1885,4 +1886,10 @@ #define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 #define R_03A200_SQ_LOOP_CONST_0 0x3A200 + +#define ENDIAN_NONE 0 +#define ENDIAN_8IN16 1 +#define ENDIAN_8IN32 2 +#define ENDIAN_8IN64 3 + #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 0b7d6f70968..0b0df9d019b 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -179,11 +179,13 @@ struct r600_block_reloc { struct r600_block { struct list_head list; unsigned status; + unsigned flags; unsigned start_offset; unsigned pm4_ndwords; unsigned pm4_flush_ndwords; unsigned nbo; - unsigned nreg; + u16 nreg; + u16 nreg_dirty; u32 *reg; u32 pm4[R600_BLOCK_MAX_REG]; unsigned pm4_bo_index[R600_BLOCK_MAX_REG]; @@ -231,6 +233,8 @@ struct r600_query { #define R600_QUERY_STATE_ENDED (1 << 1) #define R600_QUERY_STATE_SUSPENDED (1 << 2) +#define R600_CONTEXT_DRAW_PENDING (1 << 0) +#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) struct r600_context { struct radeon *radeon; @@ -253,6 +257,8 @@ struct r600_context { unsigned num_query_running; struct list_head fenced_bo; unsigned max_db; /* for OQ */ + unsigned num_dest_buffers; + unsigned flags; boolean predicate_drawing; }; @@ -288,9 +294,14 @@ void r600_context_queries_suspend(struct r600_context *ctx); void 
r600_context_queries_resume(struct r600_context *ctx); void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait); +void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence, + unsigned offset, unsigned value); +void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags); +void r600_context_flush_dest_caches(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +void evergreen_context_flush_dest_caches(struct r600_context *ctx); void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 240093f9b9d..7e854b1b81d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -22,6 +22,7 @@ */ #include <stdio.h> #include <errno.h> +#include <byteswap.h> #include "util/u_format.h" #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" @@ -32,6 +33,12 @@ #include "r600_formats.h" #include "r600d.h" +#ifdef PIPE_ARCH_BIG_ENDIAN +#define CPU_TO_LE32(x) bswap_32(x) +#else +#define CPU_TO_LE32(x) (x) +#endif + #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 @@ -953,10 +960,17 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } else result[i] = slots[i]; - // let's check source gprs alu = slots[i]; num_once_inst += is_alu_once_inst(bc, alu); + // let's check dst gpr + if (alu->dst.rel) { + if (have_mova) + return 0; + have_rel = 1; + } + + // let's check source gprs num_src = 
r600_bc_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { @@ -1376,6 +1390,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; @@ -1910,6 +1925,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); id++; fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "ENDIAN:%d ", vtx->endian); fprintf(stderr, "OFFSET:%d\n", vtx->offset); //TODO id++; @@ -1922,7 +1938,7 @@ void r600_bc_dump(struct r600_bc *bc) } static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, - unsigned *num_format, unsigned *format_comp) + unsigned *num_format, unsigned *format_comp, unsigned *endian) { const struct util_format_description *desc; unsigned i; @@ -1930,6 +1946,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = 0; *num_format = 0; *format_comp = 0; + *endian = ENDIAN_NONE; desc = util_format_description(pformat); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { @@ -1960,6 +1977,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16_FLOAT; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN16; +#endif break; case 32: switch (desc->nr_channels) { @@ -1976,6 +1996,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32_FLOAT; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN32; +#endif break; default: goto out_unknown; @@ -2013,6 +2036,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN16; +#endif break; 
case 32: switch (desc->nr_channels) { @@ -2029,6 +2055,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN32; +#endif break; default: goto out_unknown; @@ -2060,7 +2089,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; - unsigned format, num_format, format_comp; + unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2107,7 +2136,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { r600_bc_clear(&bc); @@ -2133,6 +2162,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.format_comp_all = format_comp; vtx.srf_mode_all = 1; vtx.offset = elements[i].src_offset; + vtx.endian = endian; if ((r = r600_bc_add_vtx(&bc, &vtx))) { r600_bc_clear(&bc); @@ -2172,7 +2202,9 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return -ENOMEM; } - memcpy(bytecode, bc.bytecode, ve->fs_size); + for(i = 0; i < ve->fs_size / 4; i++) { + *(bytecode + i) = CPU_TO_LE32(*(bc.bytecode + i)); + } r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 27ea293ebe5..26d337fe125 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -104,6 +104,7 @@ struct r600_bc_vtx { unsigned format_comp_all; 
unsigned srf_mode_all; unsigned offset; + unsigned endian; }; struct r600_bc_output { diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 04408a5cc8e..151f48a8bf8 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -277,6 +277,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct texture_orig_info orig_info[2]; boolean restore_orig[2]; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + if (rsrc->depth && !rsrc->is_flushing_texture) r600_texture_depth_flush(ctx, src, FALSE); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 6ced719c8f0..71b47e1b056 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -24,6 +24,8 @@ * Jerome Glisse * Corbin Simpson <[email protected]> */ +#include <byteswap.h> + #include <pipe/p_screen.h> #include <util/u_format.h> #include <util/u_math.h> @@ -266,11 +268,31 @@ void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resour uint8_t *ptr = (*rbuffer)->r.b.user_ptr; unsigned size = (*rbuffer)->r.b.b.b.width0; boolean flushed; +#ifdef PIPE_ARCH_BIG_ENDIAN + int i; + uint32_t *tmpPtr; + + *rbuffer = NULL; + + tmpPtr = (uint32_t *)malloc(size); + /* big endian swap */ + if(tmpPtr == NULL) { + return; + } + for(i = 0; i < size / 4; i++) { + tmpPtr[i] = bswap_32(*((uint32_t *)ptr + i)); + } + + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, tmpPtr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + free(tmpPtr); +#else *rbuffer = NULL; u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, (struct pipe_resource**)rbuffer, &flushed); +#endif } else { *const_offset = 0; } diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c index 4a30eddb621..89b46f5ad7e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,6 +30,7 @@ #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> #include <util/u_double_list.h> +#include "util/u_format.h" #include <util/u_format_s3tc.h> #include <util/u_transfer.h> #include <util/u_surface.h> @@ -37,6 +38,7 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include "util/u_upload_mgr.h" +#include "os/os_time.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -49,15 +51,82 @@ /* * pipe_context */ +static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) +{ + struct r600_fence *fence = NULL; + + if (!ctx->fences.bo) { + /* Create the shared buffer object */ + ctx->fences.bo = r600_bo(ctx->radeon, 4096, 0, 0, 0); + if (!ctx->fences.bo) { + R600_ERR("r600: failed to create bo for fence objects\n"); + return NULL; + } + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PB_USAGE_UNSYNCHRONIZED, NULL); + } + + if (!LIST_IS_EMPTY(&ctx->fences.pool)) { + struct r600_fence *entry; + + /* Try to find a freed fence that has been signalled */ + LIST_FOR_EACH_ENTRY(entry, &ctx->fences.pool, head) { + if (ctx->fences.data[entry->index] != 0) { + LIST_DELINIT(&entry->head); + fence = entry; + break; + } + } + } + + if (!fence) { + /* Allocate a new fence */ + struct r600_fence_block *block; + unsigned index; + + if ((ctx->fences.next_index + 1) >= 1024) { + R600_ERR("r600: too many concurrent fences\n"); + return NULL; + } + + index = ctx->fences.next_index++; + + if (!(index % FENCE_BLOCK_SIZE)) { + /* Allocate a new block */ + block = CALLOC_STRUCT(r600_fence_block); + if (block == NULL) + return NULL; + + LIST_ADD(&block->head, &ctx->fences.blocks); + } else { + block = LIST_ENTRY(struct r600_fence_block, ctx->fences.blocks.next, head); + } + + fence = &block->fences[index % FENCE_BLOCK_SIZE]; + fence->ctx = ctx; + 
fence->index = index; + } + + pipe_reference_init(&fence->reference, 1); + + ctx->fences.data[fence->index] = 0; + r600_context_emit_fence(&ctx->ctx, ctx->fences.bo, fence->index, 1); + return fence; +} + static void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_fence **rfence = (struct r600_fence**)fence; + #if 0 static int dc = 0; char dname[256]; #endif + if (rfence) + *rfence = r600_create_fence(rctx); + if (!rctx->ctx.pm4_cdwords) return; @@ -101,6 +170,7 @@ static void r600_destroy_context(struct pipe_context *context) struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); + util_unreference_framebuffer_state(&rctx->framebuffer); r600_context_fini(&rctx->ctx); @@ -113,6 +183,18 @@ static void r600_destroy_context(struct pipe_context *context) u_vbuf_mgr_destroy(rctx->vbuf_mgr); util_slab_destroy(&rctx->pool_transfers); + if (rctx->fences.bo) { + struct r600_fence_block *entry, *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(entry, tmp, &rctx->fences.blocks, head) { + LIST_DEL(&entry->head); + FREE(entry); + } + + r600_bo_unmap(rctx->radeon, rctx->fences.bo); + r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL); + } + r600_update_num_contexts(rctx->screen, -1); FREE(rctx); @@ -140,6 +222,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->radeon = rscreen->radeon; rctx->family = r600_get_family(rctx->radeon); + rctx->fences.bo = NULL; + rctx->fences.data = NULL; + rctx->fences.next_index = 0; + LIST_INITHEAD(&rctx->fences.pool); + LIST_INITHEAD(&rctx->fences.blocks); + r600_init_blit_functions(rctx); r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); @@ -284,8 +372,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case 
PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; case PIPE_CAP_INDEP_BLEND_ENABLE: /* R600 doesn't support per-MRT blends */ @@ -294,10 +382,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) else return 1; + case PIPE_CAP_TGSI_INSTANCEID: + return 0; + /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: /* R600 doesn't support per-MRT blends */ if (family == CHIP_R600) return 0; @@ -435,6 +527,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; } + if (!util_format_is_supported(format, usage)) + return FALSE; + /* Multisample */ if (sample_count > 1) return FALSE; @@ -492,6 +587,62 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) FREE(rscreen); } +static void r600_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct r600_fence **oldf = (struct r600_fence**)ptr; + struct r600_fence *newf = (struct r600_fence*)fence; + + if (pipe_reference(&(*oldf)->reference, &newf->reference)) { + struct r600_pipe_context *ctx = (*oldf)->ctx; + LIST_ADDTAIL(&(*oldf)->head, &ctx->fences.pool); + } + + *ptr = fence; +} + +static boolean r600_fence_signalled(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence) +{ + struct r600_fence *rfence = (struct r600_fence*)fence; + struct r600_pipe_context *ctx = rfence->ctx; + + return ctx->fences.data[rfence->index]; +} + +static boolean r600_fence_finish(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct r600_fence *rfence = (struct r600_fence*)fence; + struct r600_pipe_context *ctx = rfence->ctx; + int64_t start_time = 0; + unsigned spins = 0; 
+ + if (timeout != PIPE_TIMEOUT_INFINITE) { + start_time = os_time_get(); + + /* Convert to microseconds. */ + timeout /= 1000; + } + + while (ctx->fences.data[rfence->index] == 0) { + if (++spins % 256) + continue; +#ifdef PIPE_OS_UNIX + sched_yield(); +#else + os_time_sleep(10); +#endif + if (timeout != PIPE_TIMEOUT_INFINITE && + os_time_get() - start_time >= timeout) { + return FALSE; + } + } + + return TRUE; +} struct pipe_screen *r600_screen_create(struct radeon *radeon) { @@ -513,6 +664,9 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.is_format_supported = r600_is_format_supported; rscreen->screen.context_create = r600_create_context; rscreen->screen.video_context_create = r600_video_create; + rscreen->screen.fence_reference = r600_fence_reference; + rscreen->screen.fence_signalled = r600_fence_signalled; + rscreen->screen.fence_finish = r600_fence_finish; r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 396801e4a41..88aff0e81bb 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -124,6 +124,30 @@ struct r600_textures_info { unsigned n_samplers; }; +struct r600_fence { + struct pipe_reference reference; + struct r600_pipe_context *ctx; + unsigned index; /* in the shared bo */ + struct list_head head; +}; + +#define FENCE_BLOCK_SIZE 16 + +struct r600_fence_block { + struct r600_fence fences[FENCE_BLOCK_SIZE]; + struct list_head head; +}; + +struct r600_pipe_fences { + struct r600_bo *bo; + unsigned *data; + unsigned next_index; + /* linked list of preallocated blocks */ + struct list_head blocks; + /* linked list of freed fences */ + struct list_head pool; +}; + #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 @@ -158,9 +182,12 @@ struct r600_pipe_context { bool flatshade; struct r600_textures_info 
ps_samplers; + struct r600_pipe_fences fences; + struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; bool blit; + }; struct r600_drawl { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e7285d624e3..188cea0ff88 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -33,6 +33,13 @@ #include "r600d.h" #include <stdio.h> #include <errno.h> +#include <byteswap.h> + +#ifdef PIPE_ARCH_BIG_ENDIAN +#define CPU_TO_LE32(x) bswap_32(x) +#else +#define CPU_TO_LE32(x) (x) +#endif int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) @@ -52,7 +59,8 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; - void *ptr; + uint32_t *ptr; + int i; /* copy new shader */ if (shader->bo == NULL) { @@ -60,8 +68,10 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); + ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + for(i = 0; i < rshader->bc.ndw; i++) { + *(ptr + i) = CPU_TO_LE32(*(rshader->bc.bytecode + i)); + } r600_bo_unmap(rctx->radeon, shader->bo); } /* build state */ @@ -467,6 +477,11 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ +#ifdef PIPE_ARCH_BIG_ENDIAN + vtx.endian = ENDIAN_8IN32; +#else + vtx.endian = ENDIAN_NONE; +#endif if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) return r; @@ -1526,9 +1541,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = - 
inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && - inst->Src[0].Register.File != TGSI_FILE_INPUT; + /* Texture fetch instructions can only use gprs as source. + * Also they cannot negate the source or take the absolute value */ + const boolean src_requires_loading = + (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[0].Register.File != TGSI_FILE_INPUT) || + ctx->src[0].neg || ctx->src[0].abs; + boolean src_loaded = FALSE; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1570,7 +1589,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = FALSE; + src_loaded = TRUE; src_gpr = ctx->temp_reg; } @@ -1655,11 +1674,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - src_not_temp = FALSE; + src_loaded = TRUE; src_gpr = ctx->temp_reg; } - if (src_not_temp) { + if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1673,6 +1692,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; } + src_loaded = TRUE; src_gpr = ctx->temp_reg; } @@ -1691,10 +1711,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; - tex.src_sel_x = 0; - tex.src_sel_y = 1; - tex.src_sel_z = 2; - tex.src_sel_w = 3; + if (src_loaded) { + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + } else { + tex.src_sel_x = ctx->src[0].swizzle[0]; + tex.src_sel_y = ctx->src[0].swizzle[1]; + tex.src_sel_z = ctx->src[0].swizzle[2]; + tex.src_sel_w = ctx->src[0].swizzle[3]; + tex.src_rel = ctx->src[0].rel; + } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { tex.src_sel_x = 1; @@ -1712,12 +1740,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { tex.coord_type_z = 0; - tex.src_sel_z = 1; + tex.src_sel_z = tex.src_sel_y; } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) tex.coord_type_z = 0; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) - tex.src_sel_w = 2; + tex.src_sel_w = tex.src_sel_z; r = r600_bc_add_tex(ctx->bc, &tex); if (r) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index efb68cbd139..ac2e8986b97 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -161,16 +161,19 @@ static void *r600_create_blend_state(struct pipe_context *ctx, color_control, 0xFFFFFFFD, NULL); for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; + /* state->rt entries > 0 only written if independent blending */ + const int j = state->independent_blend_enable ? 
i : 0; - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; + unsigned eqRGB = state->rt[j].rgb_func; + unsigned srcRGB = state->rt[j].rgb_src_factor; + unsigned dstRGB = state->rt[j].rgb_dst_factor; + + unsigned eqA = state->rt[j].alpha_func; + unsigned srcA = state->rt[j].alpha_src_factor; + unsigned dstA = state->rt[j].alpha_dst_factor; uint32_t bc = 0; - if (!state->rt[i].blend_enable) + if (!state->rt[j].blend_enable) continue; bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); @@ -410,7 +413,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - unsigned format; + unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; @@ -447,6 +450,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); if (tmp->force_int_type) { word4 &= C_038010_NUM_FORMAT_ALL; @@ -487,6 +491,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | + S_038010_ENDIAN_SWAP(endian) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | @@ -715,7 +720,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap, ntype; + unsigned format, swap, ntype, endian; unsigned offset; const struct util_format_description *desc; struct 
r600_bo *bo[3]; @@ -739,40 +744,37 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - ntype = 0; desc = util_format_description(surf->base.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { - switch(desc->channel[0].type) { - case UTIL_FORMAT_TYPE_UNSIGNED: - ntype = V_0280A0_NUMBER_UNORM; - break; - - case UTIL_FORMAT_TYPE_SIGNED: - ntype = V_0280A0_NUMBER_SNORM; - break; - } - } for (i = 0; i < 4; i++) { if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { break; } } + ntype = V_0280A0_NUMBER_UNORM; + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) + ntype = V_0280A0_NUMBER_SNORM; format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + if(rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) { + endian = ENDIAN_NONE; + } else { + endian = r600_colorformat_endian_swap(format); + } /* disable when gallium grows int textures */ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) - ntype = 4; + ntype = V_0280A0_NUMBER_UINT; color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); + S_0280A0_NUMBER_TYPE(ntype) | + S_0280A0_ENDIAN(endian); /* on R600 this can't be set if BLEND_CLAMP isn't set, if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ @@ -855,6 +857,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; + r600_context_flush_dest_caches(&rctx->ctx); + rctx->ctx.num_dest_buffers = state->nr_cbufs; + /* unreference old buffer and 
reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -866,6 +871,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } if (state->zsbuf) { r600_db(rctx, rstate, state); + rctx->ctx.num_dest_buffers++; } target_mask = 0x00000000; @@ -945,6 +951,17 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } +static void r600_texture_barrier(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1)); +} + void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -985,6 +1002,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; + rctx->context.texture_barrier = r600_texture_barrier; } void r600_init_config(struct r600_pipe_context *rctx) @@ -1443,8 +1461,10 @@ void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(stride), - 0xFFFFFFFF, NULL); +#ifdef PIPE_ARCH_BIG_ENDIAN + S_038008_ENDIAN_SWAP(ENDIAN_8IN32) | +#endif + S_038008_STRIDE(stride), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c index 43dad0c8023..997c9a597ee 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -435,7 +435,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; + u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; struct r600_pipe_state vgt; struct r600_drawl draw = {}; @@ -467,14 +467,21 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.info.index_bias = info->start; } + vgt_dma_swap_mode = 0; switch (draw.index_size) { case 2: vgt_draw_initiator = 0; vgt_dma_index_type = 0; +#ifdef PIPE_ARCH_BIG_ENDIAN + vgt_dma_swap_mode = ENDIAN_8IN16; +#endif break; case 4: vgt_draw_initiator = 0; vgt_dma_index_type = 1; +#ifdef PIPE_ARCH_BIG_ENDIAN + vgt_dma_swap_mode = ENDIAN_8IN32; +#endif break; case 0: vgt_draw_initiator = 2; @@ -521,7 +528,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; - rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2); rdraw.vgt_draw_initiator = vgt_draw_initiator; rdraw.indices = NULL; if (draw.index_buffer) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 2a40f41bbf5..5eabfdc2bc6 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -363,9 +363,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_SWAP_STD; /* 64-bit buffers. 
*/ + case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_SSCALED: @@ -501,6 +504,57 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } +static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. 
*/ + } +#else + return ENDIAN_NONE; +#endif +} + static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index df70e2889e2..2bff52bec8c 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3460,4 +3460,10 @@ #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 #define SQ_TEX_INST_SAMPLE_C 0x18 + +#define ENDIAN_NONE 0 +#define ENDIAN_8IN16 1 +#define ENDIAN_8IN32 2 +#define ENDIAN_8IN64 3 + #endif diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c09ce19559c..02892c16bde 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -566,7 +566,7 @@ compute_lambda_1d(const struct sp_sampler_variant *samp, const struct pipe_resource *texture = samp->view->texture; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); - float rho = MAX2(dsdx, dsdy) * texture->width0; + float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level); return util_fast_log2(rho); } @@ -583,8 +583,8 @@ compute_lambda_2d(const struct sp_sampler_variant *samp, float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width0; - float maxy = MAX2(dtdx, dtdy) * texture->height0; + float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level); + float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level); float rho = MAX2(maxx, maxy); return util_fast_log2(rho); @@ -604,9 +604,9 @@ compute_lambda_3d(const struct 
sp_sampler_variant *samp, float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width0; - float maxy = MAX2(dtdx, dtdy) * texture->height0; - float maxz = MAX2(dpdx, dpdy) * texture->depth0; + float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level); + float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level); + float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level); float rho; rho = MAX2(maxx, maxy); @@ -1590,10 +1590,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, /* XXX: Take into account all lod values. */ lambda = lod[0]; - level0 = (int)lambda; + level0 = samp->view->u.tex.first_level + (int)lambda; if (lambda < 0.0) { - samp->level = 0; + samp->level = samp->view->u.tex.first_level; samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else if (level0 >= texture->last_level) { @@ -1601,7 +1601,7 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - float levelBlend = lambda - level0; + float levelBlend = frac(lambda); float rgba0[4][4]; float rgba1[4][4]; int c,j; @@ -1658,11 +1658,11 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, lambda = lod[0]; if (lambda < 0.0) { - samp->level = 0; + samp->level = samp->view->u.tex.first_level; samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->level = (int)(lambda + 0.5) ; + samp->level = samp->view->u.tex.first_level + (int)(lambda + 0.5) ; samp->level = MIN2(samp->level, (int)texture->last_level); samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } @@ -1699,6 +1699,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, */ lambda = 
lod[0]; + samp->level = samp->view->u.tex.first_level; if (lambda < 0.0) { samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } @@ -1741,20 +1742,20 @@ mip_filter_linear_2d_linear_repeat_POT( /* XXX: Take into account all lod values. */ lambda = lod[0]; - level0 = (int)lambda; + level0 = samp->view->u.tex.first_level + (int)lambda; /* Catches both negative and large values of level0: */ if ((unsigned)level0 >= texture->last_level) { if (level0 < 0) - samp->level = 0; + samp->level = samp->view->u.tex.first_level; else samp->level = texture->last_level; img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - float levelBlend = lambda - level0; + float levelBlend = frac(lambda); float rgba0[4][4]; float rgba1[4][4]; int c,j; @@ -2238,14 +2239,13 @@ sp_sampler_variant_bind_view( struct sp_sampler_variant *samp, struct softpipe_tex_tile_cache *tex_cache, const struct pipe_sampler_view *view ) { - const struct pipe_sampler_state *sampler = samp->sampler; const struct pipe_resource *texture = view->texture; samp->view = view; samp->cache = tex_cache; samp->xpot = util_unsigned_logbase2( texture->width0 ); samp->ypot = util_unsigned_logbase2( texture->height0 ); - samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); + samp->level = view->u.tex.first_level; } diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h index 77cb4533100..a527d7dcf20 100644 --- a/src/gallium/drivers/svga/include/svga3d_reg.h +++ b/src/gallium/drivers/svga/include/svga3d_reg.h @@ -968,12 +968,18 @@ typedef enum { } SVGA3dTransferType; /* - * The maximum number vertex arrays we're guaranteed to support in + * The maximum number of vertex arrays we're guaranteed to support in * SVGA_3D_CMD_DRAWPRIMITIVES. */ #define SVGA3D_MAX_VERTEX_ARRAYS 32 /* + * The maximum number of primitive ranges we're guaranteed to support + * in SVGA_3D_CMD_DRAWPRIMITIVES. 
+ */ +#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32 + +/* * Identifiers for commands in the command FIFO. * * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of @@ -1474,10 +1480,12 @@ struct { * SVGA3dCmdDrawPrimitives structure. In order, * they are: * - * 1. SVGA3dVertexDecl, quantity 'numVertexDecls' - * 2. SVGA3dPrimitiveRange, quantity 'numRanges' + * 1. SVGA3dVertexDecl, quantity 'numVertexDecls', but no more than + * SVGA3D_MAX_VERTEX_ARRAYS; + * 2. SVGA3dPrimitiveRange, quantity 'numRanges', but no more than + * SVGA3D_MAX_DRAW_PRIMITIVE_RANGES; * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains - * the frequency divisor for this the corresponding vertex decl) + * the frequency divisor for the corresponding vertex decl). */ } SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 4782b4bf70e..dbbc249258d 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -204,7 +204,6 @@ void svga_context_flush( struct svga_context *svga, { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_fence_handle *fence = NULL; - enum pipe_error ret; svga->curr.nr_fbs = 0; @@ -219,25 +218,11 @@ void svga_context_flush( struct svga_context *svga, svga_screen_cache_flush(svgascreen, fence); - /* To force the reemission of rendertargets and texture bindings at - * the beginning of every command buffer. - */ - svga->dirty |= SVGA_NEW_COMMAND_BUFFER; - - /* - * We must reemit the surface bindings here, because svga_update_state - * will always flush the primitives before processing the - * SVGA_NEW_COMMAND_BUFFER state change. - * - * TODO: Refactor this. + /* To force the re-emission of rendertargets and texture sampler bindings on + * the next command buffer. 
*/ - ret = svga_reemit_framebuffer_bindings(svga); - assert(ret == PIPE_OK); - - ret = svga_reemit_tss_bindings(svga); - assert(ret == PIPE_OK); - - svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER; + svga->rebind.rendertargets = TRUE; + svga->rebind.texture_samplers = TRUE; if (SVGA_DEBUG & DEBUG_SYNC) { if (fence) diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 7b36a3606e0..eca529d262e 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -358,6 +358,11 @@ struct svga_context struct svga_state curr; /* state from the state tracker */ unsigned dirty; /* statechanges since last update_state() */ + struct { + unsigned rendertargets:1; + unsigned texture_samplers:1; + } rebind; + struct u_upload_mgr *upload_ib; struct u_upload_mgr *upload_vb; struct svga_hwtnl *hwtnl; @@ -402,7 +407,6 @@ struct svga_context #define SVGA_NEW_ZERO_STRIDE 0x2000000 #define SVGA_NEW_TEXTURE_FLAGS 0x4000000 #define SVGA_NEW_STENCIL_REF 0x8000000 -#define SVGA_NEW_COMMAND_BUFFER 0x10000000 diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 2c873a0f7ac..d8af615ede1 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -170,6 +170,20 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) ib_handle[i] = handle; } + if (svga->rebind.rendertargets) { + ret = svga_reemit_framebuffer_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + + if (svga->rebind.texture_samplers) { + ret = svga_reemit_tss_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", svga->curr.framebuffer.cbufs[0] ? 
svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index 11afb59875b..ca658ac6745 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -93,7 +93,9 @@ struct index_cache { struct pipe_resource *buffer; }; -#define QSZ 32 + +/** Max number of primitives per draw call */ +#define QSZ SVGA3D_MAX_DRAW_PRIMITIVE_RANGES struct draw_cmd { struct svga_winsys_context *swc; diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index c87afb6946c..c4f122f4f78 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -29,6 +29,8 @@ #include "svga_cmd.h" #include "svga_surface.h" +#include "util/u_surface.h" + #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -54,6 +56,13 @@ static void svga_surface_copy(struct pipe_context *pipe, */ svga_surfaces_flush( svga ); + /* Fallback for buffers. 
*/ + if (dst_tex->target == PIPE_BUFFER && src_tex->target == PIPE_BUFFER) { + util_resource_copy_region(pipe, dst_tex, dst_level, dstx, dsty, dstz, + src_tex, src_level, src_box); + return; + } + #if 0 srcsurf = screen->get_tex_surface(screen, src_tex, src_level, src_box->z, src_box->z, diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 41f239c1a85..b288c3eb2a6 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -52,6 +52,13 @@ try_clear(struct svga_context *svga, if (ret) return ret; + if (svga->rebind.rendertargets) { + ret = svga_reemit_framebuffer_bindings(svga); + if (ret != PIPE_OK) { + return ret; + } + } + if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { flags |= SVGA3D_CLEAR_COLOR; util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index e5fcec08d6e..923958674b4 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,9 +40,6 @@ #include "svga_debug.h" -#define MAX_DMA_SIZE (4 * 1024 * 1024) - - /** * Allocate a winsys_buffer (ie. DMA, aka GMR memory). * @@ -60,18 +57,10 @@ svga_winsys_buffer_create( struct svga_context *svga, struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_buffer *buf; - /* XXX this shouldn't be a hard-coded number; it should be queried - * somehow. 
- */ - if (size > MAX_DMA_SIZE) { - return NULL; - } - /* Just try */ buf = sws->buffer_create(sws, alignment, usage, size); - if(!buf) { - - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n", + if (!buf) { + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing context to find %d bytes GMR\n", size); /* Try flushing all pending DMAs */ diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index b61f85955a2..f9eb4949b2c 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -231,18 +231,19 @@ svga_transfer_dma(struct svga_context *svga, sw = (uint8_t *)st->swbuf + offset; - if(transfer == SVGA3D_WRITE_HOST_VRAM) { + if (transfer == SVGA3D_WRITE_HOST_VRAM) { + unsigned usage = PIPE_TRANSFER_WRITE; + /* Wait for the previous DMAs to complete */ /* TODO: keep one DMA (at half the size) in the background */ - if(y) { - svga_context_flush(svga, &fence); - sws->fence_finish(sws, fence, 0); - sws->fence_reference(sws, &fence, NULL); + if (y) { + svga_context_flush(svga, NULL); + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; } - hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_WRITE); + hw = sws->buffer_map(sws, st->hwbuf, usage); assert(hw); - if(hw) { + if (hw) { memcpy(hw, sw, length); sws->buffer_unmap(sws, st->hwbuf); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 6c987abe056..b847cf331b3 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -181,6 +181,8 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 0; default: return 0; diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index cdadb20c178..cc4819431ad 100644 --- 
a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -43,7 +43,7 @@ static int emit_framebuffer( struct svga_context *svga, { const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; - boolean reemit = !!(dirty & SVGA_NEW_COMMAND_BUFFER); + boolean reemit = svga->rebind.rendertargets; unsigned i; enum pipe_error ret; @@ -88,6 +88,7 @@ static int emit_framebuffer( struct svga_context *svga, pipe_surface_reference(&hw->zsbuf, curr->zsbuf); } + svga->rebind.rendertargets = FALSE; return 0; } @@ -108,6 +109,8 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) unsigned i; enum pipe_error ret; + assert(svga->rebind.rendertargets); + for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) { if (hw->cbufs[i]) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]); @@ -138,6 +141,8 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) } } + svga->rebind.rendertargets = FALSE; + return PIPE_OK; } @@ -145,8 +150,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga) struct svga_tracked_state svga_hw_framebuffer = { "hw framebuffer state", - SVGA_NEW_FRAME_BUFFER | - SVGA_NEW_COMMAND_BUFFER, + SVGA_NEW_FRAME_BUFFER, emit_framebuffer }; @@ -296,7 +300,7 @@ static int emit_viewport( struct svga_context *svga, break; case PIPE_PRIM_POINTS: case PIPE_PRIM_TRIANGLES: - adjust_x = -0.375; + adjust_x = -0.5; adjust_y = -0.5; break; } diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index c502506b93b..8b11a2a425e 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -66,7 +66,7 @@ static int update_tss_binding(struct svga_context *svga, unsigned dirty ) { - boolean reemit = !!(dirty & SVGA_NEW_COMMAND_BUFFER); + boolean reemit = svga->rebind.texture_samplers; unsigned i; unsigned count = MAX2( 
svga->curr.num_sampler_views, svga->state.hw_draw.num_views ); @@ -159,6 +159,8 @@ update_tss_binding(struct svga_context *svga, SVGA_FIFOCommitAll( svga->swc ); } + svga->rebind.texture_samplers = FALSE; + return 0; fail: @@ -181,6 +183,8 @@ svga_reemit_tss_bindings(struct svga_context *svga) enum pipe_error ret; struct bind_queue queue; + assert(svga->rebind.texture_samplers); + queue.bind_count = 0; for (i = 0; i < svga->state.hw_draw.num_views; i++) { @@ -220,6 +224,8 @@ svga_reemit_tss_bindings(struct svga_context *svga) SVGA_FIFOCommitAll(svga->swc); } + svga->rebind.texture_samplers = FALSE; + return PIPE_OK; } @@ -227,8 +233,7 @@ svga_reemit_tss_bindings(struct svga_context *svga) struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | - SVGA_NEW_SAMPLER | - SVGA_NEW_COMMAND_BUFFER, + SVGA_NEW_SAMPLER, update_tss_binding }; diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index c210cba032a..cacd2e17498 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -13,12 +13,17 @@ It can traces all incoming calls. For tracing then do - GALLIUM_TRACE=tri.trace progs/trivial/tri + GALLIUM_TRACE=tri.trace trivial/tri which should create a tri.trace file, which is an XML file. You can view copying trace.xsl to the same directory, and opening with a XSLT capable browser such as Firefox or Internet Explorer. 
+For long traces you can use the + + src/gallium/tools/trace/dump.py tri.trace | less -R + + == Remote debugging == For remote debugging see: diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 4db7619c424..4f0d6c4b97a 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -47,15 +47,15 @@ static INLINE struct pipe_resource * trace_resource_unwrap(struct trace_context *tr_ctx, struct pipe_resource *resource) { - struct trace_resource *tr_tex; + struct trace_resource *tr_res; if(!resource) return NULL; - tr_tex = trace_resource(resource); + tr_res = trace_resource(resource); - assert(tr_tex->resource); - return tr_tex->resource; + assert(tr_res->resource); + return tr_res->resource; } @@ -832,19 +832,22 @@ trace_create_sampler_view(struct pipe_context *_pipe, const struct pipe_sampler_view *templ) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_tex = trace_resource(_resource); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *texture = tr_tex->resource; + struct pipe_resource *resource = tr_res->resource; struct pipe_sampler_view *result; struct trace_sampler_view *tr_view; trace_dump_call_begin("pipe_context", "create_sampler_view"); trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, texture); - trace_dump_arg(sampler_view_template, templ); + trace_dump_arg(ptr, resource); + + trace_dump_arg_begin("templ"); + trace_dump_sampler_view_template(templ, resource->target); + trace_dump_arg_end(); - result = pipe->create_sampler_view(pipe, texture, templ); + result = pipe->create_sampler_view(pipe, resource, templ); trace_dump_ret(ptr, result); @@ -895,29 +898,32 @@ trace_sampler_view_destroy(struct pipe_context *_pipe, static struct pipe_surface * trace_create_surface(struct pipe_context *_pipe, - struct pipe_resource *_texture, + struct pipe_resource *_resource, const 
struct pipe_surface *surf_tmpl) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_tex = trace_resource(_texture); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *texture = tr_tex->resource; + struct pipe_resource *resource = tr_res->resource; struct pipe_surface *result = NULL; trace_dump_call_begin("pipe_context", "create_surface"); trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, texture); - /* hmm some values unitialized there */ - trace_dump_arg(surface, surf_tmpl); + trace_dump_arg(ptr, resource); + + trace_dump_arg_begin("surf_tmpl"); + trace_dump_surface_template(surf_tmpl, resource->target); + trace_dump_arg_end(); + - result = pipe->create_surface(pipe, texture, surf_tmpl); + result = pipe->create_surface(pipe, resource, surf_tmpl); trace_dump_ret(ptr, result); trace_dump_call_end(); - result = trace_surf_create(tr_tex, result); + result = trace_surf_create(tr_res, result); return result; } @@ -1231,9 +1237,9 @@ trace_context_get_transfer(struct pipe_context *_context, const struct pipe_box *box) { struct trace_context *tr_context = trace_context(_context); - struct trace_resource *tr_tex = trace_resource(_resource); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_context *context = tr_context->pipe; - struct pipe_resource *texture = tr_tex->resource; + struct pipe_resource *texture = tr_res->resource; struct pipe_transfer *result = NULL; assert(texture->screen == context->screen); @@ -1246,7 +1252,7 @@ trace_context_get_transfer(struct pipe_context *_context, result = context->get_transfer(context, texture, level, usage, box); if (result) - result = trace_transfer_create(tr_context, tr_tex, result); + result = trace_transfer_create(tr_context, tr_res, result); return result; } @@ -1360,9 +1366,9 @@ trace_context_transfer_inline_write(struct pipe_context *_context, unsigned layer_stride) { struct trace_context *tr_context 
= trace_context(_context); - struct trace_resource *tr_tex = trace_resource(_resource); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_context *context = tr_context->pipe; - struct pipe_resource *resource = tr_tex->resource; + struct pipe_resource *resource = tr_res->resource; assert(resource->screen == context->screen); @@ -1397,9 +1403,9 @@ static void trace_redefine_user_buffer(struct pipe_context *_context, unsigned offset, unsigned size) { struct trace_context *tr_context = trace_context(_context); - struct trace_resource *tr_tex = trace_resource(_resource); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_context *context = tr_context->pipe; - struct pipe_resource *resource = tr_tex->resource; + struct pipe_resource *resource = tr_res->resource; assert(resource->screen == context->screen); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 18805655bd7..291a6a29ca5 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -321,8 +321,29 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ trace_dump_struct_end(); } +static void trace_dump_rt_blend_state(const struct pipe_rt_blend_state *state) +{ + trace_dump_struct_begin("pipe_rt_blend_state"); + + trace_dump_member(uint, state, blend_enable); + + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); + + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); + + trace_dump_member(uint, state, colormask); + + trace_dump_struct_end(); +} + void trace_dump_blend_state(const struct pipe_blend_state *state) { + unsigned valid_entries = 1; + if (!trace_dumping_enabled_locked()) return; @@ -331,7 +352,22 @@ void trace_dump_blend_state(const struct 
pipe_blend_state *state) return; } - trace_dump_bytes(state, sizeof *state); + trace_dump_struct_begin("pipe_blend_state"); + + trace_dump_member(bool, state, dither); + + trace_dump_member(bool, state, logicop_enable); + trace_dump_member(uint, state, logicop_func); + + trace_dump_member(bool, state, independent_blend_enable); + + trace_dump_member_begin("rt"); + if (state->independent_blend_enable) + valid_entries = PIPE_MAX_COLOR_BUFS; + trace_dump_struct_array(rt_blend_state, state->rt, valid_entries); + trace_dump_member_end(); + + trace_dump_struct_end(); } @@ -417,7 +453,8 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) } -void trace_dump_sampler_view_template(const struct pipe_sampler_view *state) +void trace_dump_sampler_view_template(const struct pipe_sampler_view *state, + enum pipe_texture_target target) { if (!trace_dumping_enabled_locked()) return; @@ -430,13 +467,29 @@ void trace_dump_sampler_view_template(const struct pipe_sampler_view *state) trace_dump_struct_begin("pipe_sampler_view"); trace_dump_member(format, state, format); - /* XXX */ - trace_dump_member(uint, state, u.tex.first_level); - trace_dump_member(uint, state, u.tex.last_level); - trace_dump_member(uint, state, u.tex.first_layer); - trace_dump_member(uint, state, u.tex.last_layer); - trace_dump_member(uint, state, u.buf.first_element); - trace_dump_member(uint, state, u.buf.last_element); + + trace_dump_member_begin("u"); + trace_dump_struct_begin(""); /* anonymous */ + if (target == PIPE_BUFFER) { + trace_dump_member_begin("buf"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.buf, first_element); + trace_dump_member(uint, &state->u.buf, last_element); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* buf */ + } else { + trace_dump_member_begin("tex"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.tex, first_layer); + trace_dump_member(uint, &state->u.tex, 
last_layer); + trace_dump_member(uint, &state->u.tex, first_level); + trace_dump_member(uint, &state->u.tex, last_level); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* tex */ + } + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* u */ + trace_dump_member(uint, state, swizzle_r); trace_dump_member(uint, state, swizzle_g); trace_dump_member(uint, state, swizzle_b); @@ -446,7 +499,8 @@ void trace_dump_sampler_view_template(const struct pipe_sampler_view *state) } -void trace_dump_surface(const struct pipe_surface *state) +void trace_dump_surface_template(const struct pipe_surface *state, + enum pipe_texture_target target) { if (!trace_dumping_enabled_locked()) return; @@ -464,12 +518,26 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_member(uint, state, usage); - trace_dump_member(ptr, state, texture); - trace_dump_member(uint, state, u.tex.level); - trace_dump_member(uint, state, u.tex.first_layer); - trace_dump_member(uint, state, u.tex.last_layer); - trace_dump_member(uint, state, u.buf.first_element); - trace_dump_member(uint, state, u.buf.last_element); + trace_dump_member_begin("u"); + trace_dump_struct_begin(""); /* anonymous */ + if (target == PIPE_BUFFER) { + trace_dump_member_begin("buf"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.buf, first_element); + trace_dump_member(uint, &state->u.buf, last_element); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* buf */ + } else { + trace_dump_member_begin("tex"); + trace_dump_struct_begin(""); /* anonymous */ + trace_dump_member(uint, &state->u.tex, level); + trace_dump_member(uint, &state->u.tex, first_layer); + trace_dump_member(uint, &state->u.tex, last_layer); + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* tex */ + } + trace_dump_struct_end(); /* anonymous */ + trace_dump_member_end(); /* u */ trace_dump_struct_end(); } diff --git 
a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index fe8ece78d43..a6e7ccdb864 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -65,9 +65,11 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); void trace_dump_sampler_state(const struct pipe_sampler_state *state); -void trace_dump_sampler_view_template(const struct pipe_sampler_view *view); +void trace_dump_sampler_view_template(const struct pipe_sampler_view *view, + enum pipe_texture_target target); -void trace_dump_surface(const struct pipe_surface *state); +void trace_dump_surface_template(const struct pipe_surface *state, + enum pipe_texture_target target); void trace_dump_transfer(const struct pipe_transfer *state); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 42180c4f19e..a7d33661838 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -117,7 +117,7 @@ trace_screen_get_shader_param(struct pipe_screen *_screen, unsigned shader, trace_dump_call_begin("pipe_screen", "get_shader_param"); trace_dump_arg(ptr, screen); - trace_dump_arg(int, shader); + trace_dump_arg(uint, shader); trace_dump_arg(int, param); result = screen->get_shader_param(screen, shader, param); @@ -282,40 +282,40 @@ trace_screen_resource_from_handle(struct pipe_screen *_screen, static boolean trace_screen_resource_get_handle(struct pipe_screen *_screen, - struct pipe_resource *_texture, + struct pipe_resource *_resource, struct winsys_handle *handle) { struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_resource *tr_texture = trace_resource(_texture); + struct trace_resource *tr_resource = trace_resource(_resource); struct pipe_screen *screen = tr_screen->screen; - struct pipe_resource *texture = tr_texture->resource; + struct pipe_resource *resource = tr_resource->resource; /* TODO trace call */ - return 
screen->resource_get_handle(screen, texture, handle); + return screen->resource_get_handle(screen, resource, handle); } static void trace_screen_resource_destroy(struct pipe_screen *_screen, - struct pipe_resource *_texture) + struct pipe_resource *_resource) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_resource *tr_tex = trace_resource(_texture); + struct trace_resource *tr_res = trace_resource(_resource); struct pipe_screen *screen = tr_scr->screen; - struct pipe_resource *texture = tr_tex->resource; + struct pipe_resource *resource = tr_res->resource; - assert(texture->screen == screen); + assert(resource->screen == screen); - trace_dump_call_begin("pipe_screen", "texture_destroy"); + trace_dump_call_begin("pipe_screen", "resource_destroy"); trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, texture); + trace_dump_arg(ptr, resource); trace_dump_call_end(); - trace_resource_destroy(tr_scr, tr_tex); + trace_resource_destroy(tr_scr, tr_res); } diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 27997346471..648b7a4f1cc 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -39,24 +39,24 @@ struct pipe_resource * trace_resource_create(struct trace_screen *tr_scr, struct pipe_resource *texture) { - struct trace_resource *tr_tex; + struct trace_resource *tr_res; if(!texture) goto error; assert(texture->screen == tr_scr->screen); - tr_tex = CALLOC_STRUCT(trace_resource); - if(!tr_tex) + tr_res = CALLOC_STRUCT(trace_resource); + if(!tr_res) goto error; - memcpy(&tr_tex->base, texture, sizeof(struct pipe_resource)); + memcpy(&tr_res->base, texture, sizeof(struct pipe_resource)); - pipe_reference_init(&tr_tex->base.reference, 1); - tr_tex->base.screen = &tr_scr->base; - tr_tex->resource = texture; + pipe_reference_init(&tr_res->base.reference, 1); + tr_res->base.screen = &tr_scr->base; + tr_res->resource = texture; - return &tr_tex->base; + return 
&tr_res->base; error: pipe_resource_reference(&texture, NULL); @@ -66,15 +66,15 @@ error: void trace_resource_destroy(struct trace_screen *tr_scr, - struct trace_resource *tr_tex) + struct trace_resource *tr_res) { - pipe_resource_reference(&tr_tex->resource, NULL); - FREE(tr_tex); + pipe_resource_reference(&tr_res->resource, NULL); + FREE(tr_res); } struct pipe_surface * -trace_surf_create(struct trace_resource *tr_tex, +trace_surf_create(struct trace_resource *tr_res, struct pipe_surface *surface) { struct trace_surface *tr_surf; @@ -82,7 +82,7 @@ trace_surf_create(struct trace_resource *tr_tex, if(!surface) goto error; - assert(surface->texture == tr_tex->resource); + assert(surface->texture == tr_res->resource); tr_surf = CALLOC_STRUCT(trace_surface); if(!tr_surf) @@ -92,7 +92,7 @@ trace_surf_create(struct trace_resource *tr_tex, pipe_reference_init(&tr_surf->base.reference, 1); tr_surf->base.texture = NULL; - pipe_resource_reference(&tr_surf->base.texture, &tr_tex->base); + pipe_resource_reference(&tr_surf->base.texture, &tr_res->base); tr_surf->surface = surface; return &tr_surf->base; @@ -114,7 +114,7 @@ trace_surf_destroy(struct trace_surface *tr_surf) struct pipe_transfer * trace_transfer_create(struct trace_context *tr_ctx, - struct trace_resource *tr_tex, + struct trace_resource *tr_res, struct pipe_transfer *transfer) { struct trace_transfer *tr_trans; @@ -122,7 +122,7 @@ trace_transfer_create(struct trace_context *tr_ctx, if(!transfer) goto error; - assert(transfer->resource == tr_tex->resource); + assert(transfer->resource == tr_res->resource); tr_trans = CALLOC_STRUCT(trace_transfer); if(!tr_trans) @@ -133,8 +133,8 @@ trace_transfer_create(struct trace_context *tr_ctx, tr_trans->base.resource = NULL; tr_trans->transfer = transfer; - pipe_resource_reference(&tr_trans->base.resource, &tr_tex->base); - assert(tr_trans->base.resource == &tr_tex->base); + pipe_resource_reference(&tr_trans->base.resource, &tr_res->base); + assert(tr_trans->base.resource == 
&tr_res->base); return &tr_trans->base; diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h index 3352c96e59a..bf3c16e8cd2 100644 --- a/src/gallium/drivers/trace/tr_texture.h +++ b/src/gallium/drivers/trace/tr_texture.h @@ -122,10 +122,10 @@ trace_resource_create(struct trace_screen *tr_scr, void trace_resource_destroy(struct trace_screen *tr_scr, - struct trace_resource *tr_tex); + struct trace_resource *tr_res); struct pipe_surface * -trace_surf_create(struct trace_resource *tr_tex, +trace_surf_create(struct trace_resource *tr_res, struct pipe_surface *surface); void @@ -133,7 +133,7 @@ trace_surf_destroy(struct trace_surface *tr_surf); struct pipe_transfer * trace_transfer_create(struct trace_context *tr_ctx, - struct trace_resource *tr_tex, + struct trace_resource *tr_res, struct pipe_transfer *transfer); void |