diff options
author | Stéphane Marchesin <[email protected]> | 2011-07-13 11:56:44 -0700 |
---|---|---|
committer | Stéphane Marchesin <[email protected]> | 2011-07-13 11:56:44 -0700 |
commit | f7a85f603b0da0770c27b200bff6e239e3aeae95 (patch) | |
tree | 95a0a3267b12826fb82c04a6e2763fb2bce88065 | |
parent | 9baad926602547e14d4fdc6a66ec629dfa6acbb2 (diff) | |
parent | 4f4855b249cbcb77900e9767041becf255afbba1 (diff) |
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa
112 files changed, 2702 insertions, 953 deletions
diff --git a/configs/linux-llvm b/configs/linux-llvm index 54d82b5376c..ef6c7bb105a 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -30,7 +30,7 @@ else endif ifeq ($(MESA_LLVM),1) - LLVM_CFLAGS=`llvm-config --cppflags` + LLVM_CFLAGS=`llvm-config --cppflags|sed 's/-DNDEBUG\>//g'` LLVM_CXXFLAGS=`llvm-config --cxxflags` -Wno-long-long LLVM_LDFLAGS = $(shell llvm-config --ldflags) LLVM_LIBS = $(shell llvm-config --libs) diff --git a/configure.ac b/configure.ac index f19f6478b6a..eace790d84d 100644 --- a/configure.ac +++ b/configure.ac @@ -22,8 +22,8 @@ LIBDRM_REQUIRED=2.4.24 LIBDRM_RADEON_REQUIRED=2.4.24 LIBDRM_INTEL_REQUIRED=2.4.24 LIBDRM_NOUVEAU_REQUIRED=0.6 -DRI2PROTO_REQUIRED=2.1 -GLPROTO_REQUIRED=1.4.11 +DRI2PROTO_REQUIRED=2.6 +GLPROTO_REQUIRED=1.4.14 LIBDRM_XORG_REQUIRED=2.4.24 LIBKMS_XORG_REQUIRED=1.0.0 @@ -617,6 +617,13 @@ AC_ARG_WITH([gallium-drivers], [with_gallium_drivers="$withval"], [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"]) +# Doing '--without-gallium-drivers' will set this variable to 'no'. Clear it +# here so that the script doesn't choke on an unknown driver name later. +case "$with_gallium_drivers" in + yes) with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT" ;; + no) with_gallium_drivers='' ;; +esac + if test "x$enable_opengl" = xno -a \ "x$enable_gles1" = xno -a \ "x$enable_gles2" = xno -a \ @@ -791,7 +798,7 @@ esac if test "x$enable_dri" = xyes; then DRIVER_DIRS="$DRIVER_DIRS dri" - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib sw/dri" + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/dri" GALLIUM_STATE_TRACKERS_DIRS="dri $GALLIUM_STATE_TRACKERS_DIRS" HAVE_ST_DRI="yes" fi @@ -1806,7 +1813,7 @@ fi if test "x$enable_gallium_llvm" = xyes; then if test "x$LLVM_CONFIG" != xno; then LLVM_VERSION=`$LLVM_CONFIG --version` - LLVM_CFLAGS=`$LLVM_CONFIG --cppflags` + LLVM_CFLAGS=`$LLVM_CONFIG --cppflags|sed 's/-DNDEBUG\>//g'` LLVM_LIBS="`$LLVM_CONFIG --libs` -lstdc++" LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` diff --git a/docs/GL3.txt b/docs/GL3.txt index 49b48472a4a..135bc4bab67 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -19,7 +19,7 @@ Clamping controls (GL_ARB_color_buffer_float) DONE Float textures, renderbuffers (GL_ARB_texture_float) DONE (gallium r300) GL_EXT_packed_float DONE (gallium r600) GL_EXT_texture_shared_exponent DONE (gallium, swrast) -Float depth buffers (GL_ARB_depth_buffer_float) not started +Float depth buffers (GL_ARB_depth_buffer_float) DONE Framebuffer objects (GL_EXT_framebuffer_object) DONE Half-float DONE Multisample blit DONE diff --git a/include/GL/glext.h b/include/GL/glext.h index 6e5e6a11180..9048515c6d9 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -6,7 +6,7 @@ extern "C" { #endif /* -** Copyright (c) 2007-2010 The Khronos Group Inc. +** Copyright (c) 2007-2011 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -29,9 +29,9 @@ extern "C" { */ /* Header file version number, required by OpenGL ABI for Linux */ -/* glext.h last updated $Date: 2010-12-09 02:15:08 -0800 (Thu, 09 Dec 2010) $ */ +/* glext.h last updated $Date: 2011-07-06 02:49:14 -0700 (Wed, 06 Jul 2011) $ */ /* Current version at http://www.opengl.org/registry/ */ -#define GL_GLEXT_VERSION 67 +#define GL_GLEXT_VERSION 71 /* Function declaration macros - to move into glplatform.h */ #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) @@ -5032,6 +5032,32 @@ extern "C" { #define GL_SKIP_DECODE_EXT 0x8A4A #endif +#ifndef GL_NV_texture_multisample +#define GL_TEXTURE_COVERAGE_SAMPLES_NV 0x9045 +#define GL_TEXTURE_COLOR_SAMPLES_NV 0x9046 +#endif + +#ifndef GL_AMD_blend_minmax_factor +#define GL_FACTOR_MIN_AMD 0x901C +#define GL_FACTOR_MAX_AMD 0x901D +#endif + +#ifndef GL_AMD_sample_positions +#define GL_SUBSAMPLE_DISTANCE_AMD 0x883F +#endif + +#ifndef GL_EXT_x11_sync_object +#define GL_SYNC_X11_FENCE_EXT 0x90E1 +#endif + +#ifndef GL_AMD_multi_draw_indirect +#endif + +#ifndef GL_EXT_framebuffer_multisample_blit_scaled +#define GL_SCALED_RESOLVE_FASTEST_EXT 0x90BA +#define GL_SCALED_RESOLVE_NICEST_EXT 0x90BB +#endif + /*************************************************************/ @@ -11041,6 +11067,58 @@ typedef void (APIENTRYP PFNGLVDPAUUNMAPSURFACESNVPROC) (GLsizei numSurface, cons #define GL_EXT_texture_sRGB_decode 1 #endif +#ifndef GL_NV_texture_multisample +#define GL_NV_texture_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glTexImage2DMultisampleCoverageNV (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTexImage3DMultisampleCoverageNV (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage2DMultisampleNV (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage3DMultisampleNV (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage2DMultisampleCoverageNV (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +GLAPI void APIENTRY glTextureImage3DMultisampleCoverageNV (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE2DMULTISAMPLENVPROC) (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE3DMULTISAMPLENVPROC) (GLuint texture, GLenum target, GLsizei samples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE2DMULTISAMPLECOVERAGENVPROC) (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLboolean fixedSampleLocations); +typedef void (APIENTRYP PFNGLTEXTUREIMAGE3DMULTISAMPLECOVERAGENVPROC) (GLuint texture, GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedSampleLocations); +#endif + +#ifndef GL_AMD_blend_minmax_factor +#define GL_AMD_blend_minmax_factor 1 +#endif + +#ifndef GL_AMD_sample_positions +#define GL_AMD_sample_positions 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glSetMultisamplefvAMD (GLenum pname, GLuint index, const GLfloat *val); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLSETMULTISAMPLEFVAMDPROC) (GLenum pname, GLuint index, const GLfloat *val); +#endif + +#ifndef GL_EXT_x11_sync_object +#define GL_EXT_x11_sync_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI GLsync APIENTRY glImportSyncEXT (GLenum external_sync_type, GLintptr external_sync, GLbitfield flags); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef GLsync (APIENTRYP PFNGLIMPORTSYNCEXTPROC) (GLenum external_sync_type, GLintptr external_sync, GLbitfield flags); +#endif + +#ifndef GL_AMD_multi_draw_indirect +#define GL_AMD_multi_draw_indirect 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMultiDrawArraysIndirectAMD (GLenum mode, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +GLAPI void APIENTRY glMultiDrawElementsIndirectAMD (GLenum mode, GLenum type, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTAMDPROC) (GLenum mode, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTAMDPROC) (GLenum mode, GLenum type, const GLvoid *indirect, GLsizei primcount, GLsizei stride); +#endif + +#ifndef GL_EXT_framebuffer_multisample_blit_scaled +#define GL_EXT_framebuffer_multisample_blit_scaled 1 +#endif + #ifdef __cplusplus } diff --git a/scons/llvm.py b/scons/llvm.py index b89899bbf87..66f972df5fb 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -141,7 +141,15 @@ def generate(env): llvm_version = distutils.version.LooseVersion(llvm_version) try: - env.ParseConfig('llvm-config --cppflags') + # Treat --cppflags specially to prevent NDEBUG from disabling + # assertion failures in debug builds. + cppflags = env.ParseFlags('!llvm-config --cppflags') + try: + cppflags['CPPDEFINES'].remove('NDEBUG') + except ValueError: + pass + env.MergeFlags(cppflags) + env.ParseConfig('llvm-config --libs') env.ParseConfig('llvm-config --ldflags') except OSError: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 01e660ef7d9..29dfb868d95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -207,21 +207,13 @@ lp_disassemble(const void* func) } raw_debug_ostream Out; -#if HAVE_LLVM >= 0x0300 - TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); -#else - TargetMachine *TM = T->createTargetMachine(Triple, ""); -#endif #if HAVE_LLVM >= 0x0300 unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #else int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #endif -#if HAVE_LLVM >= 0x0300 - OwningPtr<MCInstPrinter> Printer( - T->createMCInstPrinter(*TM, AsmPrinterVariant, *AsmInfo)); -#elif HAVE_LLVM >= 0x0208 +#if HAVE_LLVM >= 0x0208 OwningPtr<MCInstPrinter> Printer( T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo)); #else @@ -233,6 +225,12 @@ lp_disassemble(const void* func) return; } +#if HAVE_LLVM >= 0x0300 + TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), ""); +#else + TargetMachine *TM = T->createTargetMachine(Triple, ""); +#endif + const TargetInstrInfo *TII = TM->getInstrInfo(); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c5cf6d4a6c4..efd159f8869 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -325,16 +325,18 @@ lp_typekind_name(LLVMTypeKind t) return "LLVMArrayTypeKind"; case LLVMPointerTypeKind: return "LLVMPointerTypeKind"; +#if HAVE_LLVM < 0x0300 case LLVMOpaqueTypeKind: return "LLVMOpaqueTypeKind"; +#endif case LLVMVectorTypeKind: return "LLVMVectorTypeKind"; case LLVMMetadataTypeKind: return "LLVMMetadataTypeKind"; - /* Only in LLVM 2.7 and later??? +#if HAVE_LLVM == 0x0207 case LLVMUnionTypeKind: return "LLVMUnionTypeKind"; - */ +#endif default: return "unknown LLVMTypeKind"; } diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 5378f2d782f..9391f1b80e0 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -458,6 +458,19 @@ util_pack_mask_z(enum pipe_format format, uint32_t z) } } + +static INLINE uint64_t +util_pack64_mask_z(enum pipe_format format, uint32_t z) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return z; + default: + return util_pack_mask_z(format, z); + } +} + + static INLINE uint32_t util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { @@ -481,6 +494,21 @@ util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) } +static INLINE uint64_t +util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) +{ + uint64_t packed; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_mask_z(format, z); + packed |= (uint64_t)s << 32ull; + return packed; + default: + return util_pack_mask_z_stencil(format, z, s); + } +} + /** * Note: it's assumed that z is in [0,1] @@ -525,6 +553,24 @@ util_pack_z(enum pipe_format format, double z) return 0; } } + + +static INLINE uint64_t +util_pack64_z(enum pipe_format format, double z) +{ + union fi fui; + + if (z == 0) + return 0; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + fui.f = (float)z; + return fui.ui; + default: + return util_pack_z(format, z); + } +} /** @@ -554,6 +600,24 @@ util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) } +static INLINE uint64_t +util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s) +{ + uint64_t packed; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + packed = util_pack64_z(format, z); + packed |= (uint64_t)s << 32ull; + break; + default: + return util_pack_z_stencil(format, z, s); + } + + return packed; +} + + /** * Pack 4 ubytes into a 4-byte word */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 4c5cc4da182..8e123867da6 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -358,8 +358,41 @@ util_clear_depth_stencil(struct pipe_context *pipe, dst_map += dst_stride; } } - break; + break; case 8: + { + uint64_t zstencil = util_pack64_z_stencil(dst->texture->format, + depth, stencil); + + assert(dst->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED); + + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = zstencil; + dst_map += dst_stride; + } + } + else { + uint64_t src_mask; + + if (clear_flags & PIPE_CLEAR_DEPTH) + src_mask = 0x00000000ffffffffull; + else + src_mask = 0x000000ff00000000ull; + + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) { + uint64_t tmp = *row & ~src_mask; + *row++ = tmp | (zstencil & src_mask); + } + dst_map += dst_stride; + } + } + break; + } default: assert(0); break; diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index e3c7085ba92..23f12e5f464 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -318,6 +318,32 @@ z32f_get_tile_rgba(const float *src, } } +/*** PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32f_x24s8_get_tile_rgba(const float *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src; + src += 2; + } + p += dst_stride; + } +} + void pipe_tile_raw_to_rgba(enum pipe_format format, @@ -352,6 +378,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + z32f_x24s8_get_tile_rgba((float *) src, w, h, dst, dst_stride); + break; default: util_format_read_4f(format, dst, dst_stride * sizeof(float), @@ -445,6 +474,12 @@ pipe_put_tile_rgba_format(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; + case PIPE_FORMAT_Z32_FLOAT: + /*z32f_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + /*z32f_s8x24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; default: util_format_write_4f(format, p, src_stride * sizeof(float), diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index e50db6d67fe..71fe53e3a27 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -257,7 +257,7 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, ret = u_upload_data( upload, min_out_offset, size, - map, + map + offset, out_offset, outbuf, flushed ); diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 28ff40a2328..1b30309bb58 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -99,11 +99,11 @@ static void i915_destroy(struct pipe_context *pipe) struct i915_context *i915 = i915_context(pipe); int i; - draw_destroy(i915->draw); - if (i915->blitter) util_blitter_destroy(i915->blitter); + draw_destroy(i915->draw); + if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 7a013a916b9..8569b5a190b 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -33,6 +33,9 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_GLSL: return 1; + case PIPE_CAP_SM3: + /* TODO: >= nv4x support Shader Model 3.0 */ + return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fb0b0f104bf..c95872b0809 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -69,7 +69,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); - if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); id++; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 887f52e67db..4605c833dea 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -510,11 +510,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_028C70_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_028C70_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_028C70_COLOR_5_6_5: @@ -522,7 +522,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_4_4_4_4: case V_028C70_COLOR_16: case V_028C70_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. */ case V_028C70_COLOR_8_8_8_8: @@ -532,23 +532,23 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_FLOAT: case V_028C70_COLOR_16_16_FLOAT: case V_028C70_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. */ case V_028C70_COLOR_16_16_16_16: case V_028C70_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 96-bit buffers. */ case V_028C70_COLOR_32_32_32_FLOAT: /* 128-bit buffers. */ case V_028C70_COLOR_32_32_32_32_FLOAT: case V_028C70_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } @@ -657,13 +657,11 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; - enum radeon_family family; if (blend == NULL) { return NULL; } - family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -690,7 +688,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - if (family != CHIP_CAYMAN) + if (rctx->chip_class != CAYMAN) r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); else { r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); @@ -827,9 +825,6 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; - enum radeon_family family; - - family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; @@ -888,7 +883,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), @@ -1447,14 +1442,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; - enum radeon_family family; int tl_x, tl_y, br_x, br_y; if (rstate == NULL) return; - family = r600_get_family(rctx->radeon); - evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -1491,7 +1483,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (br_y == 0) tl_y = 1; /* cayman hw workaround */ - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { if (br_x == 1 && br_y == 1) br_x = 2; } @@ -1535,7 +1527,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, shader_mask, 0xFFFFFFFF, NULL); - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, 0x00000000, 0xFFFFFFFF, NULL); } else { @@ -1722,9 +1714,9 @@ void evergreen_init_config(struct r600_pipe_context *rctx) enum radeon_family family; unsigned tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; - if (family == CHIP_CAYMAN) { + if (rctx->chip_class == CAYMAN) { cayman_init_config(rctx); return; } @@ -2034,6 +2026,11 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + tmp = 0; + tmp |= S_008E2C_NUM_PS_LDS(0x1000); + tmp |= S_008E2C_NUM_LS_LDS(0x1000); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index d795f5757ed..96dbd4da91b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -216,6 +216,13 @@ #define S_008C28_NUM_LS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) #define G_008C28_NUM_LS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) #define C_008C28_NUM_LS_STACK_ENTRIES(x) 0xF000FFFF +#define R_008E2C_SQ_LDS_RESOURCE_MGMT 0x00008E2C +#define S_008E2C_NUM_PS_LDS(x) (((x) & 0xFFFF) << 0) +#define G_008E2C_NUM_PS_LDS(x) (((x) >> 0) & 0xFFFF) +#define C_008E2C_NUM_PS_LDS(x) 0x0000FFFF +#define S_008E2C_NUM_LS_LDS(x) (((x) & 0xFFFF) << 16) +#define G_008E2C_NUM_LS_LDS(x) (((x) >> 16) & 0xFFFF) +#define C_008E2C_NUM_LS_LDS(x) 0xFFFF0000 #define R_008CF0_SQ_MS_FIFO_SIZES 0x00008CF0 #define S_008CF0_CACHE_FIFO_SIZE(x) (((x) & 0xFF) << 0) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 065f955ebcb..5fae2b00c8b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -41,9 +41,9 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r if(alu->is_op3) return 3; - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: switch (alu->inst) { case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -93,8 +93,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r "Need instruction operand number for 0x%x.\n", alu->inst); } break; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -195,48 +195,10 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); - bc->family = family; - switch (bc->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - bc->chiprev = CHIPREV_R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - bc->chiprev = CHIPREV_R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - bc->chiprev = CHIPREV_EVERGREEN; - break; - case CHIP_CAYMAN: - bc->chiprev = CHIPREV_CAYMAN; - break; - default: - R600_ERR("unknown family %d\n", bc->family); - return -EINVAL; - } - return 0; + bc->chip_class = chip_class; } static int r600_bc_add_cf(struct r600_bc *bc) @@ -301,9 +263,9 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) /* alu instructions that can ony exits once per group */ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || @@ -339,8 +301,8 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -382,16 +344,16 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -403,13 +365,13 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -418,15 +380,15 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: return !alu->is_op3 && ( alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -438,16 +400,16 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || - (bc->chiprev == CHIPREV_EVERGREEN && + (bc->chip_class == EVERGREEN && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { - switch (bc->chiprev) { - case CHIPREV_R600: - case CHIPREV_R700: + switch (bc->chip_class) { + case R600: + case R700: if (!alu->is_op3) return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || @@ -478,8 +440,8 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -525,7 +487,7 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; @@ -612,7 +574,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; - if (bc->chiprev >= CHIPREV_R700) { + if (bc->chip_class >= R700) { num_res = 2; chan /= 2; } @@ -733,8 +695,8 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + boolean scalar_only = bc->chip_class == CAYMAN ? false : true; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { @@ -806,7 +768,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -834,7 +796,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (bc->chiprev < CHIPREV_CAYMAN) { + if (bc->chip_class < CAYMAN) { if (alu->src[src].sel == gpr[4] && alu->src[src].chan == chan[4]) { alu->src[src].sel = V_SQ_ALU_SRC_PS; @@ -948,7 +910,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) @@ -1252,7 +1214,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; - int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; + int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1302,26 +1264,26 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - switch (bc->chiprev) { - case CHIPREV_R600: + switch (bc->chip_class) { + case R600: return 8; - case CHIPREV_R700: + case R700: return 16; - case CHIPREV_EVERGREEN: - case CHIPREV_CAYMAN: + case EVERGREEN: + case CAYMAN: return 64; default: - R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + R600_ERR("Unknown chip class %d.\n", bc->chip_class); return 8; } } static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) { - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) return TRUE; } else { @@ -1350,7 +1312,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) free(nvtx); return r; } - if (bc->chiprev == CHIPREV_CAYMAN) + if (bc->chip_class == CAYMAN) bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; else bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; @@ -1438,7 +1400,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | @@ -1453,7 +1415,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); id++; bc->bytecode[id++] = 0; @@ -1560,13 +1522,13 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - if (bc->chiprev == CHIPREV_R700) + if (bc->chip_class == R700) r700_bc_cf_vtx_build(&bc->bytecode[id], cf); else r600_bc_cf_vtx_build(&bc->bytecode[id], cf); @@ -1673,7 +1635,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev >= CHIPREV_EVERGREEN) + if (bc->chip_class >= EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1691,17 +1653,17 @@ int r600_bc_build(struct r600_bc *bc) if (r) return r; r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); - switch(bc->chiprev) { - case CHIPREV_R600: + switch(bc->chip_class) { + case R600: r = r600_bc_alu_build(bc, alu, addr); break; - case CHIPREV_R700: - case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ - case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ + case R700: + case EVERGREEN: /* eg alu is same encoding as r700 */ + case CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: - R600_ERR("unknown family %d\n", bc->family); + R600_ERR("unknown chip class %d.\n", bc->chip_class); return -EINVAL; } if (r) @@ -1726,7 +1688,7 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: - if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { r = r600_bc_vtx_build(bc, vtx, addr); if (r) @@ -1812,17 +1774,17 @@ void r600_bc_dump(struct r600_bc *bc) unsigned nliteral; char chip = '6'; - switch (bc->chiprev) { - case 1: + switch (bc->chip_class) { + case R700: chip = '7'; break; - case 2: + case EVERGREEN: chip = 'E'; break; - case 3: + case CAYMAN: chip = 'C'; break; - case 0: + case R600: default: chip = '6'; break; @@ -1993,7 +1955,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - if (bc->chiprev < CHIPREV_CAYMAN) + if (bc->chip_class < CAYMAN) fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); else fprintf(stderr, "SEL_Y:%d) ", 0); @@ -2162,7 +2124,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2180,9 +2142,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); - if (r) - return r; + r600_bc_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { @@ -2287,7 +2247,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); else r600_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 540f45bbd06..cbdaacf7178 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -171,8 +171,7 @@ struct r600_cf_callstack { }; struct r600_bc { - enum radeon_family family; - int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + enum chip_class chip_class; int type; struct list_head cf; struct r600_bc_cf *cf_last; @@ -193,7 +192,7 @@ struct r600_bc { int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ -int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 184f32c9960..7ae091ea5cd 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -409,14 +409,8 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C +#define BC_INST(bc, x) ((bc)->chip_class >= EVERGREEN ? EG_##x : x) -#define CHIPREV_R600 0 -#define CHIPREV_R700 1 -#define CHIPREV_EVERGREEN 2 -#define CHIPREV_CAYMAN 3 - -#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) - -#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chip_class >= EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d512268f63f..a3df4f571a0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -194,7 +194,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void { struct r600_pipe_context *rctx = CALLOC_STRUCT(r600_pipe_context); struct r600_screen* rscreen = (struct r600_screen *)screen; - enum chip_class class; if (rctx == NULL) return NULL; @@ -211,6 +210,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->screen = rscreen; rctx->radeon = rscreen->radeon; rctx->family = r600_get_family(rctx->radeon); + rctx->chip_class = r600_get_family_class(rctx->radeon); rctx->fences.bo = NULL; rctx->fences.data = NULL; @@ -224,47 +224,29 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_surface_functions(rctx); rctx->context.draw_vbo = r600_draw_vbo; - switch (r600_get_family(rctx->radeon)) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: + switch (rctx->chip_class) { + case R600: + case R700: r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } r600_init_config(rctx); + rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - case CHIP_PALM: - case CHIP_SUMO: - case CHIP_SUMO2: - case CHIP_BARTS: - case CHIP_TURKS: - case CHIP_CAICOS: - case CHIP_CAYMAN: + case EVERGREEN: + case CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); return NULL; } evergreen_init_config(rctx); + rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); break; default: - R600_ERR("unsupported family %d\n", r600_get_family(rctx->radeon)); + R600_ERR("Unsupported chip class %d.\n", rctx->chip_class); r600_destroy_context(&rctx->context); return NULL; } @@ -289,12 +271,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - class = r600_get_family_class(rctx->radeon); - if (class == R600 || class == R700) - rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); - else - rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c58c2f77743..6f399ed43b0 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -175,7 +175,8 @@ struct r600_pipe_fences { struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - unsigned family; + enum radeon_family family; + enum chip_class chip_class; void *custom_dsa_flush; struct r600_screen *screen; struct radeon *radeon; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f83d7079b29..3e21ad1fdc6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -99,14 +99,14 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* build state */ switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_vs(ctx, shader); } else { r600_pipe_shader_vs(ctx, shader); } break; case TGSI_PROCESSOR_FRAGMENT: - if (rshader->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_shader_ps(ctx, shader); } else { r600_pipe_shader_ps(ctx, shader); @@ -135,7 +135,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s fprintf(stderr, "--------------------------------------------------------------\n"); tgsi_dump(shader->tokens, 0); } - shader->shader.family = r600_get_family(rctx->radeon); r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); @@ -317,7 +316,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -610,9 +609,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.bc = &shader->bc; ctx.shader = shader; - r = r600_bc_init(ctx.bc, shader->family); - if (r) - return r; + r600_bc_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -651,13 +648,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.bc->chip_class >= EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -711,9 +708,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) + else if (ctx.bc->chip_class >= EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -802,7 +799,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { output[i + j].array_base = shader->output[i].sid; output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < shader->nr_cbufs; j++) { memset(&output[i + j], 0, sizeof(struct r600_bc_output)); output[i + j].gpr = shader->output[i].gpr; @@ -886,7 +883,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (ctx.bc->chiprev < CHIPREV_CAYMAN) { + if (ctx.bc->chip_class < CAYMAN) { if (i == (noutput - 1)) { output[i].end_of_program = 1; } @@ -903,7 +900,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi goto out_err; } /* add program end */ - if (ctx.bc->chiprev == CHIPREV_CAYMAN) + if (ctx.bc->chip_class == CAYMAN) cm_bc_add_cf_end(ctx.bc); free(ctx.literals); @@ -939,6 +936,17 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } +static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +{ + bc_src->abs = 1; + bc_src->neg = 0; +} + +static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +{ + bc_src->neg = !bc_src->neg; +} + static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -995,12 +1003,10 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: - alu.src[0].abs = 1; - if (alu.src[0].neg) - alu.src[0].neg = 0; + r600_bc_src_set_abs(&alu.src[0]); break; default: break; @@ -1114,7 +1120,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - if (ctx->bc->chiprev == CHIPREV_R600) { + if (ctx->bc->chip_class == R600) { alu.src[1].value = *(uint32_t *)&double_pi; alu.src[2].value = *(uint32_t *)&neg_pi; } else { @@ -1221,7 +1227,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); @@ -1255,7 +1261,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); @@ -1364,19 +1370,37 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; + /* tmp.x = max(src.y, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; int sel; int i; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; if (i == 2) { alu.dst.write = 1; alu.last = 1; @@ -1388,10 +1412,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } else { - /* dst.z = log(src.y) */ + /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; @@ -1404,13 +1429,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) chan = alu.dst.chan; sel = alu.dst.sel; - /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[1].sel = sel; - alu.src[1].chan = chan; - + alu.src[0].sel = sel; + alu.src[0].chan = chan; + r600_bc_src(&alu.src[1], &ctx->src[0], 3); r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1421,7 +1445,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1506,7 +1530,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bc_src(&alu.src[i], &ctx->src[i], 0); - alu.src[i].abs = 1; + r600_bc_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1898,7 +1922,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1980,7 +2004,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2192,7 +2216,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r600_bc_src(&alu.src[1], &ctx->src[0], i); - alu.src[1].neg = 1; + r600_bc_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -2373,7 +2397,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -2429,7 +2453,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2489,14 +2513,15 @@ static int tgsi_log(struct r600_shader_ctx *ctx) int r; int i; - /* result.x = floor(log2(src)); */ + /* result.x = floor(log2(|src|)); */ if (inst->Dst[0].Register.WriteMask & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2514,6 +2539,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2538,15 +2564,16 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.y = src.x / (2 ^ floor(log2(src.x))); */ + /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2564,6 +2591,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2590,7 +2618,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2624,7 +2652,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2663,6 +2691,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2677,14 +2706,15 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - /* result.z = log2(src);*/ + /* result.z = log2(|src|);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2702,6 +2732,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bc_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 76aebf2b1ea..3ba84bd8907 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -43,7 +43,6 @@ struct r600_shader { unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; - enum radeon_family family; boolean uses_kill; boolean fs_write_all; boolean clamp_color; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3085cd9a87a..01406f2bad6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -263,6 +263,10 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) return V_028010_DEPTH_X8_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028010_DEPTH_8_24; + case PIPE_FORMAT_Z32_FLOAT: + return V_028010_DEPTH_32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028010_DEPTH_X24_8_32_FLOAT; default: return ~0U; } @@ -353,6 +357,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16_UNORM: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_SWAP_STD; /* 64-bit buffers. */ @@ -360,6 +365,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -444,7 +450,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_0280A0_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_0280A0_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -501,11 +511,11 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) if (R600_BIG_ENDIAN) { switch(colorformat) { case V_0280A0_COLOR_4_4: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 8-bit buffers. */ case V_0280A0_COLOR_8: - return(ENDIAN_NONE); + return ENDIAN_NONE; /* 16-bit buffers. */ case V_0280A0_COLOR_5_6_5: @@ -513,7 +523,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_4_4_4_4: case V_0280A0_COLOR_16: case V_0280A0_COLOR_8_8: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; /* 32-bit buffers. */ case V_0280A0_COLOR_8_8_8_8: @@ -523,22 +533,23 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_FLOAT: case V_0280A0_COLOR_16_16_FLOAT: case V_0280A0_COLOR_16_16: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; /* 64-bit buffers. */ case V_0280A0_COLOR_16_16_16_16: case V_0280A0_COLOR_16_16_16_16_FLOAT: - return(ENDIAN_8IN16); + return ENDIAN_8IN16; case V_0280A0_COLOR_32_32_FLOAT: case V_0280A0_COLOR_32_32: - return(ENDIAN_8IN32); + case V_0280A0_COLOR_X24_8_32_FLOAT: + return ENDIAN_8IN32; /* 128-bit buffers. */ case V_0280A0_COLOR_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32: - return(ENDIAN_8IN32); + return ENDIAN_8IN32; default: return ENDIAN_NONE; /* Unsupported. */ } @@ -635,6 +646,7 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); @@ -1399,7 +1411,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* EXPORT_NORM is an optimzation that can be enabled for better * performance in certain cases */ - if (rctx->family < CHIP_RV770) { + if (rctx->chip_class == R600) { /* EXPORT_NORM can be enabled if: * - 11-bit or smaller UNORM/SNORM/SRGB * - BLEND_CLAMP is enabled @@ -1559,7 +1571,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, 0xFFFFFFFF, NULL); @@ -1653,16 +1665,13 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) void r600_adjust_gprs(struct r600_pipe_context *rctx) { - enum radeon_family family; struct r600_pipe_state rstate; unsigned num_ps_gprs = rctx->default_ps_gprs; unsigned num_vs_gprs = rctx->default_vs_gprs; unsigned tmp; int diff; - family = r600_get_family(rctx->radeon); - - if (family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) return; if (!rctx->ps_shader && !rctx->vs_shader) @@ -1714,7 +1723,7 @@ void r600_init_config(struct r600_pipe_context *rctx) struct r600_pipe_state *rstate = &rctx->config; u32 tmp; - family = r600_get_family(rctx->radeon); + family = rctx->family; ps_prio = 0; vs_prio = 1; gs_prio = 2; @@ -1895,7 +1904,7 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); - if (family >= CHIP_RV770) { + if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d9140403e5a..408eaed491b 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -109,7 +109,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_polygon_offset_update(rctx); } else { r600_polygon_offset_update(rctx); @@ -212,7 +212,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, /* Zero states. */ for (i = 0; i < count; i++) { if (!buffers[i].buffer) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -220,7 +220,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); @@ -367,7 +367,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || rshader->input[i].name == TGSI_SEMANTIC_FACE) - if (rctx->family >= CHIP_CEDAR) + if (rctx->chip_class >= EVERGREEN) continue; else sid=0; @@ -387,7 +387,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { + if (rctx->chip_class < EVERGREEN) { if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); @@ -434,14 +434,14 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { @@ -462,13 +462,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { @@ -521,14 +521,14 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); if (!rstate->id) { - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_init_buffer_resource(rctx, rstate); } else { r600_pipe_init_buffer_resource(rctx, rstate); } } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { @@ -600,7 +600,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_shader_rebuild(ctx, rctx->vs_shader); if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || - ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all && + ((rctx->chip_class >= EVERGREEN) && rctx->ps_shader->shader.fs_write_all && (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) r600_shader_rebuild(ctx, rctx->ps_shader); @@ -655,7 +655,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.indices_bo_offset = draw.index_buffer_offset; } - if (rctx->family >= CHIP_CEDAR) { + if (rctx->chip_class >= EVERGREEN) { evergreen_context_draw(&rctx->ctx, &rdraw); } else { r600_context_draw(&rctx->ctx, &rdraw); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 18460419f85..10c32c53a6d 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -313,7 +313,14 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, PIPE_BIND_SAMPLER_VIEW)) return FALSE; - return TRUE; + switch (res->usage) { + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + return FALSE; + + default: + return TRUE; + } } static boolean r600_texture_get_handle(struct pipe_screen* screen, @@ -851,6 +858,12 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, result = FMT_8; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); goto out_word4; + case PIPE_FORMAT_Z32_FLOAT: + result = FMT_32_FLOAT; + goto out_word4; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + result = FMT_X24_8_32_FLOAT; + goto out_word4; default: goto out_unknown; } diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 08bbdf96e34..e6612b1911d 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -143,8 +143,6 @@ dri_unbind_context(__DRIcontext * cPriv) /* dri_util.c ensures cPriv is not null */ struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); struct dri_context *ctx = dri_context(cPriv); - struct dri_drawable *draw = dri_drawable(ctx->dPriv); - struct dri_drawable *read = dri_drawable(ctx->rPriv); struct st_api *stapi = screen->st_api; if (--ctx->bind_count == 0) { diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index f1cc4400ba5..6155b4d03c0 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -452,6 +452,12 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) extern unsigned dri2GetSwapEventType(Display *dpy, XID drawable); +extern void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id); + +extern void * +GetGLXDrawable(Display *dpy, XID drawable); + /** * This is also called from src/glx/dri2.c. */ @@ -460,4 +466,16 @@ unsigned dri2GetSwapEventType(Display *dpy, XID drawable) return 0; } +void * +dri2GetGlxDrawableFromXDrawableId(Display *dpy, XID id) +{ + return NULL; +} + +void * +GetGLXDrawable(Display *dpy, XID drawable) +{ + return NULL; +} + #endif /* GLX_DIRECT_RENDERING */ diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 6233fb81781..a7aafd846cd 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -864,16 +864,19 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) - return NULL; - parselist += 2; - /* ignore */ + return NULL; /* invalid config option */ + parselist += 2; /* ignore the parameter */ break; #ifdef GLX_EXT_texture_from_pixmap diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 832d7ba438f..69e7eecdf0c 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -42,7 +42,7 @@ egl_CPPFLAGS += \ -I$(TOP)/src/egl/main \ -D_EGL_MAIN=_eglMain egl_LIBS += $(TOP)/src/gallium/state_trackers/egl/libegl.a -egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm +egl_SYS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) -lEGL -lm -lpthread # EGL platforms ifneq ($(findstring x11, $(EGL_PLATFORMS)),) diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 4d9dd505c41..60d2e289396 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -55,6 +55,7 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, + {R_008E2C_SQ_LDS_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, }; diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 4602f7f2a4b..b5a4d928bf5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -186,7 +186,7 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; int r; uint32_t tiling_config = 0; @@ -208,8 +208,8 @@ static int radeon_drm_get_tiling(struct radeon *radeon) static int radeon_get_clock_crystal_freq(struct radeon *radeon) { - struct drm_radeon_info info; - uint32_t clock_crystal_freq; + struct drm_radeon_info info = {}; + uint32_t clock_crystal_freq = 0; int r; info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; @@ -226,8 +226,8 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) static int radeon_get_num_backends(struct radeon *radeon) { - struct drm_radeon_info info; - uint32_t num_backends; + struct drm_radeon_info info = {}; + uint32_t num_backends = 0; int r; info.request = RADEON_INFO_NUM_BACKENDS; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 633cd35f7a7..a2f13ff0863 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1504,7 +1504,7 @@ void r600_context_flush(struct r600_context *ctx) /* suspend queries */ r600_context_queries_suspend(ctx); - if (ctx->radeon->family >= CHIP_CEDAR) + if (ctx->radeon->chip_class >= EVERGREEN) evergreen_context_flush_dest_caches(ctx); else r600_context_flush_dest_caches(ctx); diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 1255072a571..0f2f1a0eea4 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -165,8 +165,18 @@ ir_function_signature * ir_function::matching_signature(const exec_list *actual_parameters) { ir_function_signature *match = NULL; - int matched_score = 0; - + bool multiple_inexact_matches = false; + + /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec: + * + * "If an exact match is found, the other signatures are ignored, and + * the exact match is used. Otherwise, if no exact match is found, then + * the implicit conversions in Section 4.1.10 "Implicit Conversions" will + * be applied to the calling arguments if this can make their types match + * a signature. In this case, it is a semantic error if there are + * multiple ways to apply these conversions to the actual arguments of a + * call such that the call can be made to match multiple signatures." + */ foreach_iter(exec_list_iterator, iter, signatures) { ir_function_signature *const sig = (ir_function_signature *) iter.get(); @@ -178,13 +188,24 @@ ir_function::matching_signature(const exec_list *actual_parameters) if (score == 0) return sig; - /* If we found a match with fewer conversions, use that instead */ - if (score > 0 && (match == NULL || score < matched_score)) { - match = sig; - matched_score = score; + if (score > 0) { + if (match == NULL) + match = sig; + else + multiple_inexact_matches = true; } } + /* There is no exact match (we would have returned it by now). If there + * are multiple inexact matches, the call is ambiguous, which is an error. + * + * FINISHME: Report a decent error. Returning NULL will likely result in + * FINISHME: a "no matching signature" error; it should report that the + * FINISHME: call is ambiguous. But reporting errors from here is hard. + */ + if (multiple_inexact_matches) + return NULL; + return match; } diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index dd265673c55..59a040751d9 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -56,10 +56,8 @@ bool do_if_simplification(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); bool do_mat_op_to_vec(exec_list *instructions); -bool do_mod_to_fract(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); bool do_structure_splitting(exec_list *instructions); -bool do_sub_to_add_neg(exec_list *instructions); bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 30df257be2f..f3a621734ba 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -482,19 +482,21 @@ ir_reader::read_return(s_expression *expr) { s_expression *s_retval; - s_pattern pat[] = { "return", s_retval}; - if (!MATCH(expr, pat)) { - ir_read_error(expr, "expected (return <rvalue>)"); - return NULL; - } - - ir_rvalue *retval = read_rvalue(s_retval); - if (retval == NULL) { - ir_read_error(NULL, "when reading return value"); + s_pattern return_value_pat[] = { "return", s_retval}; + s_pattern return_void_pat[] = { "return" }; + if (MATCH(expr, return_value_pat)) { + ir_rvalue *retval = read_rvalue(s_retval); + if (retval == NULL) { + ir_read_error(NULL, "when reading return value"); + return NULL; + } + return new(mem_ctx) ir_return(retval); + } else if (MATCH(expr, return_void_pat)) { + return new(mem_ctx) ir_return; + } else { + ir_read_error(expr, "expected (return <rvalue>) or (return)"); return NULL; } - - return new(mem_ctx) ir_return(retval); } diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 265da84e5a9..34b64837a46 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1248,7 +1248,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, */ const int generic_base = (target_index == MESA_SHADER_VERTEX) - ? VERT_ATTRIB_GENERIC0 : FRAG_RESULT_DATA0; + ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0; const enum ir_variable_mode direction = (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out; diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index dd2601d1aad..61874990a94 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -60,12 +60,76 @@ #include <string.h> #include "ir.h" +/** + * Enum recording the result of analyzing how control flow might exit + * an IR node. + * + * Each possible value of jump_strength indicates a strictly stronger + * guarantee on control flow than the previous value. + * + * The ordering of strengths roughly reflects the way jumps are + * lowered: jumps with higher strength tend to be lowered to jumps of + * lower strength. Accordingly, strength is used as a heuristic to + * determine which lowering to perform first. + * + * This enum is also used by get_jump_strength() to categorize + * instructions as either break, continue, return, or other. When + * used in this fashion, strength_always_clears_execute_flag is not + * used. + * + * The control flow analysis made by this optimization pass makes two + * simplifying assumptions: + * + * - It ignores discard instructions, since they are lowered by a + * separate pass (lower_discard.cpp). + * + * - It assumes it is always possible for control to flow from a loop + * to the instruction immediately following it. Technically, this + * is not true (since all execution paths through the loop might + * jump back to the top, or return from the function). + * + * Both of these simplifying assumtions are safe, since they can never + * cause reachable code to be incorrectly classified as unreachable; + * they can only do the opposite. + */ enum jump_strength { + /** + * Analysis has produced no guarantee on how control flow might + * exit this IR node. It might fall out the bottom (with or + * without clearing the execute flag, if present), or it might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_none, + + /** + * The only way control can fall out the bottom of this node is + * through a code path that clears the execute flag. It might also + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_always_clears_execute_flag, + + /** + * Control cannot fall out the bottom of this node. It might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ strength_continue, + + /** + * Control cannot fall out the bottom of this node, or continue the + * top of the innermost enclosing loop. It can only break out of + * it or return from the function. + */ strength_break, + + /** + * Control cannot fall out the bottom of this node, continue to the + * top of the innermost enclosing loop, or break out of it. It can + * only return from the function. + */ strength_return }; @@ -146,16 +210,17 @@ struct function_record ir_function_signature* signature; ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ ir_variable* return_value; - bool is_main; + bool lower_return; unsigned nesting_depth; - function_record(ir_function_signature* p_signature = 0) + function_record(ir_function_signature* p_signature = 0, + bool lower_return = false) { this->signature = p_signature; this->return_flag = 0; this->return_value = 0; this->nesting_depth = 0; - this->is_main = this->signature && (strcmp(this->signature->function_name(), "main") == 0); + this->lower_return = lower_return; } ir_variable* get_return_flag() @@ -180,6 +245,27 @@ struct function_record }; struct ir_lower_jumps_visitor : public ir_control_flow_visitor { + /* Postconditions: on exit of any visit() function: + * + * ANALYSIS: this->block.min_strength, + * this->block.may_clear_execute_flag, and + * this->loop.may_set_return_flag are updated to reflect the + * characteristics of the visited statement. + * + * DEAD_CODE_ELIMINATION: If this->block.min_strength is not + * strength_none, the visited node is at the end of its exec_list. + * In other words, any unreachable statements that follow the + * visited statement in its exec_list have been removed. + * + * CONTAINED_JUMPS_LOWERED: If the visited statement contains other + * statements, then should_lower_jump() is false for all of the + * return, break, or continue statements it contains. + * + * Note that visiting a jump does not lower it. That is the + * responsibility of the statement (or function signature) that + * contains the jump. + */ + bool progress; struct function_record function; @@ -218,20 +304,140 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { } } + /** + * Insert the instructions necessary to lower a return statement, + * before the given return instruction. + */ + void insert_lowered_return(ir_return *ir) + { + ir_variable* return_flag = this->function.get_return_flag(); + if(!this->function.signature->return_type->is_void()) { + ir_variable* return_value = this->function.get_return_value(); + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_value), + ir->value)); + } + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_flag), + new (ir) ir_constant(true))); + this->loop.may_set_return_flag = true; + } + + /** + * If the given instruction is a return, lower it to instructions + * that store the return value (if there is one), set the return + * flag, and then break. + * + * It is safe to pass NULL to this function. + */ + void lower_return_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_return) { + return; + } + insert_lowered_return((ir_return*)ir); + ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + } + + /** + * Create the necessary instruction to replace a break instruction. + */ + ir_instruction *create_lowered_break() + { + void *ctx = this->function.signature; + return new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(this->loop.get_break_flag()), + new(ctx) ir_constant(true), + 0); + } + + /** + * If the given instruction is a break, lower it to an instruction + * that sets the break flag, without consulting + * should_lower_jump(). + * + * It is safe to pass NULL to this function. + */ + void lower_break_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_break) { + return; + } + ir->replace_with(create_lowered_break()); + } + + /** + * If the block ends in a conditional or unconditional break, lower + * it, even though should_lower_jump() says it needn't be lowered. + */ + void lower_final_breaks(exec_list *block) + { + ir_instruction *ir = (ir_instruction *) block->get_tail(); + lower_break_unconditionally(ir); + ir_if *ir_if = ir->as_if(); + if (ir_if) { + lower_break_unconditionally( + (ir_instruction *) ir_if->then_instructions.get_tail()); + lower_break_unconditionally( + (ir_instruction *) ir_if->else_instructions.get_tail()); + } + } + virtual void visit(class ir_loop_jump * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered jump + * instruction can't change any flags. + */ this->block.min_strength = ir->is_break() ? strength_break : strength_continue; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_return * ir) { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered return + * instruction can't change any flags. + */ this->block.min_strength = strength_return; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ } virtual void visit(class ir_discard * ir) { + /* Nothing needs to be done. The ANALYSIS and + * DEAD_CODE_ELIMINATION postconditions are already satisfied, + * because discard statements are ignored by this optimization + * pass. The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because discard statements can't contain other + * statements. + */ } enum jump_strength get_jump_strength(ir_instruction* ir) @@ -274,10 +480,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { /* never lower return at the end of a this->function */ if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) lower = false; - else if (this->function.is_main) - lower = lower_main_return; else - lower = lower_sub_return; + lower = this->function.lower_return; break; } return lower; @@ -285,9 +489,20 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record visit_block(exec_list* list) { + /* Note: since visiting a node may change that node's next + * pointer, we can't use visit_exec_list(), because + * visit_exec_list() caches the node's next pointer before + * visiting it. So we use foreach_list() instead. + * + * foreach_list() isn't safe if the node being visited gets + * removed, but fortunately this visitor doesn't do that. + */ + block_record saved_block = this->block; this->block = block_record(); - visit_exec_list(list, this); + foreach_list(node, list) { + ((ir_instruction *) node)->accept(this); + } block_record ret = this->block; this->block = saved_block; return ret; @@ -304,18 +519,34 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { block_record block_records[2]; ir_jump* jumps[2]; + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * unconditional jumps at the end of ir->then_instructions and + * ir->else_instructions, which are handled below. + */ block_records[0] = visit_block(&ir->then_instructions); block_records[1] = visit_block(&ir->else_instructions); retry: /* we get here if we put code after the if inside a branch */ - for(unsigned i = 0; i < 2; ++i) { - exec_list& list = i ? ir->else_instructions : ir->then_instructions; - jumps[i] = 0; - if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) - jumps[i] = (ir_jump*)list.get_tail(); - } + /* Determine which of ir->then_instructions and + * ir->else_instructions end with an unconditional jump. + */ + for(unsigned i = 0; i < 2; ++i) { + exec_list& list = i ? ir->else_instructions : ir->then_instructions; + jumps[i] = 0; + if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) + jumps[i] = (ir_jump*)list.get_tail(); + } + + /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED + * postcondition by lowering jumps in both then_instructions and + * else_instructions. + */ for(;;) { + /* Determine the types of the jumps that terminate + * ir->then_instructions and ir->else_instructions. + */ jump_strength jump_strengths[2]; for(unsigned i = 0; i < 2; ++i) { @@ -326,7 +557,12 @@ retry: /* we get here if we put code after the if inside a branch */ jump_strengths[i] = strength_none; } - /* move both jumps out if possible */ + /* If both code paths end in a jump, and the jumps are the + * same, and we are pulling out jumps, replace them with a + * single jump that comes after the if instruction. The new + * jump will be visited next, and it will be lowered if + * necessary by the loop or conditional that encloses it. + */ if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { bool unify = true; if(jump_strengths[0] == strength_continue) @@ -344,10 +580,19 @@ retry: /* we get here if we put code after the if inside a branch */ jumps[1]->remove(); this->progress = true; + /* Update jumps[] to reflect the fact that the jumps + * are gone, and update block_records[] to reflect the + * fact that control can now flow to the next + * instruction. + */ jumps[0] = 0; jumps[1] = 0; block_records[0].min_strength = strength_none; block_records[1].min_strength = strength_none; + + /* The CONTAINED_JUMPS_LOWERED postcondition is now + * satisfied, so we can break out of the loop. + */ break; } } @@ -367,50 +612,91 @@ retry: /* we get here if we put code after the if inside a branch */ else if(should_lower[1]) lower = 1; else + /* Neither code path ends in a jump that needs to be + * lowered, so the CONTAINED_JUMPS_LOWERED postcondition + * is satisfied and we can break out of the loop. + */ break; if(jump_strengths[lower] == strength_return) { - ir_variable* return_flag = this->function.get_return_flag(); - if(!this->function.signature->return_type->is_void()) { - ir_variable* return_value = this->function.get_return_value(); - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_value), ((ir_return*)jumps[lower])->value, NULL)); - } - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL)); - this->loop.may_set_return_flag = true; + /* To lower a return, we create a return flag (if the + * function doesn't have one already) and add instructions + * that: 1. store the return value (if this function has a + * non-void return) and 2. set the return flag + */ + insert_lowered_return((ir_return*)jumps[lower]); if(this->loop.loop) { + /* If we are in a loop, replace the return instruction + * with a break instruction, and then loop so that the + * break instruction can be lowered if necessary. + */ ir_loop_jump* lowered = 0; lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + /* Note: we must update block_records and jumps to + * reflect the fact that the control path has been + * altered from a return to a break. + */ block_records[lower].min_strength = strength_break; jumps[lower]->replace_with(lowered); jumps[lower] = lowered; - } else + } else { + /* If we are not in a loop, we then proceed as we would + * for a continue statement (set the execute flag to + * false to prevent the rest of the function from + * executing). + */ goto lower_continue; + } this->progress = true; } else if(jump_strengths[lower] == strength_break) { - /* We can't lower to an actual continue because that would execute the increment. + /* To lower a break, we create a break flag (if the loop + * doesn't have one already) and add an instruction that + * sets it. * - * In the lowered code, we instead put the break check between the this->loop body and the increment, - * which is impossible with a real continue as defined by the GLSL IR currently. + * Then we proceed as we would for a continue statement + * (set the execute flag to false to prevent the rest of + * the loop body from executing). * - * Smarter options (such as undoing the increment) are possible but it's not worth implementing them, - * because if break is lowered, continue is almost surely lowered too. + * The visit() function for the loop will ensure that the + * break flag is checked after executing the loop body. */ - jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0)); + jumps[lower]->insert_before(create_lowered_break()); goto lower_continue; } else if(jump_strengths[lower] == strength_continue) { lower_continue: + /* To lower a continue, we create an execute flag (if the + * loop doesn't have one already) and replace the continue + * with an instruction that clears it. + * + * Note that this code path gets exercised when lowering + * return statements that are not inside a loop, so + * this->loop must be initialized even outside of loops. + */ ir_variable* execute_flag = this->loop.get_execute_flag(); jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0)); + /* Note: we must update block_records and jumps to reflect + * the fact that the control path has been altered to an + * instruction that clears the execute flag. + */ jumps[lower] = 0; block_records[lower].min_strength = strength_always_clears_execute_flag; block_records[lower].may_clear_execute_flag = true; this->progress = true; - break; + + /* Let the loop run again, in case the other branch of the + * if needs to be lowered too. + */ } } /* move out a jump out if possible */ if(pull_out_jumps) { + /* If one of the branches ends in a jump, and control cannot + * fall out the bottom of the other branch, then we can move + * the jump after the if. + * + * Set move_out to the branch we are moving a jump out of. + */ int move_out = -1; if(jumps[0] && block_records[1].min_strength >= strength_continue) move_out = 0; @@ -421,22 +707,46 @@ lower_continue: { jumps[move_out]->remove(); ir->insert_after(jumps[move_out]); + /* Note: we must update block_records and jumps to reflect + * the fact that the jump has been moved out of the if. + */ jumps[move_out] = 0; block_records[move_out].min_strength = strength_none; this->progress = true; } } + /* Now satisfy the ANALYSIS postcondition by setting + * this->block.min_strength and + * this->block.may_clear_execute_flag based on the + * characteristics of the two branches. + */ if(block_records[0].min_strength < block_records[1].min_strength) this->block.min_strength = block_records[0].min_strength; else this->block.min_strength = block_records[1].min_strength; this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag; + /* Now we need to clean up the instructions that follow the + * if. + * + * If those instructions are unreachable, then satisfy the + * DEAD_CODE_ELIMINATION postcondition by eliminating them. + * Otherwise that postcondition is already satisfied. + */ if(this->block.min_strength) truncate_after_instruction(ir); else if(this->block.may_clear_execute_flag) { + /* If the "if" instruction might clear the execute flag, then + * we need to guard any instructions that follow so that they + * are only executed if the execute flag is set. + * + * If one of the branches of the "if" always clears the + * execute flag, and the other branch never clears it, then + * this is easy: just move all the instructions following the + * "if" into the branch that never clears it. + */ int move_into = -1; if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag) move_into = 1; @@ -451,14 +761,34 @@ lower_continue: if(!next->is_tail_sentinel()) { move_outer_block_inside(ir, list); + /* If any instructions moved, then we need to visit + * them (since they are now inside the "if"). Since + * block_records[move_into] is in its default state + * (see assertion above), we can safely replace + * block_records[move_into] with the result of this + * analysis. + */ exec_list list; list.head = next; block_records[move_into] = visit_block(&list); + /* + * Then we need to re-start our jump lowering, since one + * of the instructions we moved might be a jump that + * needs to be lowered. + */ this->progress = true; goto retry; } } else { + /* If we get here, then the simple case didn't apply; we + * need to actually guard the instructions that follow. + * + * To avoid creating unnecessarily-deep nesting, first + * look through the instructions that follow and unwrap + * any instructions that that are already wrapped in the + * appropriate guard. + */ ir_instruction* ir_after; for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();) { @@ -479,6 +809,9 @@ lower_continue: this->progress = true; } + /* Then, wrap all the instructions that follow in a single + * guard. + */ if(!ir->get_next()->is_tail_sentinel()) { assert(this->loop.execute_flag); ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag)); @@ -493,29 +826,111 @@ lower_continue: virtual void visit(ir_loop *ir) { + /* Visit the body of the loop, with a fresh data structure in + * this->loop so that the analysis we do here won't bleed into + * enclosing loops. + * + * We assume that all code after a loop is reachable from the + * loop (see comments on enum jump_strength), so the + * DEAD_CODE_ELIMINATION postcondition is automatically + * satisfied, as is the block.min_strength portion of the + * ANALYSIS postcondition. + * + * The block.may_clear_execute_flag portion of the ANALYSIS + * postcondition is automatically satisfied because execute + * flags do not propagate outside of loops. + * + * The loop.may_set_return_flag portion of the ANALYSIS + * postcondition is handled below. + */ ++this->function.nesting_depth; loop_record saved_loop = this->loop; this->loop = loop_record(this->function.signature, ir); + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * an unconditional continue or return at the bottom of the + * loop, which are handled below. + */ block_record body = visit_block(&ir->body_instructions); + /* If the loop ends in an unconditional continue, eliminate it + * because it is redundant. + */ + ir_instruction *ir_last + = (ir_instruction *) ir->body_instructions.get_tail(); + if (get_jump_strength(ir_last) == strength_continue) { + ir_last->remove(); + } + + /* If the loop ends in an unconditional return, and we are + * lowering returns, lower it. + */ + if (this->function.lower_return) + lower_return_unconditionally(ir_last); + if(body.min_strength >= strength_break) { - /* FINISHME: turn the this->loop into an if, or replace it with its body */ + /* FINISHME: If the min_strength of the loop body is + * strength_break or strength_return, that means that it + * isn't a loop at all, since control flow always leaves the + * body of the loop via break or return. In principle the + * loop could be eliminated in this case. This optimization + * is not implemented yet. + */ } if(this->loop.break_flag) { + /* We only get here if we are lowering breaks */ + assert (lower_break); + + /* If a break flag was generated while visiting the body of + * the loop, then at least one break was lowered, so we need + * to generate an if statement at the end of the loop that + * does a "break" if the break flag is set. The break we + * generate won't violate the CONTAINED_JUMPS_LOWERED + * postcondition, because should_lower_jump() always returns + * false for a break that happens at the end of a loop. + * + * However, if the loop already ends in a conditional or + * unconditional break, then we need to lower that break, + * because it won't be at the end of the loop anymore. + */ + lower_final_breaks(&ir->body_instructions); + ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); ir->body_instructions.push_tail(break_if); } + /* If the body of the loop may set the return flag, then at + * least one return was lowered to a break, so we need to ensure + * that the return flag is checked after the body of the loop is + * executed. + */ if(this->loop.may_set_return_flag) { assert(this->function.return_flag); + /* Generate the if statement to check the return flag */ ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag)); + /* Note: we also need to propagate the knowledge that the + * return flag may get set to the outer context. This + * satisfies the loop.may_set_return_flag part of the + * ANALYSIS postcondition. + */ saved_loop.may_set_return_flag = true; if(saved_loop.loop) + /* If this loop is nested inside another one, then the if + * statement that we generated should break out of that + * loop if the return flag is set. Caller will lower that + * break statement if necessary. + */ return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); else + /* Otherwise, all we need to do is ensure that the + * instructions that follow are only executed if the + * return flag is clear. We can do that by moving those + * instructions into the else clause of the generated if + * statement. + */ move_outer_block_inside(ir, &return_if->else_instructions); ir->insert_after(return_if); } @@ -530,14 +945,39 @@ lower_continue: assert(!this->function.signature); assert(!this->loop.loop); + bool lower_return; + if (strcmp(ir->function_name(), "main") == 0) + lower_return = lower_main_return; + else + lower_return = lower_sub_return; + function_record saved_function = this->function; loop_record saved_loop = this->loop; - this->function = function_record(ir); + this->function = function_record(ir, lower_return); this->loop = loop_record(ir); assert(!this->loop.loop); + + /* Visit the body of the function to lower any jumps that occur + * in it, except possibly an unconditional return statement at + * the end of it. + */ visit_block(&ir->body); + /* If the body ended in an unconditional return of non-void, + * then we don't need to lower it because it's the one canonical + * return. + * + * If the body ended in a return of void, eliminate it because + * it is redundant. + */ + if (ir->return_type->is_void() && + get_jump_strength((ir_instruction *) ir->body.get_tail())) { + ir_jump *jump = (ir_jump *) ir->body.get_tail(); + assert (jump->ir_type == ir_type_return); + jump->remove(); + } + if(this->function.return_value) ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value))); diff --git a/src/glx/dri2.c b/src/glx/dri2.c index adfd3d1f7c8..229840d6919 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -88,6 +88,7 @@ static Bool DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); + struct glx_drawable *glxDraw; XextCheckExtension(dpy, info, dri2ExtensionName, False); @@ -97,7 +98,10 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) case DRI2_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xDRI2BufferSwapComplete *awire = (xDRI2BufferSwapComplete *)wire; + xDRI2BufferSwapComplete2 *awire = (xDRI2BufferSwapComplete2 *)wire; + __GLXDRIdrawable *pdraw; + + pdraw = dri2GetGlxDrawableFromXDrawableId(dpy, awire->drawable); /* Ignore swap events if we're not looking for them */ aevent->type = dri2GetSwapEventType(dpy, awire->drawable); @@ -124,7 +128,13 @@ DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) } aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + + glxDraw = GetGLXDrawable(dpy, pdraw->drawable); + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; + return True; } #endif diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c index 0e74e7ccd0e..6738252a31d 100644 --- a/src/glx/glx_pbuffer.c +++ b/src/glx/glx_pbuffer.c @@ -396,6 +396,7 @@ CreateDrawable(Display *dpy, struct glx_config *config, Drawable drawable, const int *attrib_list, CARD8 glxCode) { xGLXCreateWindowReq *req; + struct glx_drawable *glxDraw; CARD32 *data; unsigned int i; CARD8 opcode; @@ -411,6 +412,10 @@ CreateDrawable(Display *dpy, struct glx_config *config, if (!opcode) return None; + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + LockDisplay(dpy); GetReqExtra(GLXCreateWindow, 8 * i, req); data = (CARD32 *) (req + 1); @@ -429,6 +434,11 @@ CreateDrawable(Display *dpy, struct glx_config *config, UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, drawable, xid)) { + free(glxDraw); + return None; + } + if (!CreateDRIDrawable(dpy, config, drawable, xid, attrib_list, i)) { if (glxCode == X_GLXCreatePixmap) glxCode = X_GLXDestroyPixmap; @@ -454,6 +464,7 @@ DestroyDrawable(Display * dpy, GLXDrawable drawable, CARD32 glxCode) protocolDestroyDrawable(dpy, drawable, glxCode); + DestroyGLXDrawable(dpy, drawable); DestroyDRIDrawable(dpy, drawable, GL_FALSE); return; diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h index 06415288165..f9154266101 100644 --- a/src/glx/glxclient.h +++ b/src/glx/glxclient.h @@ -567,6 +567,8 @@ struct glx_display */ struct glx_screen **screens; + __glxHashTable *glXDrawHash; + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashTable *drawHash; @@ -579,6 +581,14 @@ struct glx_display #endif }; +struct glx_drawable { + XID xDrawable; + XID drawable; + + uint32_t lastEventSbc; + int64_t eventSbcWrap; +}; + extern int glx_screen_init(struct glx_screen *psc, int screen, struct glx_display * priv); @@ -784,6 +794,12 @@ extern int applegl_create_display(struct glx_display *display); #endif + +extern struct glx_drawable *GetGLXDrawable(Display *dpy, GLXDrawable drawable); +extern int InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, + XID xDrawable, GLXDrawable drawable); +extern void DestroyGLXDrawable(Display *dpy, GLXDrawable drawable); + extern struct glx_context dummyContext; extern struct glx_screen * diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 191b321ce32..fc0a07901a7 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -90,6 +90,51 @@ GetGLXDRIDrawable(Display * dpy, GLXDrawable drawable) #endif +_X_HIDDEN struct glx_drawable * +GetGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (priv == NULL) + return NULL; + + if (__glxHashLookup(priv->glXDrawHash, drawable, (void *) &glxDraw) == 0) + return glxDraw; + + return NULL; +} + +_X_HIDDEN int +InitGLXDrawable(Display *dpy, struct glx_drawable *glxDraw, XID xDrawable, + GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + + if (!priv) + return -1; + + glxDraw->xDrawable = xDrawable; + glxDraw->drawable = drawable; + glxDraw->lastEventSbc = 0; + glxDraw->eventSbcWrap = 0; + + return __glxHashInsert(priv->glXDrawHash, drawable, glxDraw); +} + +_X_HIDDEN void +DestroyGLXDrawable(Display *dpy, GLXDrawable drawable) +{ + struct glx_display *priv = __glXInitialize(dpy); + struct glx_drawable *glxDraw; + + if (!priv) + return; + + glxDraw = GetGLXDrawable(dpy, drawable); + __glxHashDelete(priv->glXDrawHash, drawable); + free(glxDraw); +} /** * Get the GLX per-screen data structure associated with a GLX context. @@ -608,6 +653,7 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return pixmap; #else xGLXCreateGLXPixmapReq *req; + struct glx_drawable *glxDraw; GLXPixmap xid; CARD8 opcode; @@ -616,6 +662,10 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) return None; } + glxDraw = Xmalloc(sizeof(*glxDraw)); + if (!glxDraw) + return None; + /* Send the glXCreateGLXPixmap request */ LockDisplay(dpy); GetReq(GLXCreateGLXPixmap, req); @@ -628,6 +678,11 @@ glXCreateGLXPixmap(Display * dpy, XVisualInfo * vis, Pixmap pixmap) UnlockDisplay(dpy); SyncHandle(); + if (InitGLXDrawable(dpy, glxDraw, pixmap, req->glxpixmap)) { + free(glxDraw); + return None; + } + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) do { /* FIXME: Maybe delay __DRIdrawable creation until the drawable @@ -700,6 +755,8 @@ glXDestroyGLXPixmap(Display * dpy, GLXPixmap glxpixmap) UnlockDisplay(dpy); SyncHandle(); + DestroyGLXDrawable(dpy, glxpixmap); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) { struct glx_display *const priv = __glXInitialize(dpy); diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 73c332793a0..8704c484f96 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -133,12 +133,20 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire) case GLX_BufferSwapComplete: { GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event; - xGLXBufferSwapComplete *awire = (xGLXBufferSwapComplete *)wire; + xGLXBufferSwapComplete2 *awire = (xGLXBufferSwapComplete2 *)wire; + struct glx_drawable *glxDraw = GetGLXDrawable(dpy, awire->drawable); aevent->event_type = awire->event_type; aevent->drawable = awire->drawable; aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo; + + if (!glxDraw) + return False; + + if (awire->sbc < glxDraw->lastEventSbc) + glxDraw->eventSbcWrap += 0x100000000; + glxDraw->lastEventSbc = awire->sbc; + aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; return True; } default: @@ -227,6 +235,8 @@ glx_display_free(struct glx_display *priv) if (priv->serverGLXversion) Xfree((char *) priv->serverGLXversion); + __glxHashDestroy(priv->glXDrawHash); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __glxHashDestroy(priv->drawHash); @@ -847,6 +857,8 @@ __glXInitialize(Display * dpy) XESetCloseDisplay(dpy, dpyPriv->codes->extension, __glXCloseDisplay); XESetErrorString (dpy, dpyPriv->codes->extension,__glXErrorString); + dpyPriv->glXDrawHash = __glxHashCreate(); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) glx_direct = (getenv("LIBGL_ALWAYS_INDIRECT") == NULL); glx_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL); diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index abfb32be3ae..d22118beb0b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -33,6 +33,7 @@ #include "tnl/t_pipeline.h" #include "intel_span.h" #include "intel_tris.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -53,7 +54,7 @@ i830CreateContext(const struct gl_config * mesaVis, void *sharedContextPrivate) { struct dd_function_table functions; - struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct i830_context *i830 = rzalloc(NULL, struct i830_context); struct intel_context *intel = &i830->intel; struct gl_context *ctx = &intel->ctx; if (!i830) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index d4af5e51026..71ce44fd5c9 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -76,7 +76,8 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f02f2d78267..11bee140ab6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -36,6 +36,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" +#include "../glsl/ralloc.h" #include "i915_reg.h" #include "i915_program.h" @@ -97,8 +98,7 @@ i915CreateContext(int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct i915_context *i915 = - (struct i915_context *) CALLOC_STRUCT(i915_context); + struct i915_context *i915 = rzalloc(NULL, struct i915_context); struct intel_context *intel = &i915->intel; struct gl_context *ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index b67ebb9a1ec..e9e8078328a 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -210,6 +210,7 @@ get_result_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: return UREG(REG_TYPE_OC, 0); case FRAG_RESULT_DEPTH: p->depth_written = 1; diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 6e4512129cd..e6a47116223 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -219,9 +219,9 @@ i915_miptree_layout_2d(struct intel_context *intel, width, height, 1); if (mt->compressed) - img_height = MAX2(1, height / 4); + img_height = ALIGN(height, 4) / 4; else - img_height = (MAX2(2, height) + 1) & ~1; + img_height = ALIGN(height, 2); mt->total_height += img_height; diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bcf42d59969..7cd6820cd51 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -82,6 +82,7 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: if (DepthMode == GL_ALPHA) return (MAPSURF_32BIT | MT_32BIT_x8A24); else if (DepthMode == GL_INTENSITY) @@ -89,7 +90,8 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 7bcb72f42d0..941c4350ddd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1078,6 +1078,13 @@ intelRunPipeline(struct gl_context * ctx) if (ctx->NewState) _mesa_update_state_locked(ctx); + /* We need to get this done before we start the pipeline, or a + * change in the INTEL_FALLBACK() of its intel_draw_buffers() call + * while the pipeline is running will result in mismatched swrast + * map/unmaps, and later assertion failures. + */ + intel_prepare_render(intel); + if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { intel->vtbl.update_texture_state(intel); @@ -1092,7 +1099,9 @@ intelRunPipeline(struct gl_context * ctx) } intel_map_vertex_shader_textures(ctx); + intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); + intel->tnl_pipeline_running = false; intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); @@ -1228,6 +1237,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { + assert(!intel->tnl_pipeline_running); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", @@ -1239,6 +1250,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) else { intel->Fallback &= ~bit; if (oldfallback == bit) { + assert(!intel->tnl_pipeline_running); + _swrast_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 94b8c20b019..9c26150d241 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -43,7 +43,8 @@ prepare_cc_vp(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_viewport *ccv; - ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); + ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { @@ -98,7 +99,8 @@ static void upload_cc_unit(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_unit_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index b9efbb74c87..31fbadf5ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -40,7 +40,8 @@ brw_prepare_clip_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_clip_unit_state *clip; - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE, + sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 636821839a1..ac683bd9960 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -40,6 +40,7 @@ #include "brw_state.h" #include "intel_span.h" #include "tnl/t_pipeline.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -59,7 +60,7 @@ GLboolean brwCreateContext( int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct brw_context *brw = rzalloc(NULL, struct brw_context); struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; unsigned i; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a8e2b802803..471015cf9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -188,6 +188,31 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { @@ -744,6 +769,14 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index bbfefcd816a..e0309e71fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -41,7 +41,8 @@ brw_prepare_gs_unit(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct brw_gs_unit_state *gs; - gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); + gs = brw_state_batch(brw, AUB_TRACE_GS_STATE, + sizeof(*gs), 32, &brw->gs.state_offset); memset(gs, 0, sizeof(*gs)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index eb3d103099b..9201be7caab 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -46,7 +46,8 @@ static void upload_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -129,7 +130,8 @@ static void upload_sf_unit( struct brw_context *brw ) int chipset_max_threads; bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); + sf = brw_state_batch(brw, AUB_TRACE_SF_STATE, + sizeof(*sf), 64, &brw->sf.state_offset); memset(sf, 0, sizeof(*sf)); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b384651d8d0..cede4e5c916 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -172,6 +172,7 @@ void brw_destroy_caches( struct brw_context *brw ); sizeof(*(s)), false) void *brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 213c7a38d8c..5a983c3d847 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,6 +32,29 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" +#include "../glsl/ralloc.h" + +static void +brw_track_state_batch(struct brw_context *brw, + enum state_struct_type type, + uint32_t offset, + int size) +{ + struct intel_batchbuffer *batch = &brw->intel.batch; + + if (!brw->state_batch_list) { + /* Our structs are always aligned to at least 32 bytes, so + * our array doesn't need to be any larger + */ + brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) * + batch->bo->size / 32); + } + + brw->state_batch_list[brw->state_batch_count].offset = offset; + brw->state_batch_list[brw->state_batch_count].size = size; + brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_count++; +} /** * Allocates a block of space in the batchbuffer for indirect state. @@ -49,6 +72,7 @@ */ void * brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset) @@ -71,6 +95,9 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + brw_track_state_batch(brw, type, offset, size); + *out_offset = offset; return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 7a3a88f04f5..b9e5cc1a534 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,44 +31,23 @@ #include "brw_context.h" #include "brw_defines.h" -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) -{ - va_list va; +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - vfprintf(stderr, fmt, va); - va_end(va); -} - -/** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, drm_intel_bo *buffer, - unsigned int offset, unsigned int size) +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) { - int i; - - if (buffer == NULL) - return; - - drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < size / 4; i++) { - state_out(name, buffer->virtual + offset, buffer->offset + offset, i, - "dword %d\n", i); - } - drm_intel_bo_unmap(buffer); + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); } static const char * @@ -98,394 +77,536 @@ get_965_surface_format(unsigned int surface_format) } } -static void dump_wm_surface_state(struct brw_context *brw) +static void dump_vs_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - uint32_t *surf; - char name[20]; +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (uint32_t *)(base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, - GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); - state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", - GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, - (surf[3] & BRW_SURFACE_TILED) ? - ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); - state_out(name, surf, surfoff, 4, "mip base %d\n", - GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), - GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); - } - drm_intel_bo_unmap(bo); +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); } -static void dump_gen7_surface_state(struct brw_context *brw) +static void dump_wm_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - struct gen7_surface_state *surf; - char name[20]; +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + struct gen7_surface_state *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + surf->ss5.min_lod); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct gen7_surface_state *) (base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss5.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); } - drm_intel_bo_unmap(bo); } -static void dump_wm_sampler_state(struct brw_context *brw) +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct brw_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); - state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); - state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); - state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); - state_out(name, sdc, sdc_offset, 5, "half float rg\n"); - state_out(name, sdc, sdc_offset, 6, "half float ba\n"); - state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); - state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); - state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); - state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); - state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); - } else { - struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); - } + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_gen7_sampler_state(struct brw_context *brw) +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; int i; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct gen7_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - samp = (struct gen7_sampler_state *) - (intel->batch.bo->virtual + brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - struct brw_sampler_default_color *sdc = - intel->batch.bo->virtual + brw->wm.sdc_offset[i]; - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } drm_intel_bo_unmap(intel->batch.bo); } -static void dump_sf_viewport_state(struct brw_context *brw) +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + batch_out(brw, name, offset, 6, "top left = %d,%d\n", vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_clip_viewport_state(struct brw_context *brw) +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; - struct brw_clipper_viewport *vp; - uint32_t vp_off; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->clip.vp_offset; - vp_off = intel->batch.bo->offset + brw->clip.vp_offset; - - state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); - state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); - state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); - state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); } -static void dump_sf_clip_viewport_state(struct brw_context *brw) +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; - struct gen7_sf_clip_viewport *vp; - uint32_t vp_off; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "guardband xmin = %f\n", vp->guardband.xmin); - state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); - state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); - state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); } -static void dump_cc_viewport_state(struct brw_context *brw) +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "CC VP"; - struct brw_cc_viewport *vp; - uint32_t vp_off; + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->cc.vp_offset; - vp_off = intel->batch.bo->offset + brw->cc.vp_offset; + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); +} - state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); - state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(intel->batch.bo); +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); } -static void dump_depth_stencil_state(struct brw_context *brw) +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; - const char *name = "DEPTH STENCIL"; - struct gen6_depth_stencil_state *ds; - uint32_t ds_off; - - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; - ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; - - state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", - ds->ds0.stencil_enable ? "en" : "dis", - ds->ds0.stencil_func, - ds->ds0.stencil_write_enable ? "en" : "dis"); - state_out(name, ds, ds_off, 1, "stencil test mask 0x%x, write mask 0x%x\n", - ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); - state_out(name, ds, ds_off, 2, "depth test %sable, func %d, write %sable\n", - ds->ds2.depth_test_enable ? "en" : "dis", - ds->ds2.depth_test_func, - ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(intel->batch.bo); + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); } -static void dump_cc_state(struct brw_context *brw) +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; - struct gen6_color_calc_state *cc; - uint32_t cc_off; - dri_bo *bo = brw->intel.batch.bo; - - if (brw->cc.state_offset == 0) - return; - - drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual + brw->cc.state_offset; - cc_off = bo->offset + brw->cc.state_offset; - - state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," - "bf stencil ref %d\n", - cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", - cc->cc0.round_disable, - cc->cc0.stencil_ref, - cc->cc0.bf_stencil_ref); - state_out(name, cc, cc_off, 1, "\n"); - state_out(name, cc, cc_off, 2, "constant red %f\n", cc->constant_r); - state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); - state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); - state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - - drm_intel_bo_unmap(bo); + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); +} +static void dump_blend_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "BLEND"; + + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); } -static void dump_blend_state(struct brw_context *brw) +static void +dump_scissor(struct brw_context *brw, uint32_t offset) { + const char *name = "SCISSOR"; struct intel_context *intel = &brw->intel; - const char *name = "BLEND"; - struct gen6_blend_state *blend; - uint32_t blend_off; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; - drm_intel_bo_map(intel->batch.bo, GL_FALSE); + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} - blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; - blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; - state_out(name, blend, blend_off, 0, "\n"); - state_out(name, blend, blend_off, 1, "\n"); + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} - drm_intel_bo_unmap(intel->batch.bo); +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; + + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } } -static void brw_debug_prog(struct brw_context *brw, - const char *name, uint32_t prog_offset) +static void +dump_prog_cache(struct brw_context *brw) { - unsigned int i; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; + unsigned int b, i; uint32_t *data; drm_intel_bo_map(brw->cache.bo, false); - data = brw->cache.bo->virtual + prog_offset; - - for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)brw->cache.bo->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. - */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) - break; + for (b = 0; b < cache->size; b++) { + struct brw_cache_item *item; + + for (item = cache->items[b]; item; item = item->next) { + const char *name; + uint32_t offset = item->offset; + + data = brw->cache.bo->virtual + item->offset; + + switch (item->cache_id) { + case BRW_VS_PROG: + name = "VS kernel"; + break; + case BRW_GS_PROG: + name = "GS kernel"; + break; + case BRW_CLIP_PROG: + name = "CLIP kernel"; + break; + case BRW_SF_PROG: + name = "SF kernel"; + break; + case BRW_WM_PROG: + name = "WM kernel"; + break; + default: + name = "unknown"; + break; + } + + for (i = 0; i < item->size / 4 / 4; i++) { + fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ", + offset + i * 4 * 4, + name, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + + brw_disasm(stderr, (void *)(data + i * 4), intel->gen); + } + } } drm_intel_bo_unmap(brw->cache.bo); } +static void +dump_state_batch(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < brw->state_batch_count; i++) { + uint32_t offset = brw->state_batch_list[i].offset; + uint32_t size = brw->state_batch_list[i].size; + + switch (brw->state_batch_list[i].type) { + case AUB_TRACE_VS_STATE: + dump_vs_state(brw, offset); + break; + case AUB_TRACE_GS_STATE: + dump_gs_state(brw, offset); + break; + case AUB_TRACE_CLIP_STATE: + dump_clip_state(brw, offset); + break; + case AUB_TRACE_SF_STATE: + dump_sf_state(brw, offset); + break; + case AUB_TRACE_WM_STATE: + dump_wm_state(brw, offset); + break; + case AUB_TRACE_CLIP_VP_STATE: + dump_clip_viewport_state(brw, offset); + break; + case AUB_TRACE_SF_VP_STATE: + if (intel->gen >= 7) { + dump_sf_clip_viewport_state(brw, offset); + } else { + dump_sf_viewport_state(brw, offset); + } + break; + case AUB_TRACE_CC_VP_STATE: + dump_cc_viewport_state(brw, offset); + break; + case AUB_TRACE_DEPTH_STENCIL_STATE: + dump_depth_stencil_state(brw, offset); + break; + case AUB_TRACE_CC_STATE: + if (intel->gen >= 6) + dump_cc_state_gen6(brw, offset); + else + dump_cc_state_gen4(brw, offset); + break; + case AUB_TRACE_BLEND_STATE: + dump_blend_state(brw, offset); + break; + case AUB_TRACE_BINDING_TABLE: + dump_binding_table(brw, offset, size); + break; + case AUB_TRACE_SURFACE_STATE: + if (intel->gen < 7) { + dump_surface_state(brw, offset); + } else { + dump_gen7_surface_state(brw, offset); + } + break; + case AUB_TRACE_SAMPLER_STATE: + if (intel->gen < 7) { + dump_sampler_state(brw, offset, size); + } else { + dump_gen7_sampler_state(brw, offset, size); + } + break; + case AUB_TRACE_SAMPLER_DEFAULT_COLOR: + dump_sdc(brw, offset); + break; + case AUB_TRACE_SCISSOR_STATE: + dump_scissor(brw, offset); + break; + case AUB_TRACE_VS_CONSTANTS: + dump_vs_constants(brw, offset, size); + break; + default: + break; + } + } +} /** * Print additional debug information associated with the batchbuffer @@ -501,51 +622,10 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", - brw->intel.batch.bo, - brw->wm.bind_bo_offset, - 4 * brw->wm.nr_surfaces); - if (intel->gen < 7) { - dump_wm_surface_state(brw); - dump_wm_sampler_state(brw); - } else { - dump_gen7_surface_state(brw); - dump_gen7_sampler_state(brw); - } - - if (intel->gen < 6) - state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, - sizeof(struct brw_vs_unit_state)); - brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); - - if (intel->gen < 6) - state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, - sizeof(struct brw_gs_unit_state)); - if (brw->gs.prog_active) { - brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); - } + drm_intel_bo_map(intel->batch.bo, false); + dump_state_batch(brw); + drm_intel_bo_unmap(intel->batch.bo); - if (intel->gen < 6) { - state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, - sizeof(struct brw_sf_unit_state)); - brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); - } - if (intel->gen >= 7) - dump_sf_clip_viewport_state(brw); - else - dump_sf_viewport_state(brw); - if (intel->gen == 6) - dump_clip_viewport_state(brw); - - if (intel->gen < 6) - state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, - sizeof(struct brw_wm_unit_state)); - brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); - - if (intel->gen >= 6) { - dump_cc_viewport_state(brw); - dump_depth_stencil_state(brw); - dump_cc_state(brw); - dump_blend_state(brw); - } + if (0) + dump_prog_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d5010a21e80..fc4373ab311 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -43,10 +43,12 @@ brw_prepare_vs_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vs_unit_state *vs; - vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + vs = brw_state_batch(brw, AUB_TRACE_VS_STATE, + sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->vs.state_offset + @@ -54,7 +56,6 @@ brw_prepare_vs_unit(struct brw_context *brw) brw->vs.prog_offset + (vs->thread0.grf_reg_count << 1)) >> 6; - vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 611f6333689..f9ee4d112a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -182,7 +182,8 @@ static void upload_vs_surfaces(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. (once we have vs samplers) */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(uint32_t) * BRW_VS_MAX_SURF, 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 8612e743265..6aeeda6e0fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -123,6 +123,8 @@ static void brw_new_batch( struct intel_context *intel ) */ intel->batch.need_workaround_flush = true; + brw->state_batch_count = 0; + brw->vb.nr_current_buffers = 0; /* Mark that the current program cache BO has been used by the GPU. diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 59dcda7b414..7cd3edad235 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -563,13 +563,14 @@ static void precalc_dst( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - + struct prog_dst_register temp = get_temp(c); + if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(temp, WRITEMASK_Y), inst->SaturateMode, src0, src1, @@ -584,7 +585,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(temp, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -597,12 +598,26 @@ static void precalc_dst( struct brw_wm_compile *c, */ emit_op(c, OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(temp, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } + + /* This will get optimized out in general, but it ensures that we + * don't overwrite src operands in our channel-wise splitting + * above. See piglit fp-dst-aliasing-[12]. + */ + emit_op(c, + OPCODE_MOV, + dst, + 0, + src_reg_from_dst(temp), + src_undef(), + src_undef()); + + release_temp(c, temp); } @@ -611,7 +626,17 @@ static void precalc_lit( struct brw_wm_compile *c, { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; @@ -627,16 +652,6 @@ static void precalc_lit( struct brw_wm_compile *c, /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5de39aa4575..98146136703 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -108,7 +108,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, if (intel->gen == 5 || intel->gen == 6) { struct gen5_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); memset(sdc, 0, sizeof(*sdc)); @@ -144,7 +145,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, } else { struct brw_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); COPY_4V(sdc->color, color); } @@ -326,7 +328,8 @@ prepare_wm_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 506e2bdff5b..c820ce48c29 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -78,7 +78,8 @@ brw_prepare_wm_unit(struct brw_context *brw) const struct gl_fragment_program *fp = brw->fragment_program; struct brw_wm_unit_state *wm; - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); memset(wm, 0, sizeof(*wm)); if (brw->wm.prog_data->prog_offset_16) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89fea9cc952..fb4fb146f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -226,7 +226,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -272,7 +273,8 @@ brw_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, out_offset); surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -404,7 +406,8 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -439,7 +442,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; uint32_t format = 0; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -637,7 +641,8 @@ brw_wm_upload_binding_table(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_WM_MAX_SURF, 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 294d5a5e644..41d13ad2bf4 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,8 @@ prepare_blend_state(struct brw_context *brw) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; - blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, &brw->cc.blend_state_offset); memset(blend, 0, size); @@ -139,7 +140,8 @@ gen6_prepare_color_calc_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_color_calc_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 775e1ce2c9c..5d14147db3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -34,7 +34,8 @@ gen6_prepare_depth_stencil_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_depth_stencil_state *ds; - ds = brw_state_batch(brw, sizeof(*ds), 64, + ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*ds), 64, &brw->cc.depth_stencil_state_offset); memset(ds, 0, sizeof(*ds)); diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 7492e508864..dc73b10f4cd 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -39,7 +39,8 @@ gen6_upload_scissor_state(struct brw_context *brw) struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; - scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); + scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, + sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index c6c55c926c7..a4bfa54837d 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -43,7 +43,8 @@ prepare_clip_vp(struct brw_context *brw) { struct brw_clipper_viewport *vp; - vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE, + sizeof(*vp), 32, &brw->clip.vp_offset); vp->xmin = -1.0; vp->xmax = 1.0; @@ -72,7 +73,8 @@ prepare_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 022e23e12b0..fb4cdbaadf9 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 9ef6133e2b9..185da9c355f 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 95f6fbf7414..e787c21f4d1 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -183,7 +183,8 @@ gen7_prepare_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 99efe96a1fa..0f97cea652d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -39,11 +39,12 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; - int attr = 0; + int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1; /* _NEW_LIGHT */ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = @@ -57,12 +58,6 @@ upload_sbe_state(struct brw_context *brw) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; dw10 = 0; - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw10 |= (1 << i); - } - } /* _NEW_LIGHT (flat shading) */ dw11 = 0; @@ -71,30 +66,43 @@ upload_sbe_state(struct brw_context *brw) ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + if (ctx->Point.PointSprite && + attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw10 |= (1 << input_index); + } + + if (attr == FRAG_ATTRIB_PNTC) + dw10 |= (1 << input_index); + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. + */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; attr < FRAG_ATTRIB_MAX; attr++) + attr_overrides[input_index++] = 0; + BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index 838ad3a3948..e9aacd56317 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -36,7 +36,8 @@ prepare_sf_clip_viewport(struct brw_context *brw) const GLfloat *v = ctx->Viewport._WindowMap.m; struct gen7_sf_clip_viewport *vp; - vp = brw_state_batch(brw, sizeof(vp), 64, &brw->sf.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(vp), 64, &brw->sf.vp_offset); /* Also assign to clip.vp_offset in case something uses it. */ brw->clip.vp_offset = brw->sf.vp_offset; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 17f75354f1d..a102ca772b3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -51,7 +51,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); @@ -138,11 +138,9 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen7_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = 0, @@ -240,10 +238,7 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_PROGRAM_CONSTANTS), + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 9994b67bfc5..4add1a69f02 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -65,8 +65,8 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -135,7 +135,8 @@ gen7_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, out_offset); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_BUFFER; @@ -210,8 +211,8 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) { struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -235,8 +236,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gen7_surface_state *surf; uint32_t tile_x, tile_y; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 292b7b034ee..2ba13632569 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -56,7 +56,7 @@ #include "drirenderbuffer.h" #include "utils.h" - +#include "../glsl/ralloc.h" #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); @@ -924,7 +924,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) _math_matrix_dtr(&intel->ViewportMatrix); - FREE(intel); + ralloc_free(intel); driContextPriv->driverPrivate = NULL; } } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 148fb0c2c9a..1727badb704 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -199,6 +199,7 @@ struct intel_context drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ struct { diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 6cf37c4c40c..4c4945c7941 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -271,7 +271,8 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) if (*dst) intel_region_release(dst); - src->refcount++; + if (src) + src->refcount++; *dst = src; } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index eda07a43dee..1a3643da593 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -55,15 +55,11 @@ get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); - switch (internalFormat) { - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH24_STENCIL8_EXT: - case GL_DEPTH_STENCIL_EXT: + if (_mesa_is_depth_format(internalFormat) || + _mesa_is_depthstencil_format(internalFormat)) return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - default: - return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - } + + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 269faefa1c0..1f8b885bbec 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -120,25 +120,6 @@ intel_miptree_create_for_teximage(struct intel_context *intel, expect_accelerated_upload); } - - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -426,7 +407,7 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; if (_mesa_is_format_compressed(texImage->TexFormat)) { @@ -835,7 +816,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -893,7 +874,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index 20fb4478426..6134650346d 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -99,7 +99,7 @@ get_hw_format(int type) case GL_UNSIGNED_SHORT: return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM; case GL_UNSIGNED_BYTE: - return NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM; + return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM; default: assert(0); } diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 48657b44be1..1f5fc33d775 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1097,12 +1097,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) return NULL; parselist += 2; diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index ab62c97fe5a..40d6c9612a2 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -63,7 +63,8 @@ static void delete_wrapper(struct gl_renderbuffer *rb) { ASSERT(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_X8_Z24); + rb->Format == MESA_FORMAT_X8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT); _mesa_reference_renderbuffer(&rb->Wrapped, NULL); free(rb); } @@ -393,6 +394,217 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, } +static void +get_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + const GLfloat *src = (const GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (!src) { + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + src = temp; + } + for (i = 0; i < count; i++) { + dst[i] = src[i*2]; + } +} + +static void +get_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(count <= MAX_WIDTH); + /* don't bother trying direct access */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + dst[i] = temp[i*2]; + } +} + +static void +put_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + const GLfloat val = *(GLfloat*)value; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = val; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], + const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + *dst = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + /* get, modify, put */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); +} + + +/** + * Wrap the given GL_DEPTH_STENCIL renderbuffer so that it acts like + * a depth renderbuffer. + * \return new depth renderbuffer + */ +struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb) +{ + struct gl_renderbuffer *z32frb; + + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + z32frb = ctx->Driver.NewRenderbuffer(ctx, 0); + if (!z32frb) + return NULL; + + /* NOTE: need to do manual refcounting here */ + z32frb->Wrapped = dsrb; + dsrb->RefCount++; + + z32frb->Name = dsrb->Name; + z32frb->RefCount = 0; + z32frb->Width = dsrb->Width; + z32frb->Height = dsrb->Height; + z32frb->RowStride = dsrb->RowStride; + z32frb->InternalFormat = GL_DEPTH_COMPONENT32F; + z32frb->Format = MESA_FORMAT_Z32_FLOAT; + z32frb->_BaseFormat = GL_DEPTH_COMPONENT; + z32frb->DataType = GL_FLOAT; + z32frb->Data = NULL; + z32frb->Delete = delete_wrapper; + z32frb->AllocStorage = alloc_wrapper_storage; + z32frb->GetPointer = nop_get_pointer; + z32frb->GetRow = get_row_z32f; + z32frb->GetValues = get_values_z32f; + z32frb->PutRow = put_row_z32f; + z32frb->PutRowRGB = NULL; + z32frb->PutMonoRow = put_mono_row_z32f; + z32frb->PutValues = put_values_z32f; + z32frb->PutMonoValues = put_mono_values_z32f; + + return z32frb; +} + + /*====================================================================== * Stencil wrapper around depth/stencil renderbuffer */ @@ -402,16 +614,22 @@ get_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, GLint x, GLint y, void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; const GLuint *src = (const GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (!src) { dsrb->GetRow(ctx, dsrb, count, x, y, temp); src = temp; } - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = src[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = src[i] & 0xff; } @@ -429,14 +647,20 @@ get_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count const GLint x[], const GLint y[], void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(count <= MAX_WIDTH); /* don't bother trying direct access */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = temp[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = temp[i] & 0xff; } @@ -457,11 +681,19 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, const GLubyte *src = (const GLubyte *) values; GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | src[i]; @@ -479,9 +711,16 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -508,11 +747,19 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou const GLubyte val = *((GLubyte *) value); GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | val; @@ -530,9 +777,16 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -559,11 +813,20 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count struct gl_renderbuffer *dsrb = s8rb->Wrapped; const GLubyte *src = (const GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + dst[1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); @@ -583,9 +846,16 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -610,11 +880,18 @@ put_mono_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint const void *value, const GLubyte *mask) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; const GLubyte val = *((GLubyte *) value); /* get, modify, put */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -644,8 +921,10 @@ _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer struct gl_renderbuffer *s8rb; ASSERT(dsrb->Format == MESA_FORMAT_Z24_S8 || - dsrb->Format == MESA_FORMAT_S8_Z24); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + dsrb->Format == MESA_FORMAT_S8_Z24 || + dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); s8rb = ctx->Driver.NewRenderbuffer(ctx, 0); if (!s8rb) diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h index ef63c5d7a31..b47a2e482c2 100644 --- a/src/mesa/main/depthstencil.h +++ b/src/mesa/main/depthstencil.h @@ -34,6 +34,11 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, extern struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb); + + +extern struct gl_renderbuffer * _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8cc3fd49a34..84969360d92 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1131,6 +1131,16 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) return GL_DEPTH_STENCIL_EXT; else return 0; + case GL_DEPTH_COMPONENT32F: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT; + else + return 0; + case GL_DEPTH32F_STENCIL8: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_STENCIL; + else + return 0; case GL_RED: case GL_R8: case GL_R16: @@ -2266,6 +2276,15 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, /* special cases */ *params = GL_INDEX; } + else if (format == MESA_FORMAT_Z32_FLOAT_X24S8) { + /* depends on the attachment parameter */ + if (attachment == GL_STENCIL_ATTACHMENT) { + *params = GL_INDEX; + } + else { + *params = GL_FLOAT; + } + } else { *params = _mesa_get_format_datatype(format); } @@ -2584,6 +2603,10 @@ _mesa_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } } + if (!mask) { + return; + } + ASSERT(ctx->Driver.BlitFramebuffer); ctx->Driver.BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e88ba43971b..f9298d2d1e9 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1091,6 +1091,25 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] = 0, 0, 0, 0, 0, 1, 1, 4 }, + /* ARB_depth_buffer_float */ + { + MESA_FORMAT_Z32_FLOAT, /* Name */ + "MESA_FORMAT_Z32_FLOAT", /* StrName */ + GL_DEPTH_COMPONENT, /* BaseFormat */ + GL_FLOAT, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 0, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 4 /* BlockWidth/Height,Bytes */ + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, /* Name */ + "MESA_FORMAT_Z32_FLOAT_X24S8", /* StrName */ + GL_DEPTH_STENCIL, /* BaseFormat */ + GL_NONE /* XXX */, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 8, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 8 /* BlockWidth/Height,Bytes */ + }, }; @@ -1466,7 +1485,9 @@ _mesa_test_formats(void) info->DataType == GL_SIGNED_NORMALIZED || info->DataType == GL_UNSIGNED_INT || info->DataType == GL_INT || - info->DataType == GL_FLOAT); + info->DataType == GL_FLOAT || + /* Z32_FLOAT_X24S8 has DataType of GL_NONE */ + info->DataType == GL_NONE); if (info->BaseFormat == GL_RGB) { assert(info->RedBits > 0); @@ -1654,6 +1675,16 @@ _mesa_format_to_type_and_comps(gl_format format, *comps = 1; return; + case MESA_FORMAT_Z32_FLOAT: + *datatype = GL_FLOAT; + *comps = 1; + return; + + case MESA_FORMAT_Z32_FLOAT_X24S8: + *datatype = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + *comps = 1; + return; + case MESA_FORMAT_DUDV8: *datatype = GL_BYTE; *comps = 2; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 0640bbc4af1..5b8c01781a6 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -209,6 +209,9 @@ typedef enum MESA_FORMAT_RGB9_E5_FLOAT, MESA_FORMAT_R11_G11_B10_FLOAT, + MESA_FORMAT_Z32_FLOAT, + MESA_FORMAT_Z32_FLOAT_X24S8, + MESA_FORMAT_COUNT } gl_format; diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 66c9bd91096..6e2ce74212e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -631,8 +631,14 @@ _mesa_update_depth_buffer(struct gl_context *ctx, || fb->_DepthBuffer->Wrapped != depthRb || _mesa_get_format_base_format(fb->_DepthBuffer->Format) != GL_DEPTH_COMPONENT) { /* need to update wrapper */ - struct gl_renderbuffer *wrapper - = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + struct gl_renderbuffer *wrapper; + + if (depthRb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + wrapper = _mesa_new_z32f_renderbuffer_wrapper(ctx, depthRb); + } + else { + wrapper = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + } _mesa_reference_renderbuffer(&fb->_DepthBuffer, wrapper); ASSERT(fb->_DepthBuffer->Wrapped == depthRb); } diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 6d7bc735887..37127dcb7a2 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -84,6 +84,7 @@ _mesa_type_is_packed(GLenum type) case GL_UNSIGNED_INT_24_8_EXT: case GL_UNSIGNED_INT_5_9_9_9_REV: case GL_UNSIGNED_INT_10F_11F_11F_REV: + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: return GL_TRUE; } @@ -228,6 +229,8 @@ _mesa_sizeof_packed_type( GLenum type ) return sizeof(GLuint); case GL_UNSIGNED_INT_10F_11F_11F_REV: return sizeof(GLuint); + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return 8; default: return -1; } @@ -379,6 +382,11 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type ) return sizeof(GLuint); else return -1; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (format == GL_DEPTH_STENCIL) + return 8; + else + return -1; default: return -1; } @@ -531,8 +539,10 @@ _mesa_is_legal_format_and_type(const struct gl_context *ctx, else return GL_FALSE; case GL_DEPTH_STENCIL_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil - && type == GL_UNSIGNED_INT_24_8_EXT) + if ((ctx->Extensions.EXT_packed_depth_stencil && + type == GL_UNSIGNED_INT_24_8_EXT) || + (ctx->Extensions.ARB_depth_buffer_float && + type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)) return GL_TRUE; else return GL_FALSE; @@ -884,6 +894,7 @@ _mesa_is_depth_format(GLenum format) case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: + case GL_DEPTH_COMPONENT32F: return GL_TRUE; default: return GL_FALSE; @@ -931,6 +942,7 @@ _mesa_is_depthstencil_format(GLenum format) switch (format) { case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; @@ -956,6 +968,8 @@ _mesa_is_depth_or_stencil_format(GLenum format) case GL_STENCIL_INDEX16_EXT: case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index f2724dbca7e..8a811cb7225 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -706,6 +706,17 @@ do_row(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + GLuint i, j, k; + const GLfloat *rowA = (const GLfloat *) srcRowA; + const GLfloat *rowB = (const GLfloat *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i*2] = (rowA[j*2] + rowA[k*2] + rowB[j*2] + rowB[k*2]) * 0.25F; + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } @@ -1341,6 +1352,15 @@ do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + DECLARE_ROW_POINTERS(GLfloat, 2); + + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index a232a51c355..7de1d05b919 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1971,7 +1971,8 @@ extract_uint_indexes(GLuint n, GLuint indexes[], srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); switch (srcType) { case GL_BITMAP: @@ -2142,6 +2143,23 @@ extract_uint_indexes(GLuint n, GLuint indexes[], } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLuint *s = (const GLuint *) src; + if (unpack->SwapBytes) { + for (i = 0; i < n; i++) { + GLuint value = s[i*2+1]; + SWAP4BYTE(value); + indexes[i] = value & 0xff; /* lower 8 bits */ + } + } + else { + for (i = 0; i < n; i++) + indexes[i] = s[i*2+1] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(NULL, "bad srcType in extract_uint_indexes"); @@ -4412,11 +4430,13 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(dstType == GL_UNSIGNED_BYTE || dstType == GL_UNSIGNED_SHORT || - dstType == GL_UNSIGNED_INT); + dstType == GL_UNSIGNED_INT || + dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); /* only shift and offset apply to stencil */ transferOps &= IMAGE_SHIFT_OFFSET_BIT; @@ -4488,6 +4508,15 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, case GL_UNSIGNED_INT: memcpy(dest, indexes, n * sizeof(GLuint)); break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint *dst = (GLuint *) dest; + GLuint i; + for (i = 0; i < n; i++) { + dst[i*2+1] = indexes[i] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_stencil_span"); } @@ -4798,6 +4827,20 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLfloat *src = (const GLfloat *)source; + for (i = 0; i < n; i++) { + GLfloat value = src[i * 2]; + if (srcPacking->SwapBytes) { + SWAP4BYTE(value); + } + depthValues[i] = value; + } + needClamp = GL_TRUE; + } + break; case GL_FLOAT: DEPTH_VALUES(GLfloat, 1*); needClamp = GL_TRUE; @@ -4874,9 +4917,18 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, zValues[i] = (GLushort) (depthValues[i] * (GLfloat) depthMax); } } + else if (dstType == GL_FLOAT) { + /* Nothing to do. depthValues is pointing to dest. */ + } + else if (dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { + GLfloat *zValues = (GLfloat*) dest; + GLuint i; + for (i = 0; i < n; i++) { + zValues[i*2] = depthValues[i]; + } + } else { - ASSERT(dstType == GL_FLOAT); - /*ASSERT(depthMax == 1.0F);*/ + ASSERT(0); } free(depthTemp); @@ -5004,10 +5056,11 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, /** - * Pack depth and stencil values as GL_DEPTH_STENCIL/GL_UNSIGNED_INT_24_8. + * Pack depth and stencil values as GL_DEPTH_STENCIL (GL_UNSIGNED_INT_24_8 etc) */ void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking) @@ -5037,9 +5090,19 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, stencilVals = stencilCopy; } - for (i = 0; i < n; i++) { - GLuint z = (GLuint) (depthVals[i] * 0xffffff); - dest[i] = (z << 8) | (stencilVals[i] & 0xff); + switch (dstType) { + case GL_UNSIGNED_INT_24_8: + for (i = 0; i < n; i++) { + GLuint z = (GLuint) (depthVals[i] * 0xffffff); + dest[i] = (z << 8) | (stencilVals[i] & 0xff); + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + for (i = 0; i < n; i++) { + ((GLfloat*)dest)[i*2] = depthVals[i]; + dest[i*2+1] = stencilVals[i] & 0xff; + } + break; } if (dstPacking->SwapBytes) { diff --git a/src/mesa/main/pack.h b/src/mesa/main/pack.h index 78238ea5839..00aab409e42 100644 --- a/src/mesa/main/pack.h +++ b/src/mesa/main/pack.h @@ -130,8 +130,8 @@ _mesa_pack_depth_span(struct gl_context *ctx, GLuint n, GLvoid *dest, extern void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, - GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking); diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 0331a8ca2fe..84c5b22286a 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -61,6 +61,14 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, return GL_TRUE; } + if (ctx->Extensions.ARB_depth_buffer_float + && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV + && format != GL_DEPTH_STENCIL_EXT) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "gl%sPixels(format is not GL_DEPTH_STENCIL_EXT)", readDraw); + return GL_TRUE; + } + /* basic combinations test */ if (!_mesa_is_legal_format_and_type(ctx, format, type)) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -142,10 +150,23 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, } break; case GL_DEPTH_STENCIL_EXT: - if (!ctx->Extensions.EXT_packed_depth_stencil || - type != GL_UNSIGNED_INT_24_8_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); - return GL_TRUE; + /* Check validity of the type first. */ + switch (type) { + case GL_UNSIGNED_INT_24_8_EXT: + if (!ctx->Extensions.EXT_packed_depth_stencil) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (!ctx->Extensions.ARB_depth_buffer_float) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; } if ((drawing && !_mesa_dest_buffer_exists(ctx, format)) || (reading && !_mesa_source_buffer_exists(ctx, format))) { diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index c36175c60e7..f5b20020d23 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -66,6 +66,9 @@ get_datatype_bytes(struct gl_renderbuffer *rb) int component_size; switch (rb->DataType) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + component_size = 8; + break; case GL_FLOAT: case GL_UNSIGNED_INT: case GL_UNSIGNED_INT_24_8_EXT: diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 6716ce1b071..72283eb68af 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -913,6 +913,20 @@ texfetch_funcs[MESA_FORMAT_COUNT] = fetch_texel_2d_r11_g11_b10f, fetch_texel_3d_r11_g11_b10f, store_texel_r11_g11_b10f + }, + { + MESA_FORMAT_Z32_FLOAT, + fetch_texel_1d_f_r_f32, /* Reuse the R32F functions. */ + fetch_texel_2d_f_r_f32, + fetch_texel_3d_f_r_f32, + store_texel_r_f32 + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, + fetch_texel_1d_z32f_x24s8, + fetch_texel_2d_z32f_x24s8, + fetch_texel_3d_z32f_x24s8, + store_texel_z32f_x24s8 } }; diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index e6fd81d4d57..d170adf2e00 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2287,7 +2287,8 @@ static void FETCH(f_z24_s8)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) >> 8) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8); + ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8 || + texImage->TexFormat == MESA_FORMAT_Z24_X8); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2314,7 +2315,8 @@ static void FETCH(f_s8_z24)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) & 0x00ffffff) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24); + ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24 || + texImage->TexFormat == MESA_FORMAT_X8_Z24); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2374,6 +2376,29 @@ static void store_texel_r11_g11_b10f(struct gl_texture_image *texImage, #endif +/* MESA_FORMAT_Z32_FLOAT_X24S8 ***********************************************/ + +static void FETCH(z32f_x24s8)(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + texel[RCOMP] = src[0]; + texel[GCOMP] = 0.0F; + texel[BCOMP] = 0.0F; + texel[ACOMP] = 1.0F; +} + +#if DIM == 3 +static void store_texel_z32f_x24s8(struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, const void *texel) +{ + const GLfloat *src = (const GLfloat *) texel; + GLfloat *dst = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + dst[0] = src[0]; +} +#endif + + #undef TEXEL_ADDR #undef DIM #undef FETCH diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 8cbb021d8b0..c919a74e047 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -416,6 +416,19 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, } } + if (ctx->Extensions.ARB_depth_buffer_float) { + switch (internalFormat) { + case GL_DEPTH_COMPONENT32F: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT]); + return MESA_FORMAT_Z32_FLOAT; + case GL_DEPTH32F_STENCIL8: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8]); + return MESA_FORMAT_Z32_FLOAT_X24S8; + default: + ; /* fallthrough */ + } + } + if (ctx->Extensions.ATI_envmap_bumpmap) { switch (internalFormat) { case GL_DUDV_ATI: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index e527981ff47..6e1e63bdfb0 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1002,15 +1002,17 @@ memcpy_texture(struct gl_context *ctx, /** - * Store a 32-bit integer depth component texture image. + * Store a 32-bit integer or float depth component texture image. */ static GLboolean _mesa_texstore_z32(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffffff; const GLuint texelBytes = _mesa_get_format_bytes(dstFormat); + const GLenum dstType = _mesa_get_format_datatype(dstFormat); (void) dims; - ASSERT(dstFormat == MESA_FORMAT_Z32); + ASSERT(dstFormat == MESA_FORMAT_Z32 || + dstFormat == MESA_FORMAT_Z32_FLOAT); ASSERT(texelBytes == sizeof(GLuint)); if (ctx->Pixel.DepthScale == 1.0f && @@ -1018,7 +1020,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) !srcPacking->SwapBytes && baseInternalFormat == GL_DEPTH_COMPONENT && srcFormat == GL_DEPTH_COMPONENT && - srcType == GL_UNSIGNED_INT) { + srcType == dstType) { /* simple memcpy path */ memcpy_texture(ctx, dims, dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, @@ -1039,7 +1041,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) const GLvoid *src = _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, row, 0); _mesa_unpack_depth_span(ctx, srcWidth, - GL_UNSIGNED_INT, (GLuint *) dstRow, + dstType, dstRow, depthScale, srcType, src, srcPacking); dstRow += dstRowStride; } @@ -4285,6 +4287,72 @@ _mesa_texstore_r11_g11_b10f(TEXSTORE_PARAMS) } +static GLboolean +_mesa_texstore_z32f_x24s8(TEXSTORE_PARAMS) +{ + ASSERT(dstFormat == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(srcFormat == GL_DEPTH_STENCIL || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); + ASSERT(srcFormat != GL_DEPTH_STENCIL || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + if (srcFormat == GL_DEPTH_STENCIL && + ctx->Pixel.DepthScale == 1.0f && + ctx->Pixel.DepthBias == 0.0f && + !srcPacking->SwapBytes) { + /* simple path */ + memcpy_texture(ctx, dims, + dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, + dstRowStride, + dstImageOffsets, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); + } + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { + GLint img, row; + const GLint srcRowStride + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) + / sizeof(uint64_t); + + /* In case we only upload depth we need to preserve the stencil */ + for (img = 0; img < srcDepth; img++) { + uint64_t *dstRow = (uint64_t *) dstAddr + + dstImageOffsets[dstZoffset + img] + + dstYoffset * dstRowStride / sizeof(uint64_t) + + dstXoffset; + const uint64_t *src + = (const uint64_t *) _mesa_image_address(dims, srcPacking, srcAddr, + srcWidth, srcHeight, + srcFormat, srcType, + img, 0, 0); + for (row = 0; row < srcHeight; row++) { + /* The unpack functions with: + * dstType = GL_FLOAT_32_UNSIGNED_INT_24_8_REV + * only write their own dword, so the other dword (stencil + * or depth) is preserved. */ + if (srcFormat != GL_STENCIL_INDEX) + _mesa_unpack_depth_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + 1.0f, srcType, src, srcPacking); + + if (srcFormat != GL_DEPTH_COMPONENT) + _mesa_unpack_stencil_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + srcType, src, srcPacking, + ctx->_ImageTransferState); + + src += srcRowStride; + dstRow += dstRowStride / sizeof(uint64_t); + } + } + } + return GL_TRUE; +} + /** * Table mapping MESA_FORMAT_* to _mesa_texstore_*() @@ -4419,6 +4487,9 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, + + { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, + { MESA_FORMAT_Z32_FLOAT_X24S8, _mesa_texstore_z32f_x24s8 }, }; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 738e97ca55c..d8e5a3a9772 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1415,9 +1415,9 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) case ir_var_in: case ir_var_inout: /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), user-assign - * generic attributes (glBindVertexLocation), and - * user-defined varyings. + * including deprecated builtins (like gl_Color), + * user-assigned generic attributes (glBindVertexLocation), + * and user-defined varyings. * * FINISHME: We would hit this path for function arguments. Fix! */ diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 181fedd2b99..117000ba716 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -381,7 +381,8 @@ check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuff assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (ctx->Scissor.Enabled && (ctx->Scissor.X != 0 || @@ -436,7 +437,8 @@ check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (maskStencil) return GL_TRUE; diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 64c6c117fca..1ced560e160 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -41,6 +41,7 @@ #include "st_context.h" #include "st_cb_queryobj.h" #include "st_cb_condrender.h" +#include "st_cb_bitmap.h" /** @@ -55,6 +56,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, struct pipe_context *pipe = st->pipe; uint m; + st_flush_bitmap_cache(st); + switch (mode) { case GL_QUERY_WAIT: m = PIPE_RENDER_COND_WAIT; @@ -90,6 +93,8 @@ st_EndConditionalRender(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st->pipe; (void) q; + st_flush_bitmap_cache(st); + pipe->render_condition(pipe, NULL, 0); st->render_condition = NULL; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d61d7ac22be..1d908c0317a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -273,26 +273,6 @@ make_passthrough_vertex_shader(struct st_context *st, /** - * Return a texture base format for drawing/copying an image - * of the given format. - */ -static GLenum -base_format(GLenum format) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - return GL_RGBA; - } -} - - -/** * Return a texture internalFormat for drawing/copying an image * of the given format and type. */ @@ -301,11 +281,36 @@ internal_format(struct gl_context *ctx, GLenum format, GLenum type) { switch (format) { case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; + switch (type) { + case GL_UNSIGNED_SHORT: + return GL_DEPTH_COMPONENT16; + + case GL_UNSIGNED_INT: + return GL_DEPTH_COMPONENT32; + + case GL_FLOAT: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT32F; + else + return GL_DEPTH_COMPONENT; + + default: + return GL_DEPTH_COMPONENT; + } + case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; + switch (type) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return GL_DEPTH32F_STENCIL8; + + case GL_UNSIGNED_INT_24_8: + default: + return GL_DEPTH24_STENCIL8; + } + case GL_STENCIL_INDEX: return GL_STENCIL_INDEX; + default: if (_mesa_is_integer_format(format)) { switch (type) { @@ -812,6 +817,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, for (row = 0; row < height; row++) { GLubyte sValues[MAX_WIDTH]; GLuint zValues[MAX_WIDTH]; + GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, @@ -822,7 +828,11 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, ctx->_ImageTransferState); if (format == GL_DEPTH_STENCIL) { - _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + GLenum ztype = + pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ? + GL_FLOAT : GL_UNSIGNED_INT; + + _mesa_unpack_depth_span(ctx, spanWidth, ztype, zValues, (1 << 24) - 1, type, source, &clippedUnpack); } @@ -887,6 +897,26 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLfloat *destf = (GLfloat*)dest; + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + destf[k*2] = zValuesFloat[k]; + dest[k*2+1] = sValues[k] & 0xff; + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k*2+1] = sValues[k] & 0xff; + } + } + break; default: assert(0); } @@ -974,7 +1004,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2]; int num_sampler_view = 1; - enum pipe_format stencil_format = PIPE_FORMAT_NONE; struct st_fp_variant *fpv; if (format == GL_DEPTH_STENCIL) @@ -984,24 +1013,12 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, else if (format == GL_DEPTH_COMPONENT) write_depth = GL_TRUE; - if (write_stencil) { - enum pipe_format tex_format; - /* can we write to stencil if not fallback */ - if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) - goto stencil_fallback; - - tex_format = st_choose_format(st->pipe->screen, base_format(format), - GL_NONE, GL_NONE, - PIPE_TEXTURE_2D, - 0, PIPE_BIND_SAMPLER_VIEW); - if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - stencil_format = PIPE_FORMAT_X24S8_USCALED; - else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) - stencil_format = PIPE_FORMAT_S8X24_USCALED; - else - stencil_format = PIPE_FORMAT_S8_USCALED; - if (stencil_format == PIPE_FORMAT_NONE) - goto stencil_fallback; + if (write_stencil && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) { + /* software fallback */ + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; } /* Mesa state should be up to date by now */ @@ -1046,7 +1063,32 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, sv[0] = st_create_texture_sampler_view(st->pipe, pt); if (sv[0]) { - if (write_stencil) { + /* Create a second sampler view to read stencil. + * The stencil is written using the shader stencil export + * functionality. */ + if (write_stencil) { + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + + switch (pt->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X24S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + case PIPE_FORMAT_X32_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + assert(0); + } + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, stencil_format); num_sampler_view++; @@ -1067,11 +1109,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, pipe_resource_reference(&pt, NULL); } } - return; - -stencil_fallback: - draw_stencil_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); } @@ -1188,6 +1225,18 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(usage == PIPE_TRANSFER_WRITE); memcpy(dst, src, width); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + { + uint *dst4 = (uint *) dst; + int j; + dst4++; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = src[j] & 0xff; + dst4 += 2; + } + } + break; default: assert(0); } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index d0ac253bcec..057499615bf 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "st_context.h" #include "st_cb_queryobj.h" +#include "st_cb_bitmap.h" #if FEATURE_queryobj @@ -83,6 +84,8 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) struct st_query_object *stq = st_query_object(q); unsigned type; + st_flush_bitmap_cache(st_context(ctx)); + /* convert GL query type to Gallium query type */ switch (q->Target) { case GL_ANY_SAMPLES_PASSED: @@ -128,6 +131,8 @@ st_EndQuery(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_query_object *stq = st_query_object(q); + st_flush_bitmap_cache(st_context(ctx)); + pipe->end_query(pipe, stq->pq); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 67926e39297..e2b29fe3068 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -151,6 +151,24 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + const uint *src = (uint *) (stmap + srcY * pt->stride); + const GLfloat *srcf = (const GLfloat*)src; + GLint k; + for (k = 0; k < width; k++) { + zValues[k] = srcf[k*2]; + sValues[k] = src[k*2+1] & 0xff; + } + } + else { + const uint *src = (uint *) (stmap + srcY * pt->stride); + GLint k; + for (k = 0; k < width; k++) { + sValues[k] = src[k*2+1] & 0xff; + } + } + break; default: assert(0); } @@ -159,7 +177,7 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, dest = _mesa_image_address2d(packing, pixels, width, height, format, type, j, 0); if (format == GL_DEPTH_STENCIL) { - _mesa_pack_depth_stencil_span(ctx, width, dest, + _mesa_pack_depth_stencil_span(ctx, width, type, dest, zValues, sValues, packing); } else { @@ -568,6 +586,31 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h dst += dstStride; } } + else if (pformat == PIPE_FORMAT_Z32_FLOAT) { + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat, 0); + y += yStep; + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + assert(format == GL_DEPTH_COMPONENT); + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; /* Z32 */ + GLfloat zfloat2[MAX_WIDTH*2]; /* Z32X32 */ + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat2, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = zfloat2[j*2]; + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } else { /* RGBA format */ /* Do a row at a time to flip image data vertically */ diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 6eddbfc88e4..6d4bc544d0c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -179,6 +179,9 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, st_init_driver_functions(&funcs); ctx = _mesa_create_context(api, visual, shareCtx, &funcs, NULL); + if (!ctx) { + return NULL; + } /* XXX: need a capability bit in gallium to query if the pipe * driver prefers DP4 or MUL/MAD for vertex transformation. diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d3aebe526dd..99b231d9706 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -607,4 +607,15 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_SM3)) { ctx->Extensions.ARB_shader_texture_lod = GL_TRUE; } + + if (screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW) && + screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW)) { + ctx->Extensions.ARB_depth_buffer_float = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index fa5d8f5050a..bd4f0860c52 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -68,42 +68,70 @@ GLenum st_format_datatype(enum pipe_format format) { const struct util_format_description *desc; + int i; desc = util_format_description(format); assert(desc); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { if (format == PIPE_FORMAT_B5G5R5A1_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM) { return GL_UNSIGNED_SHORT; } + else if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + return GL_FLOAT; + } else if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM || format == PIPE_FORMAT_Z24X8_UNORM || format == PIPE_FORMAT_X8Z24_UNORM) { return GL_UNSIGNED_INT_24_8; } + else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + return GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } else { const GLuint size = format_max_bits(format); + + assert(i < 4); + if (i == 4) + return GL_NONE; + if (size == 8) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_BYTE; else return GL_BYTE; } else if (size == 16) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_HALF_FLOAT; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } - else { - assert( size <= 32 ); - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + else if (size <= 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_FLOAT; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; } + else { + assert(size == 64); + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT); + return GL_DOUBLE; + } } } else if (format == PIPE_FORMAT_UYVY) { @@ -180,6 +208,10 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat) return PIPE_FORMAT_Z24X8_UNORM; case MESA_FORMAT_S8: return PIPE_FORMAT_S8_USCALED; + case MESA_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_Z32_FLOAT; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED; case MESA_FORMAT_YCBCR: return PIPE_FORMAT_UYVY; #if FEATURE_texture_s3tc @@ -402,6 +434,10 @@ st_pipe_format_to_mesa_format(enum pipe_format format) return MESA_FORMAT_X8_Z24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return MESA_FORMAT_S8_Z24; + case PIPE_FORMAT_Z32_FLOAT: + return MESA_FORMAT_Z32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return MESA_FORMAT_Z32_FLOAT_X24S8; case PIPE_FORMAT_UYVY: return MESA_FORMAT_YCBCR; @@ -759,6 +795,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_COMPONENT, 0 }, { DEFAULT_DEPTH_FORMATS } }, + { + { GL_DEPTH_COMPONENT32F, 0 }, + { PIPE_FORMAT_Z32_FLOAT, 0 } + }, /* stencil formats */ { @@ -775,6 +815,10 @@ static const struct format_mapping format_map[] = { { GL_DEPTH_STENCIL_EXT, GL_DEPTH24_STENCIL8_EXT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_USCALED, PIPE_FORMAT_S8_USCALED_Z24_UNORM, 0 } }, + { + { GL_DEPTH32F_STENCIL8, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, 0 } + }, /* sRGB formats */ { diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a8c4b5c3f49..7bd82aae206 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -918,6 +918,15 @@ st_manager_add_color_renderbuffer(struct st_context *st, return FALSE; st_framebuffer_update_attachments(stfb); + + /* + * Force a call to the state tracker manager to validate the + * new renderbuffer. It might be that there is a window system + * renderbuffer available. + */ + if(stfb->iface) + stfb->iface_stamp = p_atomic_read(&stfb->iface->stamp) - 1; + st_invalidate_state(st->ctx, _NEW_BUFFERS); return TRUE; diff --git a/src/mesa/swrast/s_blit.c b/src/mesa/swrast/s_blit.c index 3516a41bf41..7f53f19eb62 100644 --- a/src/mesa/swrast/s_blit.c +++ b/src/mesa/swrast/s_blit.c @@ -568,9 +568,6 @@ _swrast_BlitFramebuffer(struct gl_context *ctx, }; GLint i; - if (!ctx->DrawBuffer->_NumColorDrawBuffers) - return; - if (!_mesa_clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1, &dstX0, &dstY0, &dstX1, &dstY1)) { return; diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 214f2ea1aaa..66ca39293a6 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -446,7 +446,7 @@ read_depth_stencil_pixels(struct gl_context *ctx, GLfloat depthVals[MAX_WIDTH]; _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i, depthVals); - _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst, + _mesa_pack_depth_stencil_span(ctx, width, type, depthStencilDst, depthVals, stencilVals, packing); } } |