diff options
Diffstat (limited to 'src')
108 files changed, 1614 insertions, 584 deletions
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 82f409d294b..a7030f03dbd 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -165,7 +165,7 @@ EGLint dri2_to_egl_attribute_map[] = { 0, /* __DRI_ATTRIB_BIND_TO_TEXTURE_RGBA */ 0, /* __DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE */ 0, /* __DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS */ - 0, /* __DRI_ATTRIB_YINVERTED */ + EGL_Y_INVERTED_NOK, /* __DRI_ATTRIB_YINVERTED */ }; static void @@ -781,6 +781,7 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp, disp->Extensions.KHR_gl_renderbuffer_image = EGL_TRUE; disp->Extensions.KHR_gl_texture_2D_image = EGL_TRUE; disp->Extensions.NOK_swap_region = EGL_TRUE; + disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE; /* we're supporting EGL 1.4 */ *major = 1; @@ -1229,19 +1230,8 @@ dri2_bind_tex_image(_EGLDriver *drv, ctx = _eglGetCurrentContext(); dri2_ctx = dri2_egl_context(ctx); - if (buffer != EGL_BACK_BUFFER) { - _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); + if (!_eglBindTexImage(drv, disp, surf, buffer)) return EGL_FALSE; - } - - /* We allow binding pixmaps too... Not conformat, but we can do it - * for free and it's useful for X compositors. Supposedly there's - * a EGL_NOKIA_texture_from_pixmap extension that allows that, but - * I couldn't find it at this time. 
*/ - if ((dri2_surf->base.Type & (EGL_PBUFFER_BIT | EGL_PIXMAP_BIT)) == 0) { - _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); - return EGL_FALSE; - } switch (dri2_surf->base.TextureFormat) { case EGL_TEXTURE_RGB: @@ -1251,8 +1241,7 @@ dri2_bind_tex_image(_EGLDriver *drv, format = __DRI_TEXTURE_FORMAT_RGBA; break; default: - _eglError(EGL_BAD_MATCH, "eglBindTexImage"); - return EGL_FALSE; + assert(0); } switch (dri2_surf->base.TextureTarget) { @@ -1260,15 +1249,14 @@ dri2_bind_tex_image(_EGLDriver *drv, target = GL_TEXTURE_2D; break; default: - _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); - return EGL_FALSE; + assert(0); } (*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context, target, format, dri2_surf->dri_drawable); - return dri2_surf->base.BoundToTexture = EGL_TRUE; + return EGL_TRUE; } static EGLBoolean diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c index 3cbfebe4881..e08ef5f2228 100644 --- a/src/egl/drivers/glx/egl_glx.c +++ b/src/egl/drivers/glx/egl_glx.c @@ -41,6 +41,7 @@ #include "eglconfigutil.h" #include "eglconfig.h" #include "eglcontext.h" +#include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" #include "eglcurrent.h" @@ -48,7 +49,6 @@ #include "eglsurface.h" #define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) #ifndef GLX_VERSION_1_4 #error "GL/glx.h must be equal to or greater than GLX 1.4" diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 923992da48b..1a533e0880b 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -261,7 +261,7 @@ EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) { _EGLDisplay *disp = _eglLockDisplay(dpy); - EGLint major_int, minor_int; + EGLint major_int = 0, minor_int = 0; if (!disp) RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE); diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index 47513a4edb2..fa947d76887 100644 --- 
a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -223,7 +223,12 @@ static const struct { 0 }, { EGL_NONE, ATTRIB_TYPE_PSEUDO, ATTRIB_CRITERION_IGNORE, - 0 } + 0 }, + + { EGL_Y_INVERTED_NOK, ATTRIB_TYPE_BOOLEAN, + ATTRIB_CRITERION_EXACT, + EGL_DONT_CARE }, + }; @@ -478,6 +483,28 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria) return matched; } +static INLINE EGLBoolean +_eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr) +{ + if (_eglIndexConfig(conf, attr) < 0) + return EGL_FALSE; + + /* there are some holes in the range */ + switch (attr) { + case 0x3030 /* a gap before EGL_SAMPLES */: + case EGL_NONE: +#ifdef EGL_VERSION_1_4 + case EGL_MATCH_NATIVE_PIXMAP: +#endif + return EGL_FALSE; + case EGL_Y_INVERTED_NOK: + return conf->Display->Extensions.NOK_texture_from_pixmap; + default: + break; + } + + return EGL_TRUE; +} /** * Initialize a criteria config from the given attribute list. @@ -500,15 +527,13 @@ _eglParseConfigAttribList(_EGLConfig *conf, const EGLint *attrib_list) /* parse the list */ for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i += 2) { - EGLint idx; - attr = attrib_list[i]; val = attrib_list[i + 1]; - idx = _eglIndexConfig(conf, attr); - if (idx < 0) - return EGL_FALSE; - conf->Storage[idx] = val; + if (!_eglIsConfigAttribValid(conf, attr)) + return EGL_FALSE; + + SET_CONFIG_ATTRIB(conf, attr, val); /* rememeber some attributes for post-processing */ switch (attr) { @@ -781,28 +806,6 @@ _eglChooseConfig(_EGLDriver *drv, _EGLDisplay *disp, const EGLint *attrib_list, } -static INLINE EGLBoolean -_eglIsConfigAttribValid(_EGLConfig *conf, EGLint attr) -{ - if (_eglIndexConfig(conf, attr) < 0) - return EGL_FALSE; - - /* there are some holes in the range */ - switch (attr) { - case 0x3030 /* a gap before EGL_SAMPLES */: - case EGL_NONE: -#ifdef EGL_VERSION_1_4 - case EGL_MATCH_NATIVE_PIXMAP: -#endif - return EGL_FALSE; - default: - break; - } - - return EGL_TRUE; -} - - /** * Fallback for eglGetConfigAttrib. 
*/ diff --git a/src/egl/main/eglconfig.h b/src/egl/main/eglconfig.h index ced060f7797..ca63c40d3d7 100644 --- a/src/egl/main/eglconfig.h +++ b/src/egl/main/eglconfig.h @@ -8,16 +8,24 @@ #define _EGL_CONFIG_FIRST_ATTRIB EGL_BUFFER_SIZE #define _EGL_CONFIG_LAST_ATTRIB EGL_CONFORMANT -#define _EGL_CONFIG_NUM_ATTRIBS \ +#define _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS \ (_EGL_CONFIG_LAST_ATTRIB - _EGL_CONFIG_FIRST_ATTRIB + 1) -#define _EGL_CONFIG_STORAGE_SIZE _EGL_CONFIG_NUM_ATTRIBS +/* Attributes outside the contiguous block: + * + * EGL_Y_INVERTED_NOK + */ +#define _EGL_CONFIG_FIRST_EXTRA_ATTRIB _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS +#define _EGL_CONFIG_NUM_EXTRA_ATTRIBS 1 + +#define _EGL_CONFIG_NUM_ATTRIBS \ + _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS + _EGL_CONFIG_NUM_EXTRA_ATTRIBS struct _egl_config { _EGLDisplay *Display; - EGLint Storage[_EGL_CONFIG_STORAGE_SIZE]; + EGLint Storage[_EGL_CONFIG_NUM_ATTRIBS]; }; @@ -37,10 +45,15 @@ _eglIndexConfig(const _EGLConfig *conf, EGLint key) { (void) conf; if (key >= _EGL_CONFIG_FIRST_ATTRIB && - key < _EGL_CONFIG_FIRST_ATTRIB + _EGL_CONFIG_NUM_ATTRIBS) + key < _EGL_CONFIG_FIRST_ATTRIB + _EGL_CONFIG_NUM_CONTIGUOUS_ATTRIBS) return key - _EGL_CONFIG_FIRST_ATTRIB; - else + + switch (key) { + case EGL_Y_INVERTED_NOK: + return _EGL_CONFIG_FIRST_EXTRA_ATTRIB; + default: return -1; + } } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 0b7f9d83036..42e305f91ac 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -47,6 +47,7 @@ struct _egl_extensions EGLBoolean KHR_gl_texture_3D_image; EGLBoolean KHR_gl_renderbuffer_image; EGLBoolean NOK_swap_region; + EGLBoolean NOK_texture_from_pixmap; char String[_EGL_MAX_EXTENSIONS_LEN]; }; diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c index 82ddb6cad99..e62a9e7de8c 100644 --- a/src/egl/main/eglmisc.c +++ b/src/egl/main/eglmisc.c @@ -97,6 +97,7 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image); 
_EGL_CHECK_EXTENSION(NOK_swap_region); + _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); #undef _EGL_CHECK_EXTENSION } diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index 8026a6314d3..d46bdb0672e 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -36,12 +36,17 @@ _eglClampSwapInterval(_EGLSurface *surf, EGLint interval) static EGLint _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) { + _EGLDisplay *dpy = surf->Resource.Display; EGLint type = surf->Type; + EGLint texture_type = EGL_PBUFFER_BIT; EGLint i, err = EGL_SUCCESS; if (!attrib_list) return EGL_SUCCESS; + if (dpy->Extensions.NOK_texture_from_pixmap) + texture_type |= EGL_PIXMAP_BIT; + for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; @@ -125,7 +130,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) surf->LargestPbuffer = !!val; break; case EGL_TEXTURE_FORMAT: - if (type != EGL_PBUFFER_BIT) { + if (!(type & texture_type)) { err = EGL_BAD_ATTRIBUTE; break; } @@ -143,7 +148,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) surf->TextureFormat = val; break; case EGL_TEXTURE_TARGET: - if (type != EGL_PBUFFER_BIT) { + if (!(type & texture_type)) { err = EGL_BAD_ATTRIBUTE; break; } @@ -160,7 +165,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) surf->TextureTarget = val; break; case EGL_MIPMAP_TEXTURE: - if (type != EGL_PBUFFER_BIT) { + if (!(type & texture_type)) { err = EGL_BAD_ATTRIBUTE; break; } @@ -452,11 +457,16 @@ EGLBoolean _eglBindTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, EGLint buffer) { + EGLint texture_type = EGL_PBUFFER_BIT; + /* Just do basic error checking and return success/fail. * Drivers must implement the real stuff. 
*/ - if (surface->Type != EGL_PBUFFER_BIT) { + if (dpy->Extensions.NOK_texture_from_pixmap) + texture_type |= EGL_PIXMAP_BIT; + + if (!(surface->Type & texture_type)) { _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); return EGL_FALSE; } @@ -466,6 +476,11 @@ _eglBindTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, return EGL_FALSE; } + if (surface->TextureTarget == EGL_NO_TEXTURE) { + _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + return EGL_FALSE; + } + if (buffer != EGL_BACK_BUFFER) { _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); return EGL_FALSE; diff --git a/src/egl/main/eglsurface.h b/src/egl/main/eglsurface.h index 0a00035730f..8f520dcdf65 100644 --- a/src/egl/main/eglsurface.h +++ b/src/egl/main/eglsurface.h @@ -83,7 +83,7 @@ extern EGLBoolean _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint attribute, EGLint value); -extern EGLBoolean +PUBLIC extern EGLBoolean _eglBindTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint buffer); diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 05b187805b1..7ea51621f72 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -12,6 +12,7 @@ #include "gallivm/lp_bld_printf.h" #include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include "util/u_cpu_detect.h" #include "util/u_string.h" @@ -214,27 +215,34 @@ draw_llvm_create(struct draw_context *draw) llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); LLVMAddTargetData(llvm->target, llvm->pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. 
*/ - /* TODO: Add more passes */ - LLVMAddCFGSimplificationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. + + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddConstantPropagationPass(llvm->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(llvm->pass); + } + LLVMAddGVNPass(llvm->pass); + } else { + /* We need at least this pass to prevent the backends to fail in + * unexpected ways. 
*/ - LLVMAddInstructionCombiningPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); } - LLVMAddGVNPass(llvm->pass); init_globals(llvm); - -#if 0 - LLVMDumpModule(lp_build_module); -#endif + if (gallivm_debug & GALLIVM_DEBUG_IR) { + LLVMDumpModule(llvm->module); + } return llvm; } @@ -283,7 +291,10 @@ generate_vs(struct draw_llvm *llvm, num_vs = 4; /* number of vertices per block */ #endif - /*tgsi_dump(tokens, 0);*/ + if (gallivm_debug & GALLIVM_DEBUG_IR) { + tgsi_dump(tokens, 0); + } + lp_build_tgsi_soa(builder, tokens, vs_type, @@ -727,7 +738,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMRunFunctionPassManager(llvm->pass, variant->function); - if (0) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(variant->function); debug_printf("\n"); } @@ -735,8 +746,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); variant->jit_func = voidptr_to_draw_jit_vert_func(code); - if (0) + if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); + } } @@ -881,7 +893,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); - if (0) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(variant->function_elts); debug_printf("\n"); } @@ -889,8 +901,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); variant->jit_func_elts = voidptr_to_draw_vert_func_elts(code); - if (0) + if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); + } } void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 1897acce795..858002b34fe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -36,6 +36,19 
@@ #include "util/u_string.h" +#define GALLIVM_DEBUG_TGSI 0x1 +#define GALLIVM_DEBUG_IR 0x2 +#define GALLIVM_DEBUG_ASM 0x4 +#define GALLIVM_DEBUG_NO_OPT 0x8 + + +#ifdef DEBUG +extern unsigned gallivm_debug; +#else +#define gallivm_debug 0 +#endif + + static INLINE void lp_build_name(LLVMValueRef val, const char *format, ...) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index a2b0298a1c7..e1b94adc85a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -89,6 +89,11 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, * It requires that a packed pixel fits into an element of the output * channels. The common case is when converting pixel with a depth of 32 bit or * less into floats. + * + * \param format_desc the format of the 'packed' incoming pixel vector + * \param type the desired type for rgba_out (type.length = n, above) + * \param packed the incoming vector of packed pixels + * \param rgba_out returns the SoA R,G,B,A vectors */ void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, @@ -115,8 +120,8 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, /* Decode the input vector components */ start = 0; for (chan = 0; chan < format_desc->nr_channels; ++chan) { - unsigned width = format_desc->channel[chan].size; - unsigned stop = start + width; + const unsigned width = format_desc->channel[chan].size; + const unsigned stop = start + width; LLVMValueRef input; input = packed; @@ -247,9 +252,10 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, /** - * Fetch a pixel into a SoA. + * Fetch a texels from a texture, returning them in SoA layout. * - * \param type the desired return type for 'rgba' + * \param type the desired return type for 'rgba'. The vector length + * is the number of texels to fetch * * \param base_ptr points to start of the texture image block. 
For non- * compressed formats, this simply points to the texel. @@ -290,6 +296,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, /* * gather the texels from the texture + * Ex: packed = {BGRA, BGRA, BGRA, BGRA}. */ packed = lp_build_gather(builder, type.length, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 5067d0a164f..bd080f397aa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -29,9 +29,23 @@ #include "pipe/p_compiler.h" #include "util/u_cpu_detect.h" #include "util/u_debug.h" +#include "lp_bld_debug.h" #include "lp_bld_init.h" +#ifdef DEBUG +unsigned gallivm_debug = 0; + +static const struct debug_named_value lp_bld_debug_flags[] = { + { "tgsi", GALLIVM_DEBUG_TGSI }, + { "ir", GALLIVM_DEBUG_IR }, + { "asm", GALLIVM_DEBUG_ASM }, + { "nopt", GALLIVM_DEBUG_NO_OPT }, + {NULL, 0} +}; +#endif + + LLVMModuleRef lp_build_module = NULL; LLVMExecutionEngineRef lp_build_engine = NULL; LLVMModuleProviderRef lp_build_provider = NULL; @@ -41,6 +55,10 @@ LLVMTargetDataRef lp_build_target = NULL; void lp_build_init(void) { +#ifdef DEBUG + gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); +#endif + LLVMInitializeNativeTarget(); LLVMLinkInJIT(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index aaf3360aa24..d6e1c478be9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -81,6 +81,8 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 +#define LP_MAX_INSTRUCTIONS 256 + struct lp_exec_mask { struct lp_build_context *bld; @@ -105,6 +107,13 @@ struct lp_exec_mask { } loop_stack[LP_MAX_TGSI_NESTING]; int loop_stack_size; + LLVMValueRef ret_mask; + struct { + int pc; + LLVMValueRef ret_mask; + } call_stack[LP_MAX_TGSI_NESTING]; + int call_stack_size; + LLVMValueRef exec_mask; }; @@ -134,6 +143,9 
@@ struct lp_build_tgsi_soa_context struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; + + struct tgsi_full_instruction *instructions; + uint max_instructions; }; static const unsigned char @@ -166,9 +178,10 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->has_mask = FALSE; mask->cond_stack_size = 0; mask->loop_stack_size = 0; + mask->call_stack_size = 0; mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); - mask->break_mask = mask->cont_mask = mask->cond_mask = + mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); } @@ -189,9 +202,16 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) } else mask->exec_mask = mask->cond_mask; + if (mask->call_stack_size) { + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->exec_mask, + mask->ret_mask, + "callmask"); + } mask->has_mask = (mask->cond_stack_size > 0 || - mask->loop_stack_size > 0); + mask->loop_stack_size > 0 || + mask->call_stack_size > 0); } static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, @@ -368,6 +388,49 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMBuildStore(mask->bld->builder, val, dst); } +static void lp_exec_mask_call(struct lp_exec_mask *mask, + int func, + int *pc) +{ + assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); + mask->call_stack[mask->call_stack_size].pc = *pc; + mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; + mask->call_stack_size++; + *pc = func; +} + +static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) +{ + LLVMValueRef exec_mask; + + if (mask->call_stack_size == 0) { + /* returning from main() */ + *pc = -1; + return; + } + exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "ret"); + + mask->ret_mask = LLVMBuildAnd(mask->bld->builder, + mask->ret_mask, + exec_mask, "ret_full"); + + lp_exec_mask_update(mask); +} + +static void 
lp_exec_mask_bgnsub(struct lp_exec_mask *mask) +{ +} + +static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) +{ + assert(mask->call_stack_size); + mask->call_stack_size--; + *pc = mask->call_stack[mask->call_stack_size].pc; + mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; + lp_exec_mask_update(mask); +} static LLVMValueRef emit_ddx(struct lp_build_tgsi_soa_context *bld, @@ -418,34 +481,36 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->Src[index]; - unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); + const unsigned swizzle = + tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; LLVMValueRef addr = NULL; - switch (swizzle) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - - if (reg->Register.Indirect) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); - unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); - addr = LLVMBuildLoad(bld->base.builder, - bld->addr[reg->Indirect.Index][swizzle], - ""); - /* for indexing we want integers */ - addr = LLVMBuildFPToSI(bld->base.builder, addr, - int_vec_type, ""); - addr = LLVMBuildExtractElement(bld->base.builder, - addr, LLVMConstInt(LLVMInt32Type(), 0, 0), - ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); - } - - switch (reg->Register.File) { - case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); + if (swizzle > 3) { + assert(0 && "invalid swizzle in emit_fetch()"); + return bld->base.undef; + } + + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers 
*/ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + + switch (reg->Register.File) { + case TGSI_FILE_CONSTANT: + { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), + reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar, scalar_ptr; if (reg->Register.Indirect) { @@ -453,24 +518,26 @@ emit_fetch( "\taddr = %d\n", addr);*/ index = lp_build_add(&bld->base, index, addr); } - scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + &index, 1, ""); scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); - break; } + break; - case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->Register.Index][swizzle]; - assert(res); - break; + case TGSI_FILE_IMMEDIATE: + res = bld->immediates[reg->Register.Index][swizzle]; + assert(res); + break; - case TGSI_FILE_INPUT: - res = bld->inputs[reg->Register.Index][swizzle]; - assert(res); - break; + case TGSI_FILE_INPUT: + res = bld->inputs[reg->Register.Index][swizzle]; + assert(res); + break; - case TGSI_FILE_TEMPORARY: { + case TGSI_FILE_TEMPORARY: + { LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle, reg->Register.Indirect, @@ -478,17 +545,11 @@ emit_fetch( res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); if(!res) return bld->base.undef; - break; - } - - default: - assert( 0 ); - return bld->base.undef; } break; default: - assert( 0 ); + assert(0 && "invalid src register in emit_fetch()"); return bld->base.undef; } @@ -939,7 +1000,8 @@ static boolean emit_instruction( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info) + const struct tgsi_opcode_info *info, + int *pc) { unsigned 
chan_index; LLVMValueRef src0, src1, src2; @@ -963,6 +1025,8 @@ emit_instruction( * redundant code. */ + (*pc)++; + assert(info->num_dst <= 1); if (info->num_dst) { FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1561,16 +1625,18 @@ emit_instruction( break; case TGSI_OPCODE_CAL: - /* FIXME */ - return FALSE; + lp_exec_mask_call(&bld->exec_mask, + inst->Label.Label, + pc); + break; case TGSI_OPCODE_RET: - /* FIXME */ - return FALSE; + lp_exec_mask_ret(&bld->exec_mask, pc); break; case TGSI_OPCODE_END: + *pc = -1; break; case TGSI_OPCODE_SSG: @@ -1736,6 +1802,10 @@ emit_instruction( lp_exec_bgnloop(&bld->exec_mask); break; + case TGSI_OPCODE_BGNSUB: + lp_exec_mask_bgnsub(&bld->exec_mask); + break; + case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1748,6 +1818,10 @@ emit_instruction( lp_exec_endloop(&bld->exec_mask); break; + case TGSI_OPCODE_ENDSUB: + lp_exec_mask_endsub(&bld->exec_mask, pc); + break; + case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); @@ -1888,7 +1962,9 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; uint num_immediates = 0; + uint num_instructions = 0; unsigned i; + int pc = 0; /* Setup build context */ memset(&bld, 0, sizeof bld); @@ -1902,6 +1978,13 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.sampler = sampler; bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || info->opcode_count[TGSI_OPCODE_ARL] > 0; + bld.instructions = (struct tgsi_full_instruction *) + MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); + bld.max_instructions = LP_MAX_INSTRUCTIONS; + + if (!bld.instructions) { + return; + } lp_exec_mask_init(&bld.exec_mask, &bld.base); @@ -1918,11 +2001,21 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, case TGSI_TOKEN_TYPE_INSTRUCTION: { - unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); - if 
(!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - opcode_info->mnemonic); + /* save expanded instruction */ + if (num_instructions == bld.max_instructions) { + bld.instructions = REALLOC(bld.instructions, + bld.max_instructions + * sizeof(struct tgsi_full_instruction), + (bld.max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + bld.max_instructions += LP_MAX_INSTRUCTIONS; + } + + memcpy(bld.instructions + num_instructions, + &parse.FullToken.FullInstruction, + sizeof(bld.instructions[0])); + + num_instructions++; } break; @@ -1949,6 +2042,16 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } + + while (pc != -1) { + struct tgsi_full_instruction *instr = bld.instructions + pc; + const struct tgsi_opcode_info *opcode_info = + tgsi_get_opcode_info(instr->Instruction.Opcode); + if (!emit_instruction( &bld, instr, opcode_info, &pc )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + opcode_info->mnemonic); + } + if (0) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); @@ -1958,5 +2061,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, debug_printf("2222222222222222222222222222 \n"); } tgsi_parse_free( &parse ); + + if (0) { + LLVMModuleRef module = LLVMGetGlobalParent( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMDumpModule(module); + + } + + FREE( bld.instructions ); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index 37d278d2379..e6cd400c43f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -195,6 +195,7 @@ lp_uint_type(struct lp_type type) { struct lp_type res_type; + assert(type.length <= LP_MAX_VECTOR_LENGTH); memset(&res_type, 0, sizeof res_type); res_type.width = type.width; res_type.length = 
type.length; @@ -211,6 +212,7 @@ lp_int_type(struct lp_type type) { struct lp_type res_type; + assert(type.length <= LP_MAX_VECTOR_LENGTH); memset(&res_type, 0, sizeof res_type); res_type.width = type.width; res_type.length = type.length; @@ -239,6 +241,43 @@ lp_wider_type(struct lp_type type) /** + * Return the size of the LLVMType in bits. + * XXX this function doesn't necessarily handle all LLVM types. + */ +unsigned +lp_sizeof_llvm_type(LLVMTypeRef t) +{ + LLVMTypeKind k = LLVMGetTypeKind(t); + + switch (k) { + case LLVMIntegerTypeKind: + return LLVMGetIntTypeWidth(t); + case LLVMFloatTypeKind: + return 8 * sizeof(float); + case LLVMDoubleTypeKind: + return 8 * sizeof(double); + case LLVMVectorTypeKind: + { + LLVMTypeRef elem = LLVMGetElementType(t); + unsigned len = LLVMGetVectorSize(t); + return len * lp_sizeof_llvm_type(elem); + } + break; + case LLVMArrayTypeKind: + { + LLVMTypeRef elem = LLVMGetElementType(t); + unsigned len = LLVMGetArrayLength(t); + return len * lp_sizeof_llvm_type(elem); + } + break; + default: + assert(0 && "Unexpected type in lp_get_llvm_type_size()"); + return 0; + } +} + + +/** * Return string name for a LLVMTypeKind. Useful for debugging. 
*/ const char * diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index b3f9e9175d3..17819d4d32a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -316,6 +316,10 @@ struct lp_type lp_wider_type(struct lp_type type); +unsigned +lp_sizeof_llvm_type(LLVMTypeRef t); + + const char * lp_typekind_name(LLVMTypeKind t); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1218242653f..c15d970b573 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3136,7 +3136,7 @@ exec_instruction( break; case TGSI_OPCODE_DIV: - assert( 0 ); + exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index ad553c71a57..3521847b619 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1366,4 +1366,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, return ok; } +#else + +void ppc_dummy_func(void); + +void ppc_dummy_func(void) +{ +} + #endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 417d0cf04c9..6e250575d66 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -67,21 +67,6 @@ BOOL WINAPI j_SymInitialize(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadePr return FALSE; } -typedef BOOL (WINAPI *PFNSYMCLEANUP)(HANDLE); -static PFNSYMCLEANUP pfnSymCleanup = NULL; - -static -BOOL WINAPI j_SymCleanup(HANDLE hProcess) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymCleanup || (pfnSymCleanup = (PFNSYMCLEANUP) GetProcAddress(hModule_Imagehlp, "SymCleanup"))) - ) - return pfnSymCleanup(hProcess); - else - return 
FALSE; -} - typedef DWORD (WINAPI *PFNSYMSETOPTIONS)(DWORD); static PFNSYMSETOPTIONS pfnSymSetOptions = NULL; @@ -97,36 +82,6 @@ DWORD WINAPI j_SymSetOptions(DWORD SymOptions) return FALSE; } -typedef BOOL (WINAPI *PFNSYMUNDNAME)(PIMAGEHLP_SYMBOL, PSTR, DWORD); -static PFNSYMUNDNAME pfnSymUnDName = NULL; - -static -BOOL WINAPI j_SymUnDName(PIMAGEHLP_SYMBOL Symbol, PSTR UnDecName, DWORD UnDecNameLength) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymUnDName || (pfnSymUnDName = (PFNSYMUNDNAME) GetProcAddress(hModule_Imagehlp, "SymUnDName"))) - ) - return pfnSymUnDName(Symbol, UnDecName, UnDecNameLength); - else - return FALSE; -} - -typedef PFUNCTION_TABLE_ACCESS_ROUTINE PFNSYMFUNCTIONTABLEACCESS; -static PFNSYMFUNCTIONTABLEACCESS pfnSymFunctionTableAccess = NULL; - -static -PVOID WINAPI j_SymFunctionTableAccess(HANDLE hProcess, DWORD AddrBase) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymFunctionTableAccess || (pfnSymFunctionTableAccess = (PFNSYMFUNCTIONTABLEACCESS) GetProcAddress(hModule_Imagehlp, "SymFunctionTableAccess"))) - ) - return pfnSymFunctionTableAccess(hProcess, AddrBase); - else - return NULL; -} - typedef PGET_MODULE_BASE_ROUTINE PFNSYMGETMODULEBASE; static PFNSYMGETMODULEBASE pfnSymGetModuleBase = NULL; @@ -142,41 +97,6 @@ DWORD WINAPI j_SymGetModuleBase(HANDLE hProcess, DWORD dwAddr) return 0; } -typedef BOOL (WINAPI *PFNSTACKWALK)(DWORD, HANDLE, HANDLE, LPSTACKFRAME, LPVOID, PREAD_PROCESS_MEMORY_ROUTINE, PFUNCTION_TABLE_ACCESS_ROUTINE, PGET_MODULE_BASE_ROUTINE, PTRANSLATE_ADDRESS_ROUTINE); -static PFNSTACKWALK pfnStackWalk = NULL; - -static -BOOL WINAPI j_StackWalk( - DWORD MachineType, - HANDLE hProcess, - HANDLE hThread, - LPSTACKFRAME StackFrame, - PVOID ContextRecord, - PREAD_PROCESS_MEMORY_ROUTINE ReadMemoryRoutine, - PFUNCTION_TABLE_ACCESS_ROUTINE FunctionTableAccessRoutine, - PGET_MODULE_BASE_ROUTINE GetModuleBaseRoutine, - 
PTRANSLATE_ADDRESS_ROUTINE TranslateAddress -) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnStackWalk || (pfnStackWalk = (PFNSTACKWALK) GetProcAddress(hModule_Imagehlp, "StackWalk"))) - ) - return pfnStackWalk( - MachineType, - hProcess, - hThread, - StackFrame, - ContextRecord, - ReadMemoryRoutine, - FunctionTableAccessRoutine, - GetModuleBaseRoutine, - TranslateAddress - ); - else - return FALSE; -} - typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_SYMBOL); static PFNSYMGETSYMFROMADDR pfnSymGetSymFromAddr = NULL; @@ -192,21 +112,6 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem return FALSE; } -typedef BOOL (WINAPI *PFNSYMGETLINEFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_LINE); -static PFNSYMGETLINEFROMADDR pfnSymGetLineFromAddr = NULL; - -static -BOOL WINAPI j_SymGetLineFromAddr(HANDLE hProcess, DWORD dwAddr, PDWORD pdwDisplacement, PIMAGEHLP_LINE Line) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymGetLineFromAddr || (pfnSymGetLineFromAddr = (PFNSYMGETLINEFROMADDR) GetProcAddress(hModule_Imagehlp, "SymGetLineFromAddr"))) - ) - return pfnSymGetLineFromAddr(hProcess, dwAddr, pdwDisplacement, Line); - else - return FALSE; -} - static INLINE boolean debug_symbol_print_imagehlp(const void *addr) diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index c82e681a254..bce1eef9be1 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -200,9 +200,16 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call will not block and the return value will be TRUE if the query has completed or FALSE otherwise. -A common type of query is the occlusion query which counts the number of -fragments/pixels which are written to the framebuffer (and not culled by -Z/stencil/alpha testing or shader KILL instructions). 
+The most common type of query is the occlusion query, +``PIPE_QUERY_OCCLUSION_COUNTER``, which counts the number of fragments which +are written to the framebuffer without being culled by +:ref:`Depth, Stencil, & Alpha` testing or shader KILL instructions. + +Another type of query, ``PIPE_QUERY_TIME_ELAPSED``, returns the amount of +time, in milliseconds, the context takes to perform operations. + +Gallium does not guarantee the availability of any query types; one must +always check the capabilities of the :ref:`Screen` first. Conditional Rendering @@ -284,11 +291,6 @@ data to be written to the resource at this point. The returned map points to the start of the mapped range according to the box region, not the beginning of the resource. -.. _transfer_flush_region: -``transfer_flush_region`` If a transfer was created with TRANFER_FLUSH_EXPLICIT, -only the region specified is guaranteed to be written to. This is relative to -the mapped range, not the beginning of the resource. - ``transfer_unmap`` remove the memory mapping for the transfer object. Any pointers into the map should be considered invalid and discarded. @@ -296,6 +298,16 @@ Any pointers into the map should be considered invalid and discarded. Basically get_transfer, transfer_map, data write, transfer_unmap, and transfer_destroy all in one. +.. _transfer_flush_region: + +transfer_flush_region +%%%%%%%%%%%%%%%%%%%%% + +If a transfer was created with ``FLUSH_EXPLICIT``, it will not automatically +be flushed on write or unmap. Flushes must be requested with +``transfer_flush_region``. Flush ranges are relative to the mapped range, not +the beginning of the resource. + .. _pipe_transfer: PIPE_TRANSFER @@ -315,5 +327,4 @@ These flags control the behavior of a transfer object. operations pending on the resource are undefined. Cannot be used with ``READ``. * ``FLUSH_EXPLICIT``: Written ranges will be notified later with - :ref:`transfer_flush_region`. Cannot be used with - ``READ``. 
+ :ref:`transfer_flush_region`. Cannot be used with ``READ``. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 71b7aec35a5..96257f93df9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -1,3 +1,5 @@ +.. _screen: + Screen ====== @@ -33,6 +35,7 @@ The integer capabilities: * ``MAX_RENDER_TARGETS``: The maximum number of render targets that may be bound. * ``OCCLUSION_QUERY``: Whether occlusion queries are available. +* ``TIMER_QUERY``: Whether timer queries are available. * ``TEXTURE_SHADOW_MAP``: XXX * ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available for a 2D texture. diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 5af4eaa88b1..750f0aa98ab 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -76,6 +76,8 @@ cell_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 10; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index d196c779e43..7cf627d975b 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -113,6 +113,8 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_OCCLUSION_QUERY: return 0; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 003b1fd5bf0..8b3f46f2c16 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -161,7 +161,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, int ret; if (batch->ptr - 
batch->map > batch->buf->size) { - debug_printf("bad relocation ptr %p map %p offset %d size %d\n", + debug_printf("bad relocation ptr %p map %p offset %li size %i\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); return PIPE_ERROR_OUT_OF_MEMORY; diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index 07537fe44ef..ca09d88fd12 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -210,7 +210,7 @@ brw_texture_get_handle(struct pipe_screen *screen, stride = tex->pitch * tex->cpp; - return bscreen->sws->bo_get_handle(tex->bo, whandle, stride); + return bscreen->sws->bo_get_handle(tex->bo, whandle, stride) == PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index d242691f2d2..1890b640e90 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -172,6 +172,8 @@ brw_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_OCCLUSION_QUERY: return 0; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4ea367597e1..526e85c82e1 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -59,7 +59,7 @@ lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxil python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ LDFLAGS += $(LLVM_LDFLAGS) -LIBS += $(GL_LIB_DEPS) -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) +LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS) LD=g++ $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index ee818143610..92fb2b3ee5b 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -39,16 +39,13 @@ st_print_current(void); #define DEBUG_PIPE 0x1 #define DEBUG_TGSI 0x2 #define DEBUG_TEX 0x4 -#define DEBUG_ASM 0x8 #define DEBUG_SETUP 0x10 #define DEBUG_RAST 0x20 #define DEBUG_QUERY 0x40 #define DEBUG_SCREEN 0x80 -#define DEBUG_JIT 0x100 #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 -#define DEBUG_NO_LLVM_OPT 0x1000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 243aea6c3a3..23aa34ddec1 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -38,7 +38,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" #include "gallivm/lp_bld_init.h" -#include "lp_debug.h" +#include "gallivm/lp_bld_debug.h" #include "lp_screen.h" #include "gallivm/lp_bld_intr.h" #include "lp_jit.h" @@ -151,8 +151,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - if (LP_DEBUG & DEBUG_JIT) + if (gallivm_debug & GALLIVM_DEBUG_IR) { LLVMDumpModule(screen->module); + } } @@ -180,7 +181,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->pass = LLVMCreateFunctionPassManager(screen->provider); LLVMAddTargetData(screen->target, screen->pass); - if ((LP_DEBUG & DEBUG_NO_LLVM_OPT) == 0) { + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. 
*/ /* TODO: Add more passes */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9d254853cb8..22fbf381ae0 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -53,16 +53,13 @@ static const struct debug_named_value lp_debug_flags[] = { { "pipe", DEBUG_PIPE }, { "tgsi", DEBUG_TGSI }, { "tex", DEBUG_TEX }, - { "asm", DEBUG_ASM }, { "setup", DEBUG_SETUP }, { "rast", DEBUG_RAST }, { "query", DEBUG_QUERY }, { "screen", DEBUG_SCREEN }, - { "jit", DEBUG_JIT }, { "show_tiles", DEBUG_SHOW_TILES }, { "show_subtiles", DEBUG_SHOW_SUBTILES }, { "counters", DEBUG_COUNTERS }, - { "nopt", DEBUG_NO_LLVM_OPT }, {NULL, 0} }; #endif @@ -108,6 +105,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return 1; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index cc163ebd4fa..70ce77006a2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -87,7 +87,6 @@ #include "lp_bld_depth.h" #include "lp_bld_interp.h" #include "lp_context.h" -#include "lp_debug.h" #include "lp_perf.h" #include "lp_screen.h" #include "lp_setup.h" @@ -862,7 +861,7 @@ generate_fragment(struct llvmpipe_context *lp, if (1) LLVMRunFunctionPassManager(screen->pass, function); - if (LP_DEBUG & DEBUG_JIT) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); @@ -876,8 +875,9 @@ generate_fragment(struct llvmpipe_context *lp, variant->jit_function[do_tri_test] = cast_voidptr_to_lp_jit_frag_func(f); - if (LP_DEBUG & DEBUG_ASM) + if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); + } } } @@ -889,7 +889,7 @@ generate_variant(struct 
llvmpipe_context *lp, { struct lp_fragment_shader_variant *variant; - if (LP_DEBUG & DEBUG_JIT) { + if (gallivm_debug & GALLIVM_DEBUG_IR) { unsigned i; tgsi_dump(shader->base.tokens, 0); @@ -997,7 +997,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); - if (LP_DEBUG & DEBUG_TGSI) { + if (gallivm_debug & GALLIVM_DEBUG_TGSI) { debug_printf("llvmpipe: Create fragment shader %p:\n", (void *) shader); tgsi_dump(templ->tokens, 0); } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ff3a7b2843d..a0eed8c2e2d 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -125,6 +125,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 8; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 0ff25e54f73..a44f9e94d70 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -52,6 +52,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->is_nv4x ? 
4 : 2; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: @@ -84,6 +86,44 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + case PIPE_CAP_MAX_FS_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: + return 4096; + case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: + /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static + value (written there) ? */ + return screen->is_nv4x ? 4 : 0; + /*case PIPE_CAP_MAX_FS_INPUTS:*/ /* FIXME */ + /*case PIPE_CAP_MAX_FS_CONSTS:*/ /* FIXME */ + /* return 0;*/ + case PIPE_CAP_MAX_FS_TEMPS: + return 32; + case PIPE_CAP_MAX_FS_ADDRS: + return screen->is_nv4x ? 1 : 0; + /*case PIPE_CAP_MAX_FS_PREDS:*/ /* FIXME */ + /* return 0;*/ + case PIPE_CAP_MAX_VS_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: + return screen->is_nv4x ? 512 : 256; + case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + return screen->is_nv4x ? 512 : 0; + case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: + /* FIXME: is it the dynamic (nv30/nv40:24) or the static + value (written there) ? */ + return screen->is_nv4x ? 4 : 1; + /*case PIPE_CAP_MAX_VS_INPUTS:*/ /* FIXME */ + /* return 0;*/ + case PIPE_CAP_MAX_VS_CONSTS: + return 256; + case PIPE_CAP_MAX_VS_TEMPS: + return screen->is_nv4x ? 
48 : 16; + case PIPE_CAP_MAX_VS_ADDRS: + return 2; + /*case PIPE_CAP_MAX_VS_PREDS:*/ /* FIXME */ + /* return 0;*/ default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 0444fdac7d5..9837deaa5e3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -80,6 +80,9 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); FREE(r300->vs_constants.state); + if (!r300->screen->caps.has_tcl) { + FREE(r300->vertex_stream_state.state); + } FREE(r300); } @@ -151,6 +154,16 @@ static void r300_setup_atoms(struct r300_context* r300) r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); r300->vs_constants.state = CALLOC_STRUCT(r300_constant_buffer); + if (!r300->screen->caps.has_tcl) { + r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state); + } + + /* Some non-CSO atoms don't use the state pointer. */ + r300->invariant_state.allow_null_state = TRUE; + r300->fs_rc_constant_state.allow_null_state = TRUE; + r300->pvs_flush.allow_null_state = TRUE; + r300->query_start.allow_null_state = TRUE; + r300->texture_cache_inval.allow_null_state = TRUE; } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -201,6 +214,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); /* Enable Draw's clipping. */ draw_set_driver_clipping(r300->draw, FALSE); + /* Disable converting points/lines to triangles. 
*/ + draw_wide_line_threshold(r300->draw, 10000000.f); + draw_wide_point_threshold(r300->draw, 10000000.f); } r300_setup_atoms(r300); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 5ad448978b9..e44906d0099 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -55,6 +55,8 @@ struct r300_atom { unsigned size; /* Whether this atom should be emitted. */ boolean dirty; + /* Whether this atom may be emitted with state == NULL. */ + boolean allow_null_state; }; struct r300_blend_state { @@ -88,8 +90,10 @@ struct r300_dsa_state { }; struct r300_rs_state { - /* Draw-specific rasterizer state */ + /* Original rasterizer state. */ struct pipe_rasterizer_state rs; + /* Draw-specific rasterizer state. */ + struct pipe_rasterizer_state rs_draw; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ @@ -235,10 +239,6 @@ struct r300_constant_buffer { struct r300_query { /* The kind of query. Currently only OQ is supported. */ unsigned type; - /* Whether this query is currently active. Only active queries will - * get emitted into the command stream, and only active queries get - * tallied. */ - boolean active; /* The current count of this query. Required to be at least 32 bits. */ unsigned int count; /* The offset of this query into the query buffer, in bytes. 
*/ @@ -304,16 +304,6 @@ struct r300_texture { enum r300_buffer_tiling microtile, macrotile; }; -struct r300_vertex_info { - /* Parent class */ - struct vertex_info vinfo; - - /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ - uint32_t vap_prog_stream_cntl[8]; - /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ - uint32_t vap_prog_stream_cntl_ext[8]; -}; - struct r300_vertex_element_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 8eb321fa08a..85a1aa7b06e 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -40,6 +40,7 @@ static struct debug_option debug_options[] = { { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, + { "fb", DBG_FB, "Framebuffer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, @@ -97,3 +98,84 @@ void r300_init_debug(struct r300_screen * screen) } } } + +void r500_dump_rs_block(struct r300_rs_block *rs) +{ + unsigned count, ip, it_count, ic_count, i, j; + unsigned tex_ptr; + unsigned col_ptr, col_fmt; + + count = rs->inst_count & 0xf; + count++; + + it_count = rs->count & 0x7f; + ic_count = (rs->count >> 7) & 0xf; + + fprintf(stderr, "RS Block: %d texcoords (linear), %d colors (perspective)\n", + it_count, ic_count); + fprintf(stderr, "%d instructions\n", count); + + for (i = 0; i < count; i++) { + if (rs->inst[i] & 0x10) { + ip = rs->inst[i] & 0xf; + fprintf(stderr, "texture: ip %d to psf %d\n", + ip, (rs->inst[i] >> 5) & 0x7f); + + tex_ptr = rs->ip[ip] & 0xffffff; + fprintf(stderr, " : "); + + j = 3; + do { + if (tex_ptr & 0x3f == 63) { + fprintf(stderr, "1.0"); + } else if (tex_ptr & 0x3f == 
62) { + fprintf(stderr, "0.0"); + } else { + fprintf(stderr, "[%d]", tex_ptr & 0x3f); + } + } while (j-- && fprintf(stderr, "/")); + fprintf(stderr, "\n"); + } + + if (rs->inst[i] & 0x10000) { + ip = (rs->inst[i] >> 12) & 0xf; + fprintf(stderr, "color: ip %d to psf %d\n", + ip, (rs->inst[i] >> 18) & 0x7f); + + col_ptr = (rs->ip[ip] >> 24) & 0x7; + col_fmt = (rs->ip[ip] >> 27) & 0xf; + fprintf(stderr, " : offset %d ", col_ptr); + + switch (col_fmt) { + case 0: + fprintf(stderr, "(R/G/B/A)"); + break; + case 1: + fprintf(stderr, "(R/G/B/0)"); + break; + case 2: + fprintf(stderr, "(R/G/B/1)"); + break; + case 4: + fprintf(stderr, "(0/0/0/A)"); + break; + case 5: + fprintf(stderr, "(0/0/0/0)"); + break; + case 6: + fprintf(stderr, "(0/0/0/1)"); + break; + case 8: + fprintf(stderr, "(1/1/1/A)"); + break; + case 9: + fprintf(stderr, "(1/1/1/0)"); + break; + case 10: + fprintf(stderr, "(1/1/1/1)"); + break; + } + fprintf(stderr, "\n"); + } + } +} diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 568109cf960..7f7f2929cc3 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -548,8 +548,8 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) } -static void r300_emit_query_finish(struct r300_context *r300, - struct r300_query *query) +static void r300_emit_query_end_frag_pipes(struct r300_context *r300, + struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; CS_LOCALS(r300); @@ -604,8 +604,8 @@ static void r300_emit_query_finish(struct r300_context *r300, END_CS; } -static void rv530_emit_query_single(struct r300_context *r300, - struct r300_query *query) +static void rv530_emit_query_end_single_z(struct r300_context *r300, + struct r300_query *query) { CS_LOCALS(r300); @@ -617,8 +617,8 @@ static void rv530_emit_query_single(struct r300_context *r300, END_CS; } -static void rv530_emit_query_double(struct r300_context *r300, - struct 
r300_query *query) +static void rv530_emit_query_end_double_z(struct r300_context *r300, + struct r300_query *query) { CS_LOCALS(r300); @@ -646,11 +646,13 @@ void r300_emit_query_end(struct r300_context* r300) if (caps->family == CHIP_FAMILY_RV530) { if (caps->num_z_pipes == 2) - rv530_emit_query_double(r300, query); + rv530_emit_query_end_double_z(r300, query); else - rv530_emit_query_single(r300, query); + rv530_emit_query_end_single_z(r300, query); } else - r300_emit_query_finish(r300, query); + r300_emit_query_end_frag_pipes(r300, query); + + query->begin_emitted = FALSE; } void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) @@ -714,6 +716,10 @@ void r300_emit_rs_block_state(struct r300_context* r300, unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); + if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { + r500_dump_rs_block(rs); + } + DBG(r300, DBG_DRAW, "r300: RS emit:\n"); BEGIN_CS(size); @@ -1094,7 +1100,8 @@ validate: } } /* ...occlusion query buffer... 
*/ - if (r300->query_start.dirty) { + if (r300->query_start.dirty || + (r300->query_current && r300->query_current->begin_emitted)) { if (!r300_add_buffer(r300->rws, r300->oqbo, 0, RADEON_GEM_DOMAIN_GTT)) { r300->context.flush(&r300->context, 0, NULL); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index e78c6a3624f..d6876c1903f 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -37,6 +38,8 @@ static void r300_flush(struct pipe_context* pipe, struct r300_context *r300 = r300_context(pipe); struct r300_query *query; struct r300_atom *atom; + struct pipe_framebuffer_state *fb; + unsigned i; CS_LOCALS(r300); (void) cs_count; @@ -48,15 +51,15 @@ static void r300_flush(struct pipe_context* pipe, draw_flush(r300->draw); } - r300_emit_query_end(r300); - if (r300->dirty_hw) { + r300_emit_query_end(r300); + FLUSH_CS; r300->dirty_hw = 0; /* New kitchen sink, baby. */ foreach(atom, &r300->atom_list) { - if (atom->state) { + if (atom->state || atom->allow_null_state) { atom->dirty = TRUE; } } @@ -72,6 +75,39 @@ static void r300_flush(struct pipe_context* pipe, foreach(query, &r300->query_list) { query->flushed = TRUE; } + + /* XXX + * + * This is a preliminary implementation of glFinish. Note that st/mesa + * uses a non-null fence when glFinish is called and then waits for + * the fence. Instead of returning the actual fence, we do the sync + * directly. + * + * The ideal implementation should use something like EmitIrqLocked and + * WaitIrq, or better, real fences. + * + * This feature degrades performance to the level of r300c for games that + * use glFinish a lot, even openarena does. 
Ideally we wouldn't need + * glFinish at all if we had proper throttling in swapbuffers so that + * the CPU wouldn't outrun the GPU by several frames, so this is basically + * a temporary fix for the input lag. Once swap&sync works with DRI2, + * I'll be happy to remove this code. + * + * - M. */ + if (fence && r300->fb_state.state) { + fb = r300->fb_state.state; + + for (i = 0; i < fb->nr_cbufs; i++) { + if (fb->cbufs[i]->texture) { + r300->rws->buffer_wait(r300->rws, + r300_texture(fb->cbufs[i]->texture)->buffer); + } + if (fb->zsbuf) { + r300->rws->buffer_wait(r300->rws, + r300_texture(fb->zsbuf->texture)->buffer); + } + } + } } void r300_init_flush_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5c27796e894..6acbac22196 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -43,8 +43,6 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->type = query_type; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - q->active = FALSE; - if (r300screen->caps.family == CHIP_FAMILY_RV530) query_size = r300screen->caps.num_z_pipes * sizeof(uint32_t); else @@ -59,6 +57,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, /* XXX */ if (q->offset >= 4096) { q->offset = 0; + fprintf(stderr, "r300: Rewinding OQBO...\n"); } return (struct pipe_query*)q; @@ -80,7 +79,12 @@ static void r300_begin_query(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_query* q = (struct r300_query*)query; - assert(r300->query_current == NULL); + if (r300->query_current != NULL) { + fprintf(stderr, "r300: begin_query: " + "Some other query has already been started.\n"); + assert(0); + return; + } pipe_buffer_write(pipe, r300->oqbo, @@ -97,10 +101,14 @@ static void r300_end_query(struct pipe_context* pipe, struct pipe_query* query) { struct r300_context* r300 = r300_context(pipe); - struct 
r300_query* q = (struct r300_query*)query; + + if ((struct r300_query*)query != r300->query_current) { + fprintf(stderr, "r300: end_query: Got invalid query.\n"); + assert(0); + return; + } r300_emit_query_end(r300); - q->begin_emitted = false; r300->query_current = NULL; } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 8795410efde..e1f61982be2 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -735,6 +735,8 @@ void r300_swtcl_draw_arrays(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, @@ -747,6 +749,10 @@ void r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_arrays(r300->draw, mode, start, count); + /* XXX Not sure whether this is the best fix. + * It prevents CS from being rejected and weird assertion failures. */ + draw_flush(r300->draw); + for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, vb_transfer[i]); @@ -779,6 +785,8 @@ void r300_swtcl_draw_range_elements(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, @@ -794,6 +802,10 @@ void r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_arrays(r300->draw, mode, start, count); + /* XXX Not sure whether this is the best fix. + * It prevents CS from being rejected and weird assertion failures. 
*/ + draw_flush(r300->draw); + for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, vb_transfer[i]); @@ -827,7 +839,7 @@ struct r300_render { size_t vbo_max_used; void * vbo_ptr; - struct pipe_transfer *vbo_transfer; + struct pipe_transfer *vbo_transfer; }; static INLINE struct r300_render* @@ -842,8 +854,6 @@ r300_render_get_vertex_info(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; - r300_update_derived_state(r300); - return &r300->vertex_info; } @@ -891,10 +901,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct pipe_context* context = &r300render->r300->context; - CS_LOCALS(r300render->r300); - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); - END_CS; r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); @@ -928,10 +934,13 @@ static void r500_render_draw_arrays(struct vbuf_render* render, struct r300_context* r300 = r300render->r300; uint8_t* ptr; unsigned i; + unsigned dwords = 6; CS_LOCALS(r300); - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, 2, 0, 0); + (void) i; (void) ptr; + + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -952,7 +961,10 @@ static void r500_render_draw_arrays(struct vbuf_render* render, r300render->vbo_transfer); */ - BEGIN_CS(2); + BEGIN_CS(dwords); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, r300render->prim)); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); @@ -966,13 +978,18 @@ static void r500_render_draw_elements(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct 
r300_context* r300 = r300render->r300; int i; - unsigned dwords = 2 + (count+1)/2; + unsigned dwords = 6 + (count+1)/2; + unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) / + (r300render->r300->vertex_info.size * 4) - 1; CS_LOCALS(r300); r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); BEGIN_CS(dwords); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, r300render->prim)); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8399f5df8e4..640b3d34688 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -115,6 +115,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 1; /* Unsupported features (boolean caps). */ + case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_DUAL_SOURCE_BLEND: case PIPE_CAP_TGSI_CONT_SUPPORTED: case PIPE_CAP_INDEP_BLEND_ENABLE: diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index d58aa138a70..29492024fe3 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -61,19 +61,23 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { * those changes. */ /*@{*/ -#define DBG_HELP 0x0000001 -#define DBG_FP 0x0000002 -#define DBG_VP 0x0000004 -#define DBG_CS 0x0000008 -#define DBG_DRAW 0x0000010 -#define DBG_TEX 0x0000020 -#define DBG_FALL 0x0000040 -#define DBG_ANISOHQ 0x0000080 -#define DBG_NO_TILING 0x0000100 -#define DBG_NO_IMMD 0x0000200 -#define DBG_STATS 0x0000400 -#define DBG_RS 0x0000800 -#define DBG_TEXALLOC 0x0001000 +#define DBG_HELP (1 << 0) +/* Logging. 
*/ +#define DBG_FP (1 << 1) +#define DBG_VP (1 << 2) +#define DBG_CS (1 << 3) +#define DBG_DRAW (1 << 4) +#define DBG_TEX (1 << 5) +#define DBG_TEXALLOC (1 << 6) +#define DBG_RS (1 << 7) +#define DBG_FALL (1 << 8) +#define DBG_FB (1 << 9) +/* Features. */ +#define DBG_ANISOHQ (1 << 16) +#define DBG_NO_TILING (1 << 17) +#define DBG_NO_IMMD (1 << 18) +/* Statistics. */ +#define DBG_STATS (1 << 24) /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e7fc872662c..ac4e87abe09 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -566,13 +566,35 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, } } +static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, + const char *binding) +{ + struct pipe_resource *tex = surf->texture; + struct r300_texture *rtex = r300_texture(tex); + + fprintf(stderr, + "r300: %s[%i] Dim: %ix%i, Offset: %i, ZSlice: %i, " + "Face: %i, Level: %i, Format: %s\n" + + "r300: TEX: Macro: %s, Micro: %s, Pitch: %i, " + "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n", + + binding, index, surf->width, surf->height, surf->offset, + surf->zslice, surf->face, surf->level, + util_format_short_name(surf->format), + + rtex->macrotile ? "YES" : " NO", rtex->microtile ? 
"YES" : " NO", + rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + tex->last_level, util_format_short_name(tex->format)); +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct pipe_framebuffer_state *old_state = r300->fb_state.state; - unsigned max_width, max_height; + unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; if (state->nr_cbufs > 4) { @@ -634,6 +656,16 @@ static void r300->rs_state.dirty = TRUE; } } + + if (DBG_ON(r300, DBG_FB)) { + fprintf(stderr, "r300: set_framebuffer_state:\n"); + for (i = 0; i < state->nr_cbufs; i++) { + r300_print_fb_surf_info(state->cbufs[i], i, "CB"); + } + if (state->zsbuf) { + r300_print_fb_surf_info(state->zsbuf, 0, "ZB"); + } + } } /* Create fragment shader state. */ @@ -724,8 +756,12 @@ static void* r300_create_rs_state(struct pipe_context* pipe, int i; float psiz; - /* Copy rasterizer state for Draw. */ + /* Copy rasterizer state. */ rs->rs = *state; + rs->rs_draw = *state; + + /* Override some states for Draw. */ + rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; @@ -872,9 +908,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) int last_sprite_coord_enable = r300->sprite_coord_enable; boolean last_two_sided_color = r300->two_sided_color; - if (r300->draw) { + if (r300->draw && rs) { draw_flush(r300->draw); - draw_set_rasterizer_state(r300->draw, &rs->rs, state); + draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); } if (rs) { @@ -1214,7 +1250,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } } -/* Update the PSC tables. */ +/* Initialize the PSC tables. 
*/ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { struct r300_vertex_stream_state *vstream = &velems->vertex_stream; @@ -1353,7 +1389,6 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, abort(); } } - } } return velems; @@ -1374,6 +1409,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, if (r300->draw) { draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, velems->count, velems->velem); + return; } UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); @@ -1396,8 +1432,10 @@ static void* r300_create_vs_state(struct pipe_context* pipe, vs->state = *shader; vs->state.tokens = tgsi_dup_tokens(shader->tokens); + r300_init_vs_outputs(vs); + if (r300->screen->caps.has_tcl) { - r300_translate_vertex_shader(r300, vs, vs->state.tokens); + r300_translate_vertex_shader(r300, vs); } else { vs->draw_vs = draw_create_vertex_shader(r300->draw, shader); } @@ -1467,7 +1505,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, struct r300_constant_buffer *cbuf; struct pipe_transfer *tr; void *mapped; - int max_size = 0; + int max_size = 0, max_size_bytes = 0, clamped_size = 0; switch (shader) { case PIPE_SHADER_VERTEX: @@ -1486,6 +1524,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, assert(0); return; } + max_size_bytes = max_size * 4 * sizeof(float); if (buf == NULL || buf->width0 == 0 || (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL) @@ -1494,19 +1533,21 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, return; } - assert((buf->width0 % 4 * sizeof(float)) == 0); + if (shader == PIPE_SHADER_FRAGMENT || + (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) { + assert((buf->width0 % (4 * sizeof(float))) == 0); - /* Check the size of the constant buffer. */ - /* XXX Subtract immediates and RC_STATE_* variables. 
*/ - if (buf->width0 > (sizeof(float) * 4 * max_size)) { - fprintf(stderr, "r300: Max size of the constant buffer is " - "%i*4 floats.\n", max_size); - abort(); - } + /* Check the size of the constant buffer. */ + /* XXX Subtract immediates and RC_STATE_* variables. */ + if (buf->width0 > max_size_bytes) { + fprintf(stderr, "r300: Max size of the constant buffer is " + "%i*4 floats.\n", max_size); + } + clamped_size = MIN2(buf->width0, max_size_bytes); - memcpy(cbuf->constants, mapped, buf->width0); - cbuf->count = buf->width0 / (4 * sizeof(float)); - pipe_buffer_unmap(pipe, buf, tr); + memcpy(cbuf->constants, mapped, clamped_size); + cbuf->count = clamped_size / (4 * sizeof(float)); + } if (shader == PIPE_SHADER_VERTEX) { if (r300->screen->caps.has_tcl) { @@ -1516,12 +1557,13 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, r300->pvs_flush.dirty = TRUE; } else if (r300->draw) { draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX, - 0, cbuf->constants, - buf->width0); + 0, mapped, buf->width0); } } else if (shader == PIPE_SHADER_FRAGMENT) { r300->fs_constants.dirty = TRUE; } + + pipe_buffer_unmap(pipe, buf, tr); } void r300_init_state_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c7388998270..7583862a1a4 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -116,13 +116,12 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) static void r300_swtcl_vertex_psc(struct r300_context *r300) { struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; - struct vertex_info* vinfo = &r300->vertex_info; + struct vertex_info *vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; int* vs_output_tab = r300->stream_loc_notcl; - /* XXX hax */ memset(vstream, 0, sizeof(struct r300_vertex_stream_state)); /* For 
each Draw attribute, route it to the fragment shader according @@ -615,13 +614,13 @@ void r300_update_derived_state(struct r300_context* r300) if (r300->rs_block_state.dirty) { r300_update_rs_block(r300); - } - if (r300->draw) { - memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); - r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size(&r300->vertex_info); - r300_swtcl_vertex_psc(r300); + if (r300->draw) { + memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info); + r300_swtcl_vertex_psc(r300); + } } r300_update_hyperz_state(r300); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index f3186431e1d..59f89b3482a 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -181,21 +181,23 @@ static void r300_dummy_vertex_shader( state.tokens = ureg_finalize(ureg); shader->dummy = TRUE; - r300_translate_vertex_shader(r300, shader, state.tokens); + r300_translate_vertex_shader(r300, shader); ureg_destroy(ureg); } -void r300_translate_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs, - const struct tgsi_token *tokens) +void r300_init_vs_outputs(struct r300_vertex_shader *vs) +{ + tgsi_scan_shader(vs->state.tokens, &vs->info); + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); +} + +void r300_translate_vertex_shader(struct r300_context *r300, + struct r300_vertex_shader *vs) { struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; - tgsi_scan_shader(tokens, &vs->info); - r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - /* Setup the compiler */ rc_init(&compiler.Base); @@ -205,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, if (compiler.Base.Debug) { debug_printf("r300: Initial vertex program\n"); - tgsi_dump(tokens, 0); + tgsi_dump(vs->state.tokens, 0); } /* Translate TGSI to our internal representation */ @@ -213,7 +215,7 @@ void 
r300_translate_vertex_shader(struct r300_context* r300, ttr.info = &vs->info; ttr.use_half_swizzles = FALSE; - r300_tgsi_to_rc(&ttr, tokens); + r300_tgsi_to_rc(&ttr, vs->state.tokens); compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 57b3fbca0bb..31890d78caf 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -56,8 +56,8 @@ struct r300_vertex_shader { void *draw_vs; }; -void r300_translate_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs, - const struct tgsi_token *tokens); +void r300_init_vs_outputs(struct r300_vertex_shader *vs); +void r300_translate_vertex_shader(struct r300_context *r300, + struct r300_vertex_shader *vs); #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 1642981eaa8..3d0413f90af 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -87,13 +87,8 @@ struct r300_winsys_screen { struct r300_winsys_buffer **pdst, struct r300_winsys_buffer *src); - boolean (*buffer_references)(struct r300_winsys_buffer *a, - struct r300_winsys_buffer *b); - - void (*buffer_flush_range)(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - unsigned offset, - unsigned length); + void (*buffer_wait)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf); /* Add a pipe_resource to the list of buffer objects to validate. 
*/ boolean (*add_buffer)(struct r300_winsys_screen *winsys, diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c index 7d7b9247c35..2b60af2302a 100644 --- a/src/gallium/drivers/rbug/rbug_screen.c +++ b/src/gallium/drivers/rbug/rbug_screen.c @@ -37,7 +37,7 @@ #include "rbug_context.h" #include "rbug_objects.h" -DEBUG_GET_ONCE_BOOL_OPTION(rbug, "GALLIUM_RBUG", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(rbug, "GALLIUM_RBUG", FALSE) static void rbug_screen_destroy(struct pipe_screen *_screen) diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 4ef5d9f7b1d..b959af63aff 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -30,6 +30,7 @@ */ #include "draw/draw_context.h" +#include "os/os_time.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "sp_context.h" @@ -37,6 +38,7 @@ #include "sp_state.h" struct softpipe_query { + unsigned type; uint64_t start; uint64_t end; }; @@ -51,8 +53,13 @@ static struct pipe_query * softpipe_create_query(struct pipe_context *pipe, unsigned type) { - assert(type == PIPE_QUERY_OCCLUSION_COUNTER); - return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); + struct softpipe_query* sq; + + assert(type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_TIME_ELAPSED); + sq = CALLOC_STRUCT( softpipe_query ); + sq->type = type; + + return (struct pipe_query *)sq; } @@ -69,7 +76,17 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) struct softpipe_context *softpipe = softpipe_context( pipe ); struct softpipe_query *sq = softpipe_query(q); - sq->start = softpipe->occlusion_count; + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + sq->start = softpipe->occlusion_count; + break; + case PIPE_QUERY_TIME_ELAPSED: + sq->start = 1000*os_time_get(); + break; + default: + assert(0); + break; + } softpipe->active_query_count++; softpipe->dirty |= SP_NEW_QUERY; } @@ -82,7 +99,17 
@@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) struct softpipe_query *sq = softpipe_query(q); softpipe->active_query_count--; - sq->end = softpipe->occlusion_count; + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + sq->end = softpipe->occlusion_count; + break; + case PIPE_QUERY_TIME_ELAPSED: + sq->end = 1000*os_time_get(); + break; + default: + assert(0); + break; + } softpipe->dirty |= SP_NEW_QUERY; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f874c3e60c0..8c33efa1987 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -82,6 +82,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return 1; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7aa85559b23..4e6123fbd07 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -343,11 +343,15 @@ softpipe_get_transfer(struct pipe_context *pipe, if (spt) { struct pipe_transfer *pt = &spt->base; enum pipe_format format = resource->format; + const unsigned hgt = u_minify(spr->base.height0, sr.level); + const unsigned nblocksy = util_format_get_nblocksy(format, hgt); + pipe_resource_reference(&pt->resource, resource); pt->sr = sr; pt->usage = usage; pt->box = *box; pt->stride = spr->stride[sr.level]; + pt->slice_stride = pt->stride * nblocksy; spt->offset = sp_get_tex_image_offset(spr, sr.level, sr.face, box->z); diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 2c3c3f52202..bef22f41ae5 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -134,6 +134,8 @@ svga_get_paramf(struct 
pipe_screen *screen, enum pipe_cap param) return MIN2(result.u, PIPE_MAX_COLOR_BUFS); case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h index 8724690f7e1..65c675f99c9 100644 --- a/src/gallium/drivers/svga/svga_swtnl.h +++ b/src/gallium/drivers/svga/svga_swtnl.h @@ -30,7 +30,6 @@ struct svga_context; struct pipe_context; -struct pipe_buffer; struct vbuf_render; diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index f21f72b0c79..74c5e83e9e1 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -37,7 +37,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" -struct pipe_buffer; struct pipe_resource; struct pipe_surface; struct pipe_transfer; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 2d363dd47b6..29c55ff094c 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -379,7 +379,8 @@ enum pipe_transfer_usage { #define PIPE_QUERY_OCCLUSION_COUNTER 0 #define PIPE_QUERY_PRIMITIVES_GENERATED 1 #define PIPE_QUERY_PRIMITIVES_EMITTED 2 -#define PIPE_QUERY_TYPES 3 +#define PIPE_QUERY_TIME_ELAPSED 3 +#define PIPE_QUERY_TYPES 4 /** @@ -423,6 +424,7 @@ enum pipe_cap { PIPE_CAP_POINT_SPRITE, PIPE_CAP_MAX_RENDER_TARGETS, PIPE_CAP_OCCLUSION_QUERY, + PIPE_CAP_TIMER_QUERY, PIPE_CAP_TEXTURE_SHADOW_MAP, PIPE_CAP_MAX_TEXTURE_2D_LEVELS, PIPE_CAP_MAX_TEXTURE_3D_LEVELS, diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 7195dc03963..0d9de48c909 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -54,7 +54,6 @@ struct winsys_handle; /** Opaque type */ struct pipe_fence_handle; struct pipe_winsys; -struct pipe_texture; struct pipe_resource; struct pipe_surface; struct 
pipe_transfer; diff --git a/src/gallium/include/state_tracker/dri1_api.h b/src/gallium/include/state_tracker/dri1_api.h index a48c5de5a05..0d702d90928 100644 --- a/src/gallium/include/state_tracker/dri1_api.h +++ b/src/gallium/include/state_tracker/dri1_api.h @@ -9,7 +9,6 @@ struct pipe_screen; struct pipe_winsys; -struct pipe_buffer; struct pipe_context; struct pipe_resource; diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h index 3d8fdd86fc7..8fd0995444d 100644 --- a/src/gallium/include/state_tracker/drm_api.h +++ b/src/gallium/include/state_tracker/drm_api.h @@ -6,7 +6,6 @@ struct pipe_screen; struct pipe_winsys; -struct pipe_buffer; struct pipe_context; struct pipe_resource; diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index bd4a85caa04..26fcae78ece 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -34,7 +34,6 @@ #include <string.h> #include "GL/glx.h" #include "glapi/glapi.h" -#include "pipe/p_compiler.h" struct name_address_pair { diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index b90f9c908d2..921b6900fcd 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -403,7 +403,7 @@ xorg_dri2_init(ScreenPtr pScreen) } #endif - dri2info.version = DRI2INFOREC_VERSION; + dri2info.version = min(DRI2INFOREC_VERSION, 3); dri2info.fd = ms->fd; dri2info.driverName = pScrn->driverName; diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 583493116d5..a9610a86780 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -7,7 +7,6 @@ #include "util/u_draw_quad.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_rect.h" #include 
"util/u_sampler.h" #include "util/u_surface.h" diff --git a/src/gallium/winsys/radeon/drm/radeon_buffer.h b/src/gallium/winsys/radeon/drm/radeon_buffer.h index b48b6358e01..b9ecf9ded07 100644 --- a/src/gallium/winsys/radeon/drm/radeon_buffer.h +++ b/src/gallium/winsys/radeon/drm/radeon_buffer.h @@ -88,4 +88,7 @@ boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf, enum r300_reference_domain domain); + +void radeon_drm_bufmgr_wait(struct pb_buffer *_buf); + #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index b8366498922..a05205da886 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -419,3 +419,10 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) make_empty_list(&mgr->buffer_map_list); } + +void radeon_drm_bufmgr_wait(struct pb_buffer *_buf) +{ + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + + radeon_bo_wait(buf->bo); +} diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 94cd5281e26..e188f7e7ccd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -98,6 +98,13 @@ static void radeon_r300_winsys_buffer_unmap(struct r300_winsys_screen *ws, pb_unmap(_buf); } +static void radeon_r300_winsys_buffer_wait(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + radeon_drm_bufmgr_wait(_buf); +} + static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, struct r300_winsys_buffer **pdst, struct r300_winsys_buffer *src) @@ -343,6 +350,7 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) ws->base.buffer_get_tiling = radeon_r300_winsys_buffer_get_tiling; ws->base.buffer_map = radeon_r300_winsys_buffer_map; 
ws->base.buffer_unmap = radeon_r300_winsys_buffer_unmap; + ws->base.buffer_wait = radeon_r300_winsys_buffer_wait; ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index d4d4270eb86..b997abda9b0 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -145,6 +145,7 @@ wsw_dt_create(struct sw_winsys *ws, * XXX Why don't we just get the template. */ memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; templ.width0 = width; templ.height0 = height; templ.format = format; @@ -175,6 +176,18 @@ wsw_dt_from_handle(struct sw_winsys *ws, return wsw_dt_wrap_texture(wsw, tex, stride); } +static boolean +wsw_dt_get_handle(struct sw_winsys *ws, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + struct wrapper_sw_displaytarget *wdt = wrapper_sw_displaytarget(dt); + struct pipe_resource *tex = wdt->tex; + + return wsw->screen->resource_get_handle(wsw->screen, tex, whandle); +} + static void * wsw_dt_map(struct sw_winsys *ws, struct sw_displaytarget *dt, @@ -267,6 +280,7 @@ wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen) wsw->base.displaytarget_create = wsw_dt_create; wsw->base.displaytarget_from_handle = wsw_dt_from_handle; + wsw->base.displaytarget_get_handle = wsw_dt_get_handle; wsw->base.displaytarget_map = wsw_dt_map; wsw->base.displaytarget_unmap = wsw_dt_unmap; wsw->base.displaytarget_destroy = wsw_dt_destroy; diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index eafb87c3597..45959915b4b 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -361,6 +361,14 @@ dri2WaitGL(__GLXDRIdrawable * pdraw) static void 
dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate) { + __GLXDRIdrawablePrivate *pdraw = loaderPrivate; + __GLXdisplayPrivate *priv = __glXInitialize(pdraw->base.psc->dpy); + __GLXDRIdisplayPrivate *pdp = (__GLXDRIdisplayPrivate *)priv->dri2Display; + + /* Old servers don't send invalidate events */ + if (!pdp->invalidateAvailable) + dri2InvalidateBuffers(priv->dpy, pdraw->base.drawable); + dri2WaitGL(loaderPrivate); } @@ -421,16 +429,16 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, (*pdraw->psc->f->flush)(pdraw->driDrawable); #endif + /* Old servers don't send invalidate events */ + if (!pdp->invalidateAvailable) + dri2InvalidateBuffers(dpyPriv->dpy, pdraw->drawable); + /* Old servers can't handle swapbuffers */ if (!pdp->swapAvailable) { dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height); return 0; } - /* Old servers don't send invalidate events */ - if (!pdp->invalidateAvailable) - dri2InvalidateBuffers(dpyPriv->dpy, pdraw->drawable); - #ifdef X_DRI2SwapBuffers DRI2SwapBuffers(pdraw->psc->dpy, pdraw->xDrawable, target_msc, divisor, remainder, &ret); @@ -737,10 +745,9 @@ dri2CreateDisplay(Display * dpy) pdp->loader_extensions[i++] = &systemTimeExtension.base; #ifdef __DRI_USE_INVALIDATE - if (pdp->invalidateAvailable) - pdp->loader_extensions[i++] = &dri2UseInvalidate.base; - pdp->loader_extensions[i++] = NULL; + pdp->loader_extensions[i++] = &dri2UseInvalidate.base; #endif + pdp->loader_extensions[i++] = NULL; return &pdp->base; } diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 029a16500b5..49ef859e456 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -42,7 +42,6 @@ #include "brw_state.h" #include "brw_clip.h" - #define FRONT_UNFILLED_BIT 0x1 #define BACK_UNFILLED_BIT 0x2 @@ -127,6 +126,14 @@ static void compile_clip_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if 
(INTEL_DEBUG & DEBUG_CLIP) { + printf("clip:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index b27fe654ca9..916a99ea004 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -177,7 +177,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; + struct brw_instruction *is_poly, *is_trifan; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); @@ -195,8 +195,22 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) is_poly = brw_ELSE(p, is_poly); { if (c->key.pv_first) { - brw_clip_copy_colors(c, 1, 0); - brw_clip_copy_colors(c, 2, 0); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRIFAN)); + is_trifan = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 0, 1); + brw_clip_copy_colors(c, 2, 1); + } + is_trifan = brw_ELSE(p, is_trifan); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + brw_ENDIF(p, is_trifan); } else { brw_clip_copy_colors(c, 0, 2); diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 34a966a47a2..a730664f9b3 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -370,18 +370,13 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) need_ff_sync = brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* 
response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } brw_ENDIF(p, need_ff_sync); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6b04ad9ec6a..dc4bd5802d4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -192,8 +192,6 @@ GLboolean brwCreateContext( int api, ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1f09651126b..a97fcb0f4db 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -418,18 +418,12 @@ struct brw_vertex_info { struct brw_query_object { struct gl_query_object Base; - /** Doubly linked list of active query objects in the context. */ - struct brw_query_object *prev, *next; - /** Last query BO associated with this query. */ dri_bo *bo; /** First index in bo with query data for this object. */ int first_index; /** Last index in bo with query data for this object. 
*/ int last_index; - - /* Total count of pixels from previous BOs */ - unsigned int count; }; @@ -664,7 +658,7 @@ struct brw_context } cc; struct { - struct brw_query_object active_head; + struct brw_query_object *obj; dri_bo *bo; int index; GLboolean active; @@ -726,7 +720,7 @@ void brw_upload_urb_fence(struct brw_context *brw); void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst); +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f26a13fc3c3..2d3556b8054 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -783,7 +783,7 @@ #define CMD_BINDING_TABLE_PTRS 0x7801 # define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) # define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) #define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index db3fc50a63b..ff12daf497d 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -323,6 +323,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -774,7 +779,7 @@ static int src1 (FILE *file, struct brw_instruction *inst) } } -int brw_disasm (FILE *file, struct brw_instruction *inst) +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -829,12 +834,20 @@ int brw_disasm (FILE 
*file, struct brw_instruction *inst) } if (inst->header.opcode == BRW_OPCODE_SEND) { + int target; + + if (gen >= 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + newline (file); pad (file, 16); space = 0; err |= control (file, "target function", target_function, - inst->bits3.generic.msg_target, &space); - switch (inst->bits3.generic.msg_target) { + target, &space); + + switch (target) { case BRW_MESSAGE_TARGET_MATH: err |= control (file, "math function", math_function, inst->bits3.math.function, &space); @@ -864,8 +877,17 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) inst->bits3.dp_write.send_commit_msg); break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 8247faa36d8..9cbff24863d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -59,7 +59,7 @@ static GLuint half_float_types[5] = { 0, BRW_SURFACEFORMAT_R16_FLOAT, BRW_SURFACEFORMAT_R16G16_FLOAT, - 0, /* can't seem to render this one */ + BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 4f55158e8f3..3a32ad26c12 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -822,13 +822,8 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint 
msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle); + GLboolean eot); void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 785d382a009..175899b0268 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -280,28 +280,23 @@ static void brw_set_math_message( struct brw_context *brw, } -static void brw_set_ff_sync_message( struct brw_context *brw, - struct brw_instruction *insn, - GLboolean allocate, - GLboolean used, - GLuint msg_length, - GLuint response_length, - GLboolean end_of_thread, - GLboolean complete, - GLuint offset, - GLuint swizzle_control ) +static void brw_set_ff_sync_message(struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLuint response_length, + GLboolean end_of_thread) { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_gen5.opcode = 1; - insn->bits3.urb_gen5.offset = offset; - insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; - insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.header_present = 1; - insn->bits3.urb_gen5.response_length = response_length; - insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ + insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; insn->bits2.send_gen5.end_of_thread = end_of_thread; @@ -1451,18 +1446,11 @@ void 
brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle) + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); - brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1470,13 +1458,8 @@ void brw_ff_sync(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, - insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); + insn, + allocate, + response_length, + eot); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 4b13494ecf9..94d93f3aa65 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -122,6 +122,16 @@ static void compile_gs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_GS) { + int i; + + printf("gs:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->gs.prog_bo); @@ -163,6 +173,12 @@ static void populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); + if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) { + /* Provide consistent primitive order with brw_set_prim's + * optimization of single quads to trifans. 
+ */ + key->pv_first = GL_TRUE; + } key->need_gs_prog = (key->hint_gs_always || brw->primitive == GL_QUADS || diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index dd7b057d620..99a6f6be113 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -104,18 +104,13 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 6cce7e50890..3f47a68049f 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -38,7 +38,6 @@ * required for handling queries, so that we can be sure that we won't * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. */ -#include "main/simple_list.h" #include "main/imports.h" #include "brw_context.h" @@ -105,7 +104,7 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q) query->first_index = -1; query->last_index = -1; - insert_at_head(&brw->query.active_head, query); + brw->query.obj = query; intel->stats_wm++; } @@ -131,7 +130,7 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q) brw->query.bo = NULL; } - remove_from_list(query); + brw->query.obj = NULL; intel->stats_wm--; } @@ -161,7 +160,7 @@ brw_prepare_query_begin(struct brw_context *brw) struct intel_context *intel = &brw->intel; /* Skip if we're not doing any queries. */ - if (is_empty_list(&brw->query.active_head)) + if (!brw->query.obj) return; /* Get a new query BO if we're going to need it. 
*/ @@ -182,10 +181,10 @@ void brw_emit_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_query_object *query; + struct brw_query_object *query = brw->query.obj; /* Skip if we're not doing any queries, or we've emitted the start. */ - if (brw->query.active || is_empty_list(&brw->query.active_head)) + if (!query || brw->query.active) return; BEGIN_BATCH(4); @@ -205,16 +204,14 @@ brw_emit_query_begin(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); - foreach(query, &brw->query.active_head) { - if (query->bo != brw->query.bo) { - if (query->bo != NULL) - brw_queryobj_get_results(query); - dri_bo_reference(brw->query.bo); - query->bo = brw->query.bo; - query->first_index = brw->query.index; - } - query->last_index = brw->query.index; + if (query->bo != brw->query.bo) { + if (query->bo != NULL) + brw_queryobj_get_results(query); + dri_bo_reference(brw->query.bo); + query->bo = brw->query.bo; + query->first_index = brw->query.index; } + query->last_index = brw->query.index; brw->query.active = GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 57d1c29ade1..b0dd1ff3afb 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,6 +46,7 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -107,6 +108,14 @@ static void compile_sf_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_SF) { + printf("sf:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->sf.prog_bo); @@ -154,6 +163,7 @@ static void upload_sf_prog(struct brw_context *brw) break; } + /* _NEW_POINT */ 
key.do_point_sprite = ctx->Point.PointSprite; if (key.do_point_sprite) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 44b085e214b..57ffb2d89e0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -48,6 +48,7 @@ static void do_vs_prog( struct brw_context *brw, const GLuint *program; struct brw_vs_compile c; int aux_size; + int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -63,6 +64,17 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); + } + if (0) _mesa_print_program(&c.vp->program.Base); @@ -106,6 +118,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + int i; memset(&key, 0, sizeof(key)); @@ -117,6 +130,14 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + /* _NEW_POINT */ + if (ctx->Point.PointSprite) { + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_coord_replace |= (1 << i); + } + } + /* Make an early check for the key. 
*/ dri_bo_unreference(brw->vs.prog_bo); @@ -135,7 +156,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 95e0501b1eb..6493744f3eb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -43,7 +43,7 @@ struct brw_vs_prog_key { GLuint program_string_id; GLuint nr_userclip:4; GLuint copy_edgeflag:1; - GLuint pad:26; + GLuint point_coord_replace:8; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dc6ab81c4ac..0b44deeb634 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1882,7 +1882,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 375e7953912..323cfac8fa7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1717,7 +1717,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], p->brw->intel.gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 88b885cb941..fe3c89b7212 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -2111,7 +2111,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (INTEL_DEBUG & DEBUG_WM) { printf("wm-native:\n"); for (i = 0; i < 
p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 7d9f302dca6..a590c799ad3 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -353,6 +353,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(clear_val); ADVANCE_BATCH(); + if (intel->always_flush_cache) + intel_batchbuffer_emit_mi_flush(intel->batch); + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); else diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 0369942b39e..a94f6886f98 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -471,12 +471,12 @@ static const struct dri_debug_control debug_control[] = { { "buf", DEBUG_BUFMGR}, { "reg", DEBUG_REGION}, { "fbo", DEBUG_FBO}, - { "lock", DEBUG_LOCK}, + { "gs", DEBUG_GS}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", DEBUG_VERTS }, { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, + { "sf", DEBUG_SF }, { "san", DEBUG_SANITY }, { "sleep", DEBUG_SLEEP }, { "stats", DEBUG_STATS }, @@ -487,6 +487,7 @@ static const struct dri_debug_control debug_control[] = { { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, + { "clip", DEBUG_CLIP }, { NULL, 0 } }; @@ -905,6 +906,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv, driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); _mesa_make_current(&intel->ctx, fb, readFb); + + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer + * is NULL at that point. We can't call _mesa_makecurrent() + * first, since we need the buffer size for the initial + * viewport. So just call intel_draw_buffer() again here. 
*/ + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index db244e58729..dae5896a5cd 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -326,12 +326,12 @@ extern int INTEL_DEBUG; #define DEBUG_BUFMGR 0x200 #define DEBUG_REGION 0x400 #define DEBUG_FBO 0x800 -#define DEBUG_LOCK 0x1000 +#define DEBUG_GS 0x1000 #define DEBUG_SYNC 0x2000 #define DEBUG_PRIMS 0x4000 #define DEBUG_VERTS 0x8000 #define DEBUG_DRI 0x10000 -#define DEBUG_DMA 0x20000 +#define DEBUG_SF 0x20000 #define DEBUG_SANITY 0x40000 #define DEBUG_SLEEP 0x80000 #define DEBUG_STATS 0x100000 @@ -341,6 +341,7 @@ extern int INTEL_DEBUG; #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 #define DEBUG_GLSL_FORCE 0x4000000 +#define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 7be5231eaef..610a169beb2 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -1,7 +1,7 @@ #include "intel_context.h" #include "intel_tex.h" #include "main/enums.h" - +#include "main/formats.h" /** * Choose hardware texture format given the user's glTexImage parameters. 
@@ -208,22 +208,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, int intel_compressed_num_bytes(GLuint mesaFormat) { - int bytes = 0; - switch(mesaFormat) { - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - bytes = 4; - default: - break; - } - - return bytes; + GLuint bw, bh; + GLuint block_size; + + block_size = _mesa_get_format_bytes(mesaFormat); + _mesa_get_format_block_size(mesaFormat, &bw, &bh); + + return block_size / bh; } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index e432afc3d41..34d22b45591 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_optimize.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index cfa48a59e3a..5d5de2f1b2a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = { {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} }; static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); @@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case 
RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 25bf373b6fd..3e88ccbc46d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + rc_optimize(&c->Base); + + debug_program_log(c, "after dataflow optimize"); + rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 16e2f3a2181..0e6c62541fa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,7 +30,7 @@ #include "radeon_program.h" -static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); @@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, refmask &= RC_MASK_XYZW; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(refmask, chan)) { - cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); - } - } + if (refmask) + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); if (refmask && inst->SrcReg[src].RelAddr) cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_pair(struct 
rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; unsigned int refmasks[3] = { 0, 0, 0 }; @@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v } for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(refmasks[src], chan)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); - } - } + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); - if (inst->Alpha.Src[src].Used) { - if (GET_BIT(refmasks[src], 3)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); - } + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); } } /** - * Calls a callback function for all sourced register channels. + * Calls a callback function for all register reads. * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. 
*/ -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { reads_normal(inst, cb, userdata); @@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(inst->DstReg.WriteMask, chan)) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); - } - } + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(inst->RGB.WriteMask, chan)) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); - } + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, 
RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } /** - * Calls a callback function for all written register channels. + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. * * \warning Does not report output registers for paired instructions! */ -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { writes_normal(inst, cb, userdata); @@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * } +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! 
+ */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + static void remap_normal_instruction(struct rc_instruction * fullinst, rc_remap_register_fn cb, void * userdata) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 62cda20eea6..60a6e192a9f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -39,10 +39,15 @@ struct rc_swizzle_caps; * Help analyze and modify the register accesses of instructions. */ /*@{*/ -typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, rc_register_file * pfile, unsigned int * pindex); @@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f void rc_dataflow_swizzles(struct 
radeon_compiler * c); /*@}*/ +void rc_optimize(struct radeon_compiler * c); + #endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index d889612f4f4..863654cf685 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s, sap.Proxies = proxies; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - rc_for_all_writes(inst, scan_write, &sap); + rc_for_all_writes_mask(inst, scan_write, &sap); rc_remap_registers(inst, remap_proxy_function, &sap); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..21d72108886 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(outer.Swizzle, chan); + if (swz < 4) + combine.Negate |= GET_BIT(inner.Negate, swz) << chan; + } + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +struct peephole_state { + struct radeon_compiler * C; + struct rc_instruction * Mov; + unsigned int Conflict:1; + + /** Whether Mov's source has been clobbered */ + unsigned int SourceClobbered:1; + + /** Which components of Mov's destination register are still from that Mov? 
*/ + unsigned int MovMask:4; + + /** Which components of Mov's destination register are clearly *not* from that Mov */ + unsigned int DefinedMask:4; + + /** Which components of Mov's source register are sourced */ + unsigned int SourcedMask:4; + + /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ + int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + return; + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + s->Conflict = 1; + return; + } + if ((mask & s->MovMask) == mask) { + if (s->SourceClobbered) { + s->Conflict = 1; + } + } else if ((mask & s->DefinedMask) == mask) { + /* read from something entirely written by other instruction: this is okay */ + } else { + /* read from component combination that is not well-defined without + * the MOV: cannot remove it */ + s->Conflict = 1; + } +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (s->BranchDepth < 0) + return; + + if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { + s->MovMask &= ~mask; + if (s->BranchDepth == 0) + s->DefinedMask |= mask; + else + s->DefinedMask &= ~mask; + } + if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (mask & s->SourcedMask) + s->SourceClobbered = 1; + } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { + 
s->SourceClobbered = 1; + } +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct peephole_state s; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.Mov = inst_mov; + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); + s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; + } + + /* 1st pass: Check whether all subsequent readers can be changed */ + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, peephole_scan_read, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + if (s.Conflict) + return; + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) { + s.DefinedMask &= ~s.MovMask; + s.MovMask = 0; + } + } + } + } + + if (s.Conflict) + return; + + /* 2nd pass: We can satisfy all readers, so switch them over all at once */ + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.BranchDepth = 0; + + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { + unsigned int refmask = 0; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + refmask |= (1 << swz) & RC_MASK_XYZW; + } + + if ((refmask & s.MovMask) == refmask) + inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], 
s.Mov->U.I.SrcReg[0]); + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { + s.MovMask &= ~inst->U.I.DstReg.WriteMask; + } + } + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) + break; /* no more readers after this point */ + } + } + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + * + * Returns 1 when the register has file RC_FILE_NONE and every used channel + * carries the same non-component selector with the same negate bit; in that + * case *pswz receives the selector and *pnegate the negate bit. + * + * Returns 0 otherwise, with *pswz set to 0; *pnegate must not be relied + * upon then. A register whose four channels are all RC_SWIZZLE_UNUSED is + * not treated as a uniform constant, because there is no channel to take + * the selector from. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + /* selectors below 4 pick a register component, not a constant */ + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + /* Every channel was unused: nothing has been stored in *pswz or + * *pnegate, so reporting success would make the callers compare + * indeterminate values. */ + if (!have_used) { + *pswz = 0; + return 0; + } + + return 1; +} + +/** + * Simplify a MAD whose operands are uniform 0 or 1 constants: + * - third operand 0: the instruction becomes a MUL, + * - first or second operand 1: it becomes an ADD of the other factor + * (negated when the constant was negated) and the third operand, + * - first or second operand 0: it becomes a MOV of the third operand. + */ +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if
(swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} +/** + * Simplify a MUL that has a uniform 0 or 1 operand into a MOV. + * + * A factor of 1 turns the instruction into a MOV of the other operand, + * with all four negate bits flipped when the constant was negated. + * A factor of 0 turns it into a MOV of source 0 with its swizzle forced + * to RC_SWIZZLE_0000. + */ +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} +/** + * Simplify an ADD that has a uniform 0 operand into a MOV of the other + * operand. The negate flag reported for the zero constant is not used. + */ +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles.
Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + /* only non-relative, in-range operands from the constant file get here */ + struct rc_constant * constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + struct rc_src_register newsrc = inst->U.I.SrcReg[src]; + int have_real_reference = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + if (swz >= 4) + continue; /* selector does not index one of the four immediate values */ + + unsigned int newswz; + float imm = constant->u.Immediate[swz]; + float baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; /* a negative immediate is expressed as its magnitude selector plus a flipped negate bit, unless the operand takes the absolute value */ + } + + if (!have_real_reference) { /* every component channel was replaced, so the constant register is no longer referenced */ + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); +} +/** + * Run the optimizations of this file over the whole program: every + * instruction goes through constant_folding(), and every instruction that + * is a MOV afterwards is handed to peephole(). + */ +void
rc_optimize(struct radeon_compiler * c) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; /* advance before processing: peephole() may remove cur */ + + constant_folding(c, cur); + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + peephole(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index fdfee867014..8a912da4613 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s, } static void scan_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) + rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; @@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - rc_for_all_reads(inst, scan_callback, s); - rc_for_all_writes(inst, scan_callback, s); + rc_for_all_reads_mask(inst, scan_callback, s); + rc_for_all_writes_mask(inst, scan_callback, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index df67aafe028..a279549ff89 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from.
*/ - rc_for_all_writes(inst, &scan_write, &s); - rc_for_all_reads(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 42c08cd5505..8336e58d554 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -119,7 +119,7 @@ int radeonTransformTEX( struct rc_instruction * inst_cmp; unsigned tmp_texsample = rc_find_free_temporary(c); unsigned tmp_sum = rc_find_free_temporary(c); - unsigned tmp_recip_w; + unsigned tmp_recip_w = 0; int pass, fail, tex; /* Save the output register. */ diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c index 658ac9e40cf..57a33292ed1 100644 --- a/src/mesa/main/dlopen.c +++ b/src/mesa/main/dlopen.c @@ -67,22 +67,27 @@ _mesa_dlopen(const char *libname, int flags) GenericFunc _mesa_dlsym(void *handle, const char *fname) { + union { + void *v; + GenericFunc f; + } u; #if defined(__blrts) - return (GenericFunc) NULL; + u.v = NULL; #elif defined(__DJGPP__) /* need '_' prefix on symbol names */ char fname2[1000]; fname2[0] = '_'; strncpy(fname2 + 1, fname, 998); fname2[999] = 0; - return (GenericFunc) dlsym(handle, fname2); + u.v = dlsym(handle, fname2); #elif defined(_GNU_SOURCE) - return (GenericFunc) dlsym(handle, fname); + u.v = dlsym(handle, fname); #elif defined(__MINGW32__) - return (GenericFunc) GetProcAddress(handle, fname); + u.v = (void *) GetProcAddress(handle, fname); #else - return (GenericFunc) NULL; + u.v = NULL; #endif + return u.f; } diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c index 86d5b555e0f..c2ad5f23862 100644 --- a/src/mesa/main/drawtex.c +++ b/src/mesa/main/drawtex.c @@ -25,8 +25,6 @@ #include "main/state.h" #include "main/imports.h" -#include 
"main/dispatch.h" - #if FEATURE_OES_draw_texture diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index d2dcddddf27..12d046b0754 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -25,6 +25,7 @@ #include "glheader.h" #include "context.h" #include "enable.h" +#include "enums.h" #include "extensions.h" #include "get.h" #include "macros.h" @@ -135,8 +136,8 @@ enum value_extra { struct value_desc { GLenum pname; - enum value_location location : 8; - enum value_type type : 8; + GLubyte location; /**< enum value_location */ + GLubyte type; /**< enum value_type */ int offset; const int *extra; }; @@ -1678,7 +1679,8 @@ check_extra(GLcontext *ctx, const char *func, const struct value_desc *d) } if (total > 0 && enabled == 0) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, d->pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(d->pname)); return GL_FALSE; } @@ -1727,7 +1729,8 @@ find_value(const char *func, GLenum pname, void **p, union value *v) /* If the enum isn't valid, the hash walk ends with index 0, * which is the API mask entry at the beginning of values[]. 
*/ if (d->type == TYPE_API_MASK) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return &error_value; } hash += prime_step; @@ -2256,10 +2259,12 @@ find_value_indexed(const char *func, GLenum pname, int index, union value *v) } invalid_enum: - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return TYPE_INVALID; invalid_value: - _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return TYPE_INVALID; } diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index e5c08a64146..a6b04e95519 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -70,7 +70,7 @@ fpclassify(double x) } } -#elif defined(__APPLE__) || defined(__CYGWIN__) +#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) /* fpclassify is available. 
*/ diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index cd3dd9b38c1..050ebf02701 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -190,7 +190,8 @@ _mesa_free_transform_feedback(GLcontext *ctx) /* Delete the default feedback object */ assert(ctx->Driver.DeleteTransformFeedback); - ctx->Driver.DeleteTransformFeedback(ctx, ctx->TransformFeedback.DefaultObject); + ctx->Driver.DeleteTransformFeedback(ctx, + ctx->TransformFeedback.DefaultObject); ctx->TransformFeedback.CurrentObject = NULL; } @@ -749,7 +750,7 @@ _mesa_BindTransformFeedback(GLenum target, GLuint name) if (ctx->TransformFeedback.CurrentObject->Active && !ctx->TransformFeedback.CurrentObject->Paused) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBindTransformFeedback(transform is active, or not paused)"); + "glBindTransformFeedback(transform is active, or not paused)"); return; } @@ -844,7 +845,7 @@ _mesa_ResumeTransformFeedback(void) if (!obj->Active || !obj->Paused) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glPauseTransformFeedback(feedback not active or not paused)"); + "glPauseTransformFeedback(feedback not active or not paused)"); return; } @@ -871,6 +872,11 @@ _mesa_DrawTransformFeedback(GLenum mode, GLuint name) struct gl_transform_feedback_object *obj = lookup_transform_feedback_object(ctx, name); + if (mode > GL_POLYGON) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glDrawTransformFeedback(mode=0x%x)", mode); + return; + } if (!obj) { _mesa_error(ctx, GL_INVALID_VALUE, "glDrawTransformFeedback(name = %u)", name); diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 505c7bb46f9..f47f213ac84 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1753,7 +1753,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* check that the sampler (tex unit index) is legal */ if (texUnit >= ctx->Const.MaxTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, - 
"glUniform1(invalid sampler/tex unit index)"); + "glUniform1(invalid sampler/tex unit index for '%s')", + param->Name); return; } @@ -1801,7 +1802,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* non-array: count must be at most one; count == 0 is handled by the loop below */ if (count > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glUniform(uniform is not an array)"); + "glUniform(uniform '%s' is not an array)", + param->Name); return; } } @@ -1864,14 +1866,15 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, return; /* The standard specifies this as a no-op */ if (location < -1) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location=%d)", + location); return; } split_location_offset(&location, &offset); if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) { - _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location)"); + _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location=%d)", location); return; } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index a8bd5db6304..e423d9d8a51 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -94,6 +94,9 @@ st_BeginQuery(GLcontext *ctx, struct gl_query_object *q) case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: type = PIPE_QUERY_PRIMITIVES_EMITTED; break; + case GL_TIME_ELAPSED_EXT: + type = PIPE_QUERY_TIME_ELAPSED; + break; default: assert(0 && "unexpected query target in st_BeginQuery()"); return; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 12d3c99a351..b8493dab93f 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -46,6 +46,7 @@ #include "st_debug.h" #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" @@ -344,6 +345,8 
@@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, return; } + st_flush_bitmap_cache(st); + dest = _mesa_map_pbo_dest(ctx, &clippedPacking, dest); if (!dest) return; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 0cd80fa59f7..459e924cca3 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -296,6 +296,9 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY)) { ctx->Extensions.ARB_occlusion_query = GL_TRUE; } + if (screen->get_param(screen, PIPE_CAP_TIMER_QUERY)) { + ctx->Extensions.EXT_timer_query = GL_TRUE; + } if (screen->get_param(screen, PIPE_CAP_TEXTURE_SHADOW_MAP)) { ctx->Extensions.ARB_depth_texture = GL_TRUE; |