diff options
Diffstat (limited to 'src')
661 files changed, 48753 insertions, 21948 deletions
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 0aca929e6aa..f09ae14f10c 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -592,6 +592,13 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) wl_display_destroy(dri2_dpy->wl_dpy); break; #endif +#ifdef HAVE_DRM_PLATFORM + case _EGL_PLATFORM_DRM: + if (dri2_dpy->own_gbm_device) { + gbm_device_destroy(&dri2_dpy->gbm_dri->base.base); + } + break; +#endif default: break; } @@ -719,6 +726,23 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, } /** + * Called via eglDestroyContext(), drv->API.DestroyContext(). + */ +static EGLBoolean +dri2_destroy_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) +{ + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + + if (_eglPutContext(ctx)) { + dri2_dpy->core->destroyContext(dri2_ctx->dri_context); + free(dri2_ctx); + } + + return EGL_TRUE; +} + +/** * Called via eglMakeCurrent(), drv->API.MakeCurrent(). */ static EGLBoolean @@ -758,9 +782,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, drv->API.DestroySurface(drv, disp, old_dsurf); if (old_rsurf) drv->API.DestroySurface(drv, disp, old_rsurf); - /* no destroy? */ if (old_ctx) - _eglPutContext(old_ctx); + drv->API.DestroyContext(drv, disp, old_ctx); return EGL_TRUE; } else { @@ -1341,6 +1364,7 @@ _EGL_MAIN(const char *args) dri2_drv->base.API.Initialize = dri2_initialize; dri2_drv->base.API.Terminate = dri2_terminate; dri2_drv->base.API.CreateContext = dri2_create_context; + dri2_drv->base.API.DestroyContext = dri2_destroy_context; dri2_drv->base.API.MakeCurrent = dri2_make_current; dri2_drv->base.API.GetProcAddress = dri2_get_proc_address; dri2_drv->base.API.WaitClient = dri2_wait_client; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 3854200bc69..db93eec14ba 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -86,6 +86,7 @@ struct dri2_egl_display #ifdef HAVE_DRM_PLATFORM struct gbm_dri_device *gbm_dri; + int own_gbm_device; #endif char *device_name; @@ -122,8 +123,6 @@ enum wayland_buffer_type { WL_BUFFER_THIRD, WL_BUFFER_COUNT }; - -#define __DRI_BUFFER_COUNT 10 #endif enum dri2_surface_type { diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 579baf9f9d2..04b10e279ec 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -30,6 +30,10 @@ #include <string.h> #include <xf86drm.h> #include <dlfcn.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> #include "egl_dri2.h" @@ -90,6 +94,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy; struct gbm_device *gbm; + int fd = -1; int i; dri2_dpy = malloc(sizeof *dri2_dpy); @@ -100,7 +105,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) disp->DriverData = (void *) dri2_dpy; - gbm = (struct gbm_device *) disp->PlatformDisplay; + gbm = disp->PlatformDisplay; + if (gbm == NULL) { + fd = open("/dev/dri/card0", O_RDWR); + dri2_dpy->own_gbm_device = 1; + gbm = gbm_create_device(fd); + if (gbm == NULL) + return EGL_FALSE; + } + if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { free(dri2_dpy); return EGL_FALSE; @@ -112,7 +125,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) return EGL_FALSE; } - dri2_dpy->fd = gbm_device_get_fd(gbm); + if (fd < 0) { + fd = dup(gbm_device_get_fd(gbm)); + if (fd < 0) { + free(dri2_dpy); + return EGL_FALSE; + } + } + + dri2_dpy->fd = fd; dri2_dpy->device_name = dri2_get_device_name_for_fd(dri2_dpy->fd); dri2_dpy->driver_name = dri2_dpy->gbm_dri->base.driver_name; diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c index 7cf8f4d5514..042936f960e 100644 --- a/src/egl/drivers/glx/egl_glx.c +++ b/src/egl/drivers/glx/egl_glx.c @@ -713,6 +713,24 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, return &GLX_ctx->Base; } +/** + * Called via eglDestroyContext(), drv->API.DestroyContext(). + */ +static EGLBoolean +GLX_eglDestroyContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) +{ + struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv); + struct GLX_egl_context *GLX_ctx = GLX_egl_context(ctx); + + if (_eglPutContext(ctx)) { + assert(GLX_ctx); + GLX_drv->glXDestroyContext(disp, ctx); + + free(GLX_ctx); + } + + return EGL_TRUE; +} /** * Destroy a surface. The display is allowed to be uninitialized. @@ -1142,6 +1160,7 @@ _EGL_MAIN(const char *args) GLX_drv->Base.API.Initialize = GLX_eglInitialize; GLX_drv->Base.API.Terminate = GLX_eglTerminate; GLX_drv->Base.API.CreateContext = GLX_eglCreateContext; + GLX_drv->Base.API.DestroyContext = GLX_eglDestroyContext; GLX_drv->Base.API.MakeCurrent = GLX_eglMakeCurrent; GLX_drv->Base.API.CreateWindowSurface = GLX_eglCreateWindowSurface; GLX_drv->Base.API.CreatePixmapSurface = GLX_eglCreatePixmapSurface; diff --git a/src/egl/main/Android.mk b/src/egl/main/Android.mk new file mode 100644 index 00000000000..25a7c657676 --- /dev/null +++ b/src/egl/main/Android.mk @@ -0,0 +1,64 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for core EGL + +LOCAL_PATH := $(call my-dir) + +# from Makefile +SOURCES = \ + eglapi.c \ + eglarray.c \ + eglconfig.c \ + eglcontext.c \ + eglcurrent.c \ + egldisplay.c \ + egldriver.c \ + eglfallbacks.c \ + eglglobals.c \ + eglimage.c \ + egllog.c \ + eglmisc.c \ + eglmode.c \ + eglscreen.c \ + eglstring.c \ + eglsurface.c \ + eglsync.c + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(SOURCES) + +LOCAL_CFLAGS := \ + -D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \ + -D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \ + -D_EGL_OS_UNIX=1 + +ifeq ($(strip $(MESA_BUILD_GALLIUM)),true) +LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_GALLIUM +endif + +LOCAL_MODULE := libmesa_egl + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile index 775fbbe178b..3172ad2ec03 100644 --- a/src/egl/main/Makefile +++ b/src/egl/main/Makefile @@ -63,6 +63,7 @@ EGL_LIB_DEPS += $(XCB_DRI2_LIBS) endif ifneq ($(findstring drm, $(EGL_PLATFORMS)),) EGL_LIB_DEPS += -lgbm +INCLUDE_DIRS += -I$(TOP)/src/gbm/main endif EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS) endif @@ -70,6 +71,7 @@ endif ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) LOCAL_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a +INCLUDE_DIRS += $(WAYLAND_CFLAGS) endif ifeq ($(filter glx, $(EGL_DRIVERS_DIRS)),glx) @@ -93,6 +95,19 @@ ifeq ($(firstword $(EGL_PLATFORMS)),fbdev) EGL_NATIVE_PLATFORM=_EGL_PLATFORM_FBDEV endif +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_X11_PLATFORM +endif +ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif +ifneq ($(findstring drm, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_DRM_PLATFORM +endif +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +LOCAL_CFLAGS += -DHAVE_FBDEV_PLATFORM +endif + LOCAL_CFLAGS += \ -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \ -D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\" diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 0ba7794e2c9..3cb1a5baaf3 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -301,7 +301,7 @@ _eglUnlockDisplay(_EGLDisplay *dpy) EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType nativeDisplay) { - _EGLPlatformType plat = _eglGetNativePlatform(); + _EGLPlatformType plat = _eglGetNativePlatform(nativeDisplay); _EGLDisplay *dpy = _eglFindDisplay(plat, (void *) nativeDisplay); return _eglGetDisplayHandle(dpy); } @@ -538,7 +538,7 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE); surf = drv->API.CreateWindowSurface(drv, disp, conf, window, attrib_list); @@ -559,7 +559,7 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE); surf = drv->API.CreatePixmapSurface(drv, disp, conf, pixmap, attrib_list); @@ -720,7 +720,7 @@ eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target) EGLBoolean ret; _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv); - if (disp->Platform != _eglGetNativePlatform()) + if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay)) RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_FALSE); ret = drv->API.CopyBuffers(drv, disp, surf, target); @@ -948,6 +948,9 @@ eglGetProcAddress(const char *procname) { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL }, { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL }, #endif +#ifdef EGL_ANDROID_swap_rectangle + { "eglSetSwapRectangleANDROID", (_EGLProc) eglSetSwapRectangleANDROID }, +#endif { NULL, NULL } }; EGLint i; @@ -1565,3 +1568,25 @@ eglUnbindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display) RETURN_EGL_EVAL(disp, ret); } #endif + +#ifdef EGL_ANDROID_swap_rectangle +EGLBoolean EGLAPIENTRY +eglSetSwapRectangleANDROID(EGLDisplay dpy, EGLSurface draw, + EGLint left, EGLint top, + EGLint width, EGLint height) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLSurface *surf = _eglLookupSurface(draw, disp); + _EGLDriver *drv; + EGLBoolean ret; + + _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv); + + if (!disp->Extensions.ANDROID_swap_rectangle) + RETURN_EGL_EVAL(disp, EGL_FALSE); + + ret = drv->API.SetSwapRectangleANDROID(drv, disp, surf, left, top, width, height); + + RETURN_EGL_EVAL(disp, ret); +} +#endif diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 4fcbe40cd4c..1e0aef69dd7 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -131,6 +131,10 @@ typedef EGLBoolean (*BindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, typedef EGLBoolean (*UnbindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display); #endif +#ifdef EGL_ANDROID_swap_rectangle +typedef EGLBoolean (*SetSwapRectangleANDROID_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw, EGLint left, EGLint top, EGLint width, EGLint height); +#endif + /** * The API dispatcher jumps through these functions */ @@ -210,6 +214,10 @@ struct _egl_api BindWaylandDisplayWL_t BindWaylandDisplayWL; UnbindWaylandDisplayWL_t UnbindWaylandDisplayWL; #endif + +#ifdef EGL_ANDROID_swap_rectangle + SetSwapRectangleANDROID_t SetSwapRectangleANDROID; +#endif }; #endif /* EGLAPI_INCLUDED */ diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index 483d9807cf0..e1d53da3cd5 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -529,8 +529,9 @@ _eglParseConfigAttribList(_EGLConfig *conf, _EGLDisplay *dpy, if (!_eglValidateConfig(conf, EGL_TRUE)) return EGL_FALSE; - /* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */ - if (conf->Level == EGL_DONT_CARE) + /* EGL_LEVEL and EGL_MATCH_NATIVE_PIXMAP cannot be EGL_DONT_CARE */ + if (conf->Level == EGL_DONT_CARE || + conf->MatchNativePixmap == EGL_DONT_CARE) return EGL_FALSE; /* ignore other attributes when EGL_CONFIG_ID is given */ diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 60f31777272..1d05e57c429 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -43,6 +43,39 @@ #include "eglmutex.h" #include "egllog.h" +/* Includes for _eglNativePlatformDetectNativeDisplay */ +#ifdef HAVE_MINCORE +#include <unistd.h> +#include <sys/mman.h> +#endif +#ifdef HAVE_WAYLAND_PLATFORM +#include <wayland-client.h> +#endif +#ifdef HAVE_DRM_PLATFORM +#include <gbm.h> +#endif +#ifdef HAVE_FBDEV_PLATFORM +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#endif + + +/** + * Map --with-egl-platforms names to platform types. + */ +static const struct { + _EGLPlatformType platform; + const char *name; +} egl_platforms[_EGL_NUM_PLATFORMS] = { + { _EGL_PLATFORM_WINDOWS, "gdi" }, + { _EGL_PLATFORM_X11, "x11" }, + { _EGL_PLATFORM_WAYLAND, "wayland" }, + { _EGL_PLATFORM_DRM, "drm" }, + { _EGL_PLATFORM_FBDEV, "fbdev" }, + { _EGL_PLATFORM_ANDROID, "android" } +}; + /** * Return the native platform by parsing EGL_PLATFORM. @@ -50,17 +83,6 @@ static _EGLPlatformType _eglGetNativePlatformFromEnv(void) { - /* map --with-egl-platforms names to platform types */ - static const struct { - _EGLPlatformType platform; - const char *name; - } egl_platforms[_EGL_NUM_PLATFORMS] = { - { _EGL_PLATFORM_WINDOWS, "gdi" }, - { _EGL_PLATFORM_X11, "x11" }, - { _EGL_PLATFORM_WAYLAND, "wayland" }, - { _EGL_PLATFORM_DRM, "drm" }, - { _EGL_PLATFORM_FBDEV, "fbdev" } - }; _EGLPlatformType plat = _EGL_INVALID_PLATFORM; const char *plat_name; EGLint i; @@ -84,19 +106,105 @@ _eglGetNativePlatformFromEnv(void) /** + * Perform validity checks on a generic pointer. + */ +static EGLBoolean +_eglPointerIsDereferencable(void *p) +{ +#ifdef HAVE_MINCORE + uintptr_t addr = (uintptr_t) p; + unsigned char valid = 0; + const long page_size = getpagesize(); + + if (p == NULL) + return EGL_FALSE; + + /* align addr to page_size */ + addr &= ~(page_size - 1); + + if (mincore((void *) addr, page_size, &valid) < 0) { + _eglLog(_EGL_DEBUG, "mincore failed: %m"); + return EGL_FALSE; + } + + return (valid & 0x01) == 0x01; +#else + return p != NULL; +#endif +} + + +/** + * Try detecting native platform with the help of native display characteristcs. + */ +static _EGLPlatformType +_eglNativePlatformDetectNativeDisplay(EGLNativeDisplayType nativeDisplay) +{ +#ifdef HAVE_FBDEV_PLATFORM + struct stat buf; +#endif + + if (nativeDisplay == EGL_DEFAULT_DISPLAY) + return _EGL_INVALID_PLATFORM; + +#ifdef HAVE_FBDEV_PLATFORM + /* fbdev is the only platform that can be a file descriptor. */ + if (fstat((intptr_t) nativeDisplay, &buf) == 0 && S_ISCHR(buf.st_mode)) + return _EGL_PLATFORM_FBDEV; +#endif + + if (_eglPointerIsDereferencable(nativeDisplay)) { + void *first_pointer = *(void **) nativeDisplay; + +#ifdef HAVE_WAYLAND_PLATFORM + /* wl_display is a wl_proxy, which is a wl_object. + * wl_object's first element points to the interfacetype. */ + if (first_pointer == &wl_display_interface) + return _EGL_PLATFORM_WAYLAND; +#endif + +#ifdef HAVE_DRM_PLATFORM + /* gbm has a pointer to its constructor as first element. */ + if (first_pointer == gbm_create_device) + return _EGL_PLATFORM_DRM; +#endif + +#ifdef HAVE_X11_PLATFORM + /* If not matched to any other platform, fallback to x11. */ + return _EGL_PLATFORM_X11; +#endif + } + + return _EGL_INVALID_PLATFORM; +} + + +/** * Return the native platform. It is the platform of the EGL native types. */ _EGLPlatformType -_eglGetNativePlatform(void) +_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay) { static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM; + char *detection_method = NULL; if (native_platform == _EGL_INVALID_PLATFORM) { native_platform = _eglGetNativePlatformFromEnv(); - if (native_platform == _EGL_INVALID_PLATFORM) - native_platform = _EGL_NATIVE_PLATFORM; + detection_method = "environment overwrite"; + if (native_platform == _EGL_INVALID_PLATFORM) { + native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay); + detection_method = "autodetected"; + if (native_platform == _EGL_INVALID_PLATFORM) { + native_platform = _EGL_NATIVE_PLATFORM; + detection_method = "build-time configuration"; + } + } } + if (detection_method != NULL) + _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)", + egl_platforms[native_platform].name, detection_method); + return native_platform; } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 9cd4dbfcc8a..cddea803c24 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -44,6 +44,7 @@ enum _egl_platform_type { _EGL_PLATFORM_WAYLAND, _EGL_PLATFORM_DRM, _EGL_PLATFORM_FBDEV, + _EGL_PLATFORM_ANDROID, _EGL_NUM_PLATFORMS, _EGL_INVALID_PLATFORM = -1 @@ -107,6 +108,9 @@ struct _egl_extensions EGLBoolean NOK_swap_region; EGLBoolean NOK_texture_from_pixmap; + + EGLBoolean ANDROID_image_native_buffer; + EGLBoolean ANDROID_swap_rectangle; }; @@ -150,7 +154,7 @@ struct _egl_display extern _EGLPlatformType -_eglGetNativePlatform(void); +_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay); extern void diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c index da189b689a3..ab48bc68218 100644 --- a/src/egl/main/eglmisc.c +++ b/src/egl/main/eglmisc.c @@ -113,6 +113,9 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(NOK_swap_region); _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); + + _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer); + _EGL_CHECK_EXTENSION(ANDROID_swap_rectangle); #undef _EGL_CHECK_EXTENSION } diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index c9cfb01388e..3564ecd01b0 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -269,11 +269,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, { const char *func; EGLint renderBuffer = EGL_BACK_BUFFER; + EGLint swapBehavior = EGL_BUFFER_PRESERVED; EGLint err; switch (type) { case EGL_WINDOW_BIT: func = "eglCreateWindowSurface"; + swapBehavior = EGL_BUFFER_DESTROYED; break; case EGL_PIXMAP_BIT: func = "eglCreatePixmapSurface"; @@ -315,7 +317,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, surf->MipmapLevel = 0; surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT; - surf->SwapBehavior = EGL_BUFFER_DESTROYED; + surf->SwapBehavior = swapBehavior; surf->HorizontalResolution = EGL_UNKNOWN; surf->VerticalResolution = EGL_UNKNOWN; diff --git a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml index 0331f124e80..cde943060ca 100644 --- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml +++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml @@ -1,5 +1,32 @@ <?xml version="1.0" encoding="UTF-8"?> <protocol name="drm"> + + <copyright> + Copyright © 2008-2011 Kristian Høgsberg + Copyright © 2010-2011 Intel Corporation + + Permission to use, copy, modify, distribute, and sell this + software and its documentation for any purpose is hereby granted + without fee, provided that\n the above copyright notice appear in + all copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + the copyright holders not be used in advertising or publicity + pertaining to distribution of the software without specific, + written prior permission. The copyright holders make no + representations about the suitability of this software for any + purpose. It is provided "as is" without express or implied + warranty. + + THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + THIS SOFTWARE. + </copyright> + <!-- drm support. This object is created by the server and published using the display's global event. --> <interface name="wl_drm" version="1"> diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk new file mode 100644 index 00000000000..782510ff0f4 --- /dev/null +++ b/src/gallium/Android.common.mk @@ -0,0 +1,32 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# src/gallium/Android.common.mk + +LOCAL_C_INCLUDES += \ + $(GALLIUM_TOP)/include \ + $(GALLIUM_TOP)/auxiliary \ + $(GALLIUM_TOP)/winsys \ + $(GALLIUM_TOP)/drivers + +include $(MESA_COMMON_MK) diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk new file mode 100644 index 00000000000..b49a61b1ffd --- /dev/null +++ b/src/gallium/Android.mk @@ -0,0 +1,44 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# src/gallium/Android.mk + +GALLIUM_TOP := $(call my-dir) +GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk + +SUBDIRS := \ + targets/egl-static \ + state_trackers/egl \ + auxiliary + +# swrast +SUBDIRS += winsys/sw/android drivers/softpipe + +# r600g +ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),) +SUBDIRS += winsys/radeon/drm +SUBDIRS += winsys/r600/drm drivers/r600 +endif + +mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS)) +include $(mkfiles) diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk new file mode 100644 index 00000000000..0c37dd31ab6 --- /dev/null +++ b/src/gallium/auxiliary/Android.mk @@ -0,0 +1,55 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES and GENERATED_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_C_INCLUDES := $(GALLIUM_TOP)/auxiliary/util + +LOCAL_MODULE := libmesa_gallium + +# generate sources +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +intermediates := $(call local-intermediates-dir) +LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES)) + +$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2) +$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@ + +$(intermediates)/indices/u_indices_gen.c \ +$(intermediates)/indices/u_unfilled_gen.c \ +$(intermediates)/util/u_format_srgb.c \ +$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py + $(transform-generated-source) + +$(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv + $(transform-generated-source) + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7dae7bc908b..896c058fde9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -3,205 +3,10 @@ include $(TOP)/configs/current LIBNAME = gallium -C_SOURCES = \ - cso_cache/cso_cache.c \ - cso_cache/cso_context.c \ - cso_cache/cso_hash.c \ - draw/draw_context.c \ - draw/draw_fs.c \ - draw/draw_gs.c \ - draw/draw_pipe.c \ - draw/draw_pipe_aaline.c \ - draw/draw_pipe_aapoint.c \ - draw/draw_pipe_clip.c \ - draw/draw_pipe_cull.c \ - draw/draw_pipe_flatshade.c \ - draw/draw_pipe_offset.c \ - draw/draw_pipe_pstipple.c \ - draw/draw_pipe_stipple.c \ - draw/draw_pipe_twoside.c \ - draw/draw_pipe_unfilled.c \ - draw/draw_pipe_util.c \ - draw/draw_pipe_validate.c \ - draw/draw_pipe_vbuf.c \ - draw/draw_pipe_wide_line.c \ - draw/draw_pipe_wide_point.c \ - draw/draw_pt.c \ - draw/draw_pt_emit.c \ - draw/draw_pt_fetch.c \ - draw/draw_pt_fetch_emit.c \ - draw/draw_pt_fetch_shade_emit.c \ - draw/draw_pt_fetch_shade_pipeline.c \ - draw/draw_pt_post_vs.c \ - draw/draw_pt_so_emit.c \ - draw/draw_pt_util.c \ - draw/draw_pt_vsplit.c \ - draw/draw_vertex.c \ - draw/draw_vs.c \ - draw/draw_vs_aos.c \ - draw/draw_vs_aos_io.c \ - draw/draw_vs_aos_machine.c \ - draw/draw_vs_exec.c \ - draw/draw_vs_ppc.c \ - draw/draw_vs_sse.c \ - draw/draw_vs_variant.c \ - indices/u_indices_gen.c \ - indices/u_unfilled_gen.c \ - os/os_misc.c \ - os/os_stream.c \ - os/os_stream_log.c \ - os/os_stream_null.c \ - os/os_stream_stdc.c \ - os/os_stream_str.c \ - os/os_time.c \ - pipebuffer/pb_buffer_fenced.c \ - pipebuffer/pb_buffer_malloc.c \ - pipebuffer/pb_bufmgr_alt.c \ - pipebuffer/pb_bufmgr_cache.c \ - pipebuffer/pb_bufmgr_debug.c \ - pipebuffer/pb_bufmgr_mm.c \ - pipebuffer/pb_bufmgr_ondemand.c \ - pipebuffer/pb_bufmgr_pool.c \ - pipebuffer/pb_bufmgr_slab.c \ - pipebuffer/pb_validate.c \ - rbug/rbug_connection.c \ - rbug/rbug_context.c \ - rbug/rbug_core.c \ - rbug/rbug_demarshal.c \ - rbug/rbug_texture.c \ - rbug/rbug_shader.c \ - rtasm/rtasm_cpu.c \ - rtasm/rtasm_execmem.c \ - rtasm/rtasm_ppc.c \ - rtasm/rtasm_ppc_spe.c \ - rtasm/rtasm_x86sse.c \ - tgsi/tgsi_build.c \ - tgsi/tgsi_dump.c \ - tgsi/tgsi_exec.c \ - tgsi/tgsi_info.c \ - tgsi/tgsi_iterate.c \ - tgsi/tgsi_parse.c \ - tgsi/tgsi_ppc.c \ - tgsi/tgsi_sanity.c \ - tgsi/tgsi_scan.c \ - tgsi/tgsi_sse2.c \ - tgsi/tgsi_text.c \ - tgsi/tgsi_transform.c \ - tgsi/tgsi_ureg.c \ - tgsi/tgsi_util.c \ - translate/translate.c \ - translate/translate_cache.c \ - translate/translate_generic.c \ - translate/translate_sse.c \ - util/u_debug.c \ - util/u_debug_describe.c \ - util/u_debug_refcnt.c \ - util/u_debug_stack.c \ - util/u_debug_symbol.c \ - util/u_dump_defines.c \ - util/u_dump_state.c \ - util/u_bitmask.c \ - util/u_blit.c \ - util/u_blitter.c \ - util/u_cache.c \ - util/u_caps.c \ - util/u_cpu_detect.c \ - util/u_dl.c \ - util/u_draw.c \ - util/u_draw_quad.c \ - util/u_format.c \ - util/u_format_other.c \ - util/u_format_latc.c \ - util/u_format_s3tc.c \ - util/u_format_rgtc.c \ - util/u_format_srgb.c \ - util/u_format_table.c \ - util/u_format_tests.c \ - util/u_format_yuv.c \ - util/u_format_zs.c \ - util/u_framebuffer.c \ - util/u_gen_mipmap.c \ - util/u_half.c \ - util/u_handle_table.c \ - util/u_hash.c \ - util/u_hash_table.c \ - util/u_index_modify.c \ - util/u_keymap.c \ - util/u_linear.c \ - util/u_linkage.c \ - util/u_network.c \ - util/u_math.c \ - util/u_mm.c \ - util/u_pstipple.c \ - util/u_rect.c \ - util/u_ringbuffer.c \ - util/u_sampler.c \ - util/u_simple_shaders.c \ - util/u_slab.c \ - util/u_snprintf.c \ - util/u_staging.c \ - util/u_surface.c \ - util/u_surfaces.c \ - util/u_texture.c \ - util/u_tile.c \ - util/u_transfer.c \ - util/u_resource.c \ - util/u_upload_mgr.c \ - util/u_vbuf_mgr.c \ - vl/vl_csc.c \ - vl/vl_compositor.c \ - vl/vl_decoder.c \ - vl/vl_mpeg12_decoder.c \ - vl/vl_mpeg12_bitstream.c \ - vl/vl_zscan.c \ - vl/vl_idct.c \ - vl/vl_mc.c \ - vl/vl_vertex_buffers.c \ - vl/vl_video_buffer.c - -GALLIVM_SOURCES = \ - gallivm/lp_bld_arit.c \ - gallivm/lp_bld_assert.c \ - gallivm/lp_bld_bitarit.c \ - gallivm/lp_bld_const.c \ - gallivm/lp_bld_conv.c \ - gallivm/lp_bld_flow.c \ - gallivm/lp_bld_format_aos.c \ - gallivm/lp_bld_format_soa.c \ - gallivm/lp_bld_format_yuv.c \ - gallivm/lp_bld_gather.c \ - gallivm/lp_bld_init.c \ - gallivm/lp_bld_intr.c \ - gallivm/lp_bld_logic.c \ - gallivm/lp_bld_pack.c \ - gallivm/lp_bld_printf.c \ - gallivm/lp_bld_quad.c \ - gallivm/lp_bld_sample.c \ - gallivm/lp_bld_sample_aos.c \ - gallivm/lp_bld_sample_soa.c \ - gallivm/lp_bld_struct.c \ - gallivm/lp_bld_swizzle.c \ - gallivm/lp_bld_tgsi_aos.c \ - gallivm/lp_bld_tgsi_info.c \ - gallivm/lp_bld_tgsi_soa.c \ - gallivm/lp_bld_type.c \ - draw/draw_llvm.c \ - draw/draw_llvm_sample.c \ - draw/draw_llvm_translate.c \ - draw/draw_vs_llvm.c \ - draw/draw_pt_fetch_shade_pipeline_llvm.c - -GALLIVM_CPP_SOURCES = \ - gallivm/lp_bld_debug.cpp \ - gallivm/lp_bld_misc.cpp - -GENERATED_SOURCES = \ - indices/u_indices_gen.c \ - indices/u_unfilled_gen.c \ - util/u_format_srgb.c \ - util/u_format_table.c \ - util/u_half.c +# get source lists +include Makefile.sources +C_SOURCES += $(GENERATED_SOURCES) ifeq ($(MESA_LLVM),1) C_SOURCES += \ diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources new file mode 100644 index 00000000000..766beb0fafc --- /dev/null +++ b/src/gallium/auxiliary/Makefile.sources @@ -0,0 +1,200 @@ +C_SOURCES := \ + cso_cache/cso_cache.c \ + cso_cache/cso_context.c \ + cso_cache/cso_hash.c \ + draw/draw_context.c \ + draw/draw_fs.c \ + draw/draw_gs.c \ + draw/draw_pipe.c \ + draw/draw_pipe_aaline.c \ + draw/draw_pipe_aapoint.c \ + draw/draw_pipe_clip.c \ + draw/draw_pipe_cull.c \ + draw/draw_pipe_flatshade.c \ + draw/draw_pipe_offset.c \ + draw/draw_pipe_pstipple.c \ + draw/draw_pipe_stipple.c \ + draw/draw_pipe_twoside.c \ + draw/draw_pipe_unfilled.c \ + draw/draw_pipe_util.c \ + draw/draw_pipe_validate.c \ + draw/draw_pipe_vbuf.c \ + draw/draw_pipe_wide_line.c \ + draw/draw_pipe_wide_point.c \ + draw/draw_pt.c \ + draw/draw_pt_emit.c \ + draw/draw_pt_fetch.c \ + draw/draw_pt_fetch_emit.c \ + draw/draw_pt_fetch_shade_emit.c \ + draw/draw_pt_fetch_shade_pipeline.c \ + draw/draw_pt_post_vs.c \ + draw/draw_pt_so_emit.c \ + draw/draw_pt_util.c \ + draw/draw_pt_vsplit.c \ + draw/draw_vertex.c \ + draw/draw_vs.c \ + draw/draw_vs_aos.c \ + draw/draw_vs_aos_io.c \ + draw/draw_vs_aos_machine.c \ + draw/draw_vs_exec.c \ + draw/draw_vs_ppc.c \ + draw/draw_vs_sse.c \ + draw/draw_vs_variant.c \ + os/os_misc.c \ + os/os_stream.c \ + os/os_stream_log.c \ + os/os_stream_null.c \ + os/os_stream_stdc.c \ + os/os_stream_str.c \ + os/os_time.c \ + pipebuffer/pb_buffer_fenced.c \ + pipebuffer/pb_buffer_malloc.c \ + pipebuffer/pb_bufmgr_alt.c \ + pipebuffer/pb_bufmgr_cache.c \ + pipebuffer/pb_bufmgr_debug.c \ + pipebuffer/pb_bufmgr_mm.c \ + pipebuffer/pb_bufmgr_ondemand.c \ + pipebuffer/pb_bufmgr_pool.c \ + pipebuffer/pb_bufmgr_slab.c \ + pipebuffer/pb_validate.c \ + postprocess/pp_celshade.c \ + postprocess/pp_colors.c \ + postprocess/pp_init.c \ + postprocess/pp_mlaa.c \ + postprocess/pp_run.c \ + postprocess/pp_program.c \ + rbug/rbug_connection.c \ + rbug/rbug_context.c \ + rbug/rbug_core.c \ + rbug/rbug_demarshal.c \ + rbug/rbug_texture.c \ + rbug/rbug_shader.c \ + rtasm/rtasm_cpu.c \ + rtasm/rtasm_execmem.c \ + rtasm/rtasm_ppc.c \ + rtasm/rtasm_ppc_spe.c \ + rtasm/rtasm_x86sse.c \ + tgsi/tgsi_build.c \ + tgsi/tgsi_dump.c \ + tgsi/tgsi_exec.c \ + tgsi/tgsi_info.c \ + tgsi/tgsi_iterate.c \ + tgsi/tgsi_parse.c \ + tgsi/tgsi_ppc.c \ + tgsi/tgsi_sanity.c \ + tgsi/tgsi_scan.c \ + tgsi/tgsi_sse2.c \ + tgsi/tgsi_text.c \ + tgsi/tgsi_transform.c \ + tgsi/tgsi_ureg.c \ + tgsi/tgsi_util.c \ + translate/translate.c \ + translate/translate_cache.c \ + translate/translate_generic.c \ + translate/translate_sse.c \ + util/u_debug.c \ + util/u_debug_describe.c \ + util/u_debug_memory.c \ + util/u_debug_refcnt.c \ + util/u_debug_stack.c \ + util/u_debug_symbol.c \ + util/u_dump_defines.c \ + util/u_dump_state.c \ + util/u_bitmask.c \ + util/u_blit.c \ + util/u_blitter.c \ + util/u_cache.c \ + util/u_caps.c \ + util/u_cpu_detect.c \ + util/u_dl.c \ + util/u_draw.c \ + util/u_draw_quad.c \ + util/u_format.c \ + util/u_format_other.c \ + util/u_format_latc.c \ + util/u_format_s3tc.c \ + util/u_format_rgtc.c \ + util/u_format_tests.c \ + util/u_format_yuv.c \ + util/u_format_zs.c \ + util/u_framebuffer.c \ + util/u_gen_mipmap.c \ + util/u_handle_table.c \ + util/u_hash.c \ + util/u_hash_table.c \ + util/u_index_modify.c \ + util/u_keymap.c \ + util/u_linear.c \ + util/u_linkage.c \ + util/u_network.c \ + util/u_math.c \ + util/u_mm.c \ + util/u_pstipple.c \ + util/u_rect.c \ + util/u_ringbuffer.c \ + util/u_sampler.c \ + util/u_simple_shaders.c \ + util/u_slab.c \ + util/u_snprintf.c \ + util/u_staging.c \ + util/u_surface.c \ + util/u_surfaces.c \ + util/u_texture.c \ + util/u_tile.c \ + util/u_transfer.c \ + util/u_resource.c \ + util/u_upload_mgr.c \ + util/u_vbuf_mgr.c \ + vl/vl_csc.c \ + vl/vl_compositor.c \ + vl/vl_decoder.c \ + vl/vl_mpeg12_decoder.c \ + vl/vl_mpeg12_bitstream.c \ + vl/vl_zscan.c \ + vl/vl_idct.c \ + vl/vl_mc.c \ + vl/vl_vertex_buffers.c \ + vl/vl_video_buffer.c + +GENERATED_SOURCES := \ + indices/u_indices_gen.c \ + indices/u_unfilled_gen.c \ + util/u_format_srgb.c \ + util/u_format_table.c \ + util/u_half.c + +GALLIVM_SOURCES := \ + gallivm/lp_bld_arit.c \ + gallivm/lp_bld_assert.c \ + gallivm/lp_bld_bitarit.c \ + gallivm/lp_bld_const.c \ + gallivm/lp_bld_conv.c \ + gallivm/lp_bld_flow.c \ + gallivm/lp_bld_format_aos.c \ + gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_yuv.c \ + gallivm/lp_bld_gather.c \ + gallivm/lp_bld_init.c \ + gallivm/lp_bld_intr.c \ + gallivm/lp_bld_logic.c \ + gallivm/lp_bld_pack.c \ + gallivm/lp_bld_printf.c \ + gallivm/lp_bld_quad.c \ + gallivm/lp_bld_sample.c \ + gallivm/lp_bld_sample_aos.c \ + gallivm/lp_bld_sample_soa.c \ + gallivm/lp_bld_struct.c \ + gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi_aos.c \ + gallivm/lp_bld_tgsi_info.c \ + gallivm/lp_bld_tgsi_soa.c \ + gallivm/lp_bld_type.c \ + draw/draw_llvm.c \ + draw/draw_llvm_sample.c \ + draw/draw_llvm_translate.c \ + draw/draw_vs_llvm.c \ + draw/draw_pt_fetch_shade_pipeline_llvm.c + +GALLIVM_CPP_SOURCES := \ + gallivm/lp_bld_debug.cpp \ + gallivm/lp_bld_misc.cpp diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index d18f55f1644..07c420e138d 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -47,201 +47,20 @@ env.Depends('util/u_format_table.c', [ 'util/u_format_pack.py', ]) -source = [ - 'cso_cache/cso_cache.c', - 'cso_cache/cso_context.c', - 'cso_cache/cso_hash.c', - 'draw/draw_context.c', - 'draw/draw_fs.c', - 'draw/draw_gs.c', - 'draw/draw_pipe.c', - 'draw/draw_pipe_aaline.c', - 'draw/draw_pipe_aapoint.c', - 'draw/draw_pipe_clip.c', - 'draw/draw_pipe_cull.c', - 'draw/draw_pipe_flatshade.c', - 'draw/draw_pipe_offset.c', - 'draw/draw_pipe_pstipple.c', - 'draw/draw_pipe_stipple.c', - 'draw/draw_pipe_twoside.c', - 'draw/draw_pipe_unfilled.c', - 'draw/draw_pipe_util.c', - 'draw/draw_pipe_validate.c', - 'draw/draw_pipe_vbuf.c', - 'draw/draw_pipe_wide_line.c', - 'draw/draw_pipe_wide_point.c', - 'draw/draw_pt.c', - 'draw/draw_pt_emit.c', - 'draw/draw_pt_fetch.c', - 'draw/draw_pt_fetch_emit.c', - 'draw/draw_pt_fetch_shade_emit.c', - 'draw/draw_pt_fetch_shade_pipeline.c', - 'draw/draw_pt_post_vs.c', - 'draw/draw_pt_so_emit.c', - 'draw/draw_pt_util.c', - 'draw/draw_pt_vsplit.c', - 'draw/draw_vertex.c', - 'draw/draw_vs.c', - 'draw/draw_vs_aos.c', - 'draw/draw_vs_aos_io.c', - 'draw/draw_vs_aos_machine.c', - 'draw/draw_vs_exec.c', - 'draw/draw_vs_ppc.c', - 'draw/draw_vs_sse.c', - 'draw/draw_vs_variant.c', - #'indices/u_indices.c', - #'indices/u_unfilled_indices.c', - 'indices/u_indices_gen.c', - 'indices/u_unfilled_gen.c', - 'os/os_misc.c', - 'os/os_stream.c', - 'os/os_stream_log.c', - 'os/os_stream_null.c', - 'os/os_stream_stdc.c', - 'os/os_stream_str.c', - 'os/os_time.c', - 'pipebuffer/pb_buffer_fenced.c', - 'pipebuffer/pb_buffer_malloc.c', - 'pipebuffer/pb_bufmgr_alt.c', - 'pipebuffer/pb_bufmgr_cache.c', - 'pipebuffer/pb_bufmgr_debug.c', - 'pipebuffer/pb_bufmgr_mm.c', - 'pipebuffer/pb_bufmgr_ondemand.c', - 'pipebuffer/pb_bufmgr_pool.c', - 'pipebuffer/pb_bufmgr_slab.c', - 'pipebuffer/pb_validate.c', - 'rbug/rbug_connection.c', - 'rbug/rbug_context.c', - 'rbug/rbug_core.c', - 'rbug/rbug_demarshal.c', - 'rbug/rbug_shader.c', - 'rbug/rbug_texture.c', - 'rtasm/rtasm_cpu.c', - 'rtasm/rtasm_execmem.c', - 'rtasm/rtasm_ppc.c', - 'rtasm/rtasm_ppc_spe.c', - 'rtasm/rtasm_x86sse.c', - 'tgsi/tgsi_build.c', - 'tgsi/tgsi_dump.c', - 'tgsi/tgsi_exec.c', - 'tgsi/tgsi_info.c', - 'tgsi/tgsi_iterate.c', - 'tgsi/tgsi_parse.c', - 'tgsi/tgsi_ppc.c', - 'tgsi/tgsi_sanity.c', - 'tgsi/tgsi_scan.c', - 'tgsi/tgsi_sse2.c', - 'tgsi/tgsi_text.c', - 'tgsi/tgsi_transform.c', - 'tgsi/tgsi_ureg.c', - 'tgsi/tgsi_util.c', - 'translate/translate.c', - 'translate/translate_cache.c', - 'translate/translate_generic.c', - 'translate/translate_sse.c', - 'util/u_bitmask.c', - 'util/u_blit.c', - 'util/u_blitter.c', - 'util/u_cache.c', - 'util/u_caps.c', - 'util/u_cpu_detect.c', - 'util/u_debug.c', - 'util/u_debug_describe.c', - 'util/u_debug_memory.c', - 'util/u_debug_refcnt.c', - 'util/u_debug_stack.c', - 'util/u_debug_symbol.c', - 'util/u_dump_defines.c', - 'util/u_dump_state.c', - 'util/u_dl.c', - 'util/u_draw.c', - 'util/u_draw_quad.c', - 'util/u_format.c', - 'util/u_format_other.c', - 'util/u_format_latc.c', - 'util/u_format_s3tc.c', - 'util/u_format_rgtc.c', - 'util/u_format_srgb.c', - 'util/u_format_table.c', - 'util/u_format_tests.c', - 'util/u_format_yuv.c', - 'util/u_format_zs.c', - 'util/u_framebuffer.c', - 'util/u_gen_mipmap.c', - 'util/u_half.c', - 'util/u_handle_table.c', - 'util/u_hash.c', - 'util/u_hash_table.c', - 'util/u_index_modify.c', - 'util/u_keymap.c', - 'util/u_linear.c', - 'util/u_linkage.c', - 'util/u_network.c', - 'util/u_math.c', - 'util/u_mm.c', - 'util/u_pstipple.c', - 'util/u_rect.c', - 'util/u_resource.c', - 'util/u_ringbuffer.c', - 'util/u_sampler.c', - 'util/u_simple_shaders.c', - 'util/u_slab.c', - 'util/u_snprintf.c', - 'util/u_staging.c', - 'util/u_surface.c', - 'util/u_surfaces.c', - 'util/u_texture.c', - 'util/u_tile.c', - 'util/u_transfer.c', - 'util/u_upload_mgr.c', - 'util/u_vbuf_mgr.c', - 'vl/vl_csc.c', - 'vl/vl_compositor.c', - 'vl/vl_decoder.c', - 'vl/vl_mpeg12_decoder.c', - 'vl/vl_mpeg12_bitstream.c', - 'vl/vl_zscan.c', - 'vl/vl_idct.c', - 'vl/vl_mc.c', - 'vl/vl_vertex_buffers.c', - 'vl/vl_video_buffer.c', -] +source = env.ParseSourceList('Makefile.sources', [ + 'C_SOURCES', + 'GENERATED_SOURCES' +]) if env['llvm']: - source += [ - 'gallivm/lp_bld_arit.c', - 'gallivm/lp_bld_assert.c', - 'gallivm/lp_bld_bitarit.c', - 'gallivm/lp_bld_const.c', - 'gallivm/lp_bld_conv.c', - 'gallivm/lp_bld_debug.cpp', - 'gallivm/lp_bld_flow.c', - 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_soa.c', - 'gallivm/lp_bld_format_yuv.c', - 'gallivm/lp_bld_gather.c', - 'gallivm/lp_bld_init.c', - 'gallivm/lp_bld_intr.c', - 'gallivm/lp_bld_logic.c', - 'gallivm/lp_bld_misc.cpp', - 'gallivm/lp_bld_pack.c', - 'gallivm/lp_bld_printf.c', - 'gallivm/lp_bld_quad.c', - 'gallivm/lp_bld_sample.c', - 'gallivm/lp_bld_sample_aos.c', - 'gallivm/lp_bld_sample_soa.c', - 'gallivm/lp_bld_struct.c', - 'gallivm/lp_bld_swizzle.c', - 'gallivm/lp_bld_tgsi_aos.c', - 'gallivm/lp_bld_tgsi_info.c', - 'gallivm/lp_bld_tgsi_soa.c', - 'gallivm/lp_bld_type.c', - 'draw/draw_llvm.c', - 'draw/draw_llvm_sample.c', - 'draw/draw_llvm_translate.c', - 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_vs_llvm.c' - ] + source += env.ParseSourceList('Makefile.sources', [ + 'GALLIVM_SOURCES', + 'GALLIVM_CPP_SOURCES' + ]) + + if env['toolchain'] == 'crossmingw': + # compile lp_bld_misc.cpp without -gstabs option + source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp") gallium = env.ConvenienceLibrary( target = 'gallium', diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8bb87440497..996e295e4b5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -96,7 +96,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); * Create LLVM type for struct draw_jit_texture */ static LLVMTypeRef -create_jit_texture_type(struct gallivm_state *gallivm) +create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef texture_type; @@ -120,13 +120,21 @@ create_jit_texture_type(struct gallivm_state *gallivm) elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, texture_type); + /* Make sure the target's struct layout cache doesn't return * stale/invalid data. */ LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, target, texture_type, @@ -176,7 +184,7 @@ create_jit_texture_type(struct gallivm_state *gallivm) */ static LLVMTypeRef create_jit_context_type(struct gallivm_state *gallivm, - LLVMTypeRef texture_type) + LLVMTypeRef texture_type, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); @@ -189,11 +197,17 @@ create_jit_context_type(struct gallivm_state *gallivm, elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ - +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, context_type); LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, target, context_type, 0); @@ -215,7 +229,7 @@ create_jit_context_type(struct gallivm_state *gallivm, * Create LLVM type for struct pipe_vertex_buffer */ static LLVMTypeRef -create_jit_vertex_buffer_type(struct gallivm_state *gallivm) +create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; LLVMTypeRef elem_types[3]; @@ -225,10 +239,17 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) elem_types[1] = LLVMInt32TypeInContext(gallivm->context); elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ +#if HAVE_LLVM >= 0x0300 + vb_type = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vb_type, elem_types, + Elements(elem_types), 0); +#else vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vb_type); LLVMInvalidateStructLayout(gallivm->target, vb_type); +#endif LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); @@ -258,10 +279,17 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) elem_types[1] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); elem_types[2] = LLVMArrayType(elem_types[1], data_elems); +#if HAVE_LLVM >= 0x0300 + vertex_header = LLVMStructCreateNamed(gallivm->context, struct_name); + LLVMStructSetBody(vertex_header, elem_types, + Elements(elem_types), 0); +#else vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, struct_name, vertex_header); LLVMInvalidateStructLayout(gallivm->target, vertex_header); +#endif /* these are bit-fields and we can't take address of them LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, @@ -284,8 +312,6 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(gallivm->module, struct_name, vertex_header); - return vertex_header; } @@ -299,19 +325,15 @@ create_jit_types(struct draw_llvm *llvm) struct gallivm_state *gallivm = llvm->gallivm; LLVMTypeRef texture_type, context_type, buffer_type, vb_type; - texture_type = create_jit_texture_type(gallivm); - LLVMAddTypeName(gallivm->module, "texture", texture_type); + texture_type = create_jit_texture_type(gallivm, "texture"); - context_type = create_jit_context_type(gallivm, texture_type); - LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type); + context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context"); llvm->context_ptr_type = LLVMPointerType(context_type, 0); buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); - LLVMAddTypeName(gallivm->module, "buffer", buffer_type); llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); - vb_type = create_jit_vertex_buffer_type(gallivm); - LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type); + vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); } diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp index 5eb09c01ab3..6b9c35b3ce5 100644 --- a/src/gallium/auxiliary/gallivm/f.cpp +++ b/src/gallium/auxiliary/gallivm/f.cpp @@ -15,8 +15,9 @@ * * How to use this source: * - * - Download and abuild the NTL library from - * http://shoup.net/ntl/download.html + * - Download and build the NTL library from + * http://shoup.net/ntl/download.html , or install libntl-dev package if on + * Debian. * * - Download boost source code matching to your distro. * @@ -24,22 +25,32 @@ * * - Build as * - * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1 + * g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a * * - Run as * * ./minimax * - * - For example, to compute exp2 5th order polynomial between [0, 1] do: + * - For example, to compute log2 5th order polynomial between [1, 2] do: + * + * variant 0 + * range 1 2 + * order 5 0 + * step 200 + * info + * + * and take the coefficients from the P = { ... } array. + * + * - To compute exp2 5th order polynomial between [0, 1] do: * * variant 1 * range 0 1 * order 5 0 - * steps 200 + * step 200 * info * * - For more info see - * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + * http://www.boost.org/doc/libs/1_47_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html */ #define L22 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 02b3bde7893..2be8598704e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -61,7 +61,7 @@ #include "lp_bld_arit.h" -#define EXP_POLY_DEGREE 3 +#define EXP_POLY_DEGREE 5 #define LOG_POLY_DEGREE 5 @@ -1645,7 +1645,7 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - const unsigned num_iterations = 0; + const unsigned num_iterations = 1; LLVMValueRef res; unsigned i; @@ -2151,7 +2151,7 @@ lp_build_exp(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); + return lp_build_exp2(bld, lp_build_mul(bld, log2e, x)); } @@ -2168,7 +2168,7 @@ lp_build_log(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); + return lp_build_mul(bld, log2, lp_build_log2(bld, x)); } @@ -2218,18 +2218,18 @@ lp_build_polynomial(struct lp_build_context *bld, */ const double lp_build_exp2_polynomial[] = { #if EXP_POLY_DEGREE == 5 - 0.999999999690134838155, - 0.583974334321735217258, - 0.164553105719676828492, - 0.0292811063701710962255, - 0.00354944426657875141846, - 0.000296253726543423377365 + 0.999999925063526176901, + 0.693153073200168932794, + 0.240153617044375388211, + 0.0558263180532956664775, + 0.00898934009049466391101, + 0.00187757667519147912699 #elif EXP_POLY_DEGREE == 4 - 1.00000001502262084505, - 0.563586057338685991394, - 0.150436017652442413623, - 0.0243220604213317927308, - 0.0025359088446580436489 + 1.00000259337069434683, + 0.693003834469974940458, + 0.24144275689150793076, + 0.0520114606103070150235, + 0.0135341679161270268764 #elif EXP_POLY_DEGREE == 3 0.999925218562710312959, 0.695833540494823811697, @@ -2465,6 +2465,12 @@ lp_build_log2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); + /* + * We don't explicitly handle denormalized numbers. They will yield a + * result in the neighbourhood of -127, which appears to be adequate + * enough. + */ + i = LLVMBuildBitCast(builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h new file mode 100644 index 00000000000..b48eb053023 --- /dev/null +++ b/src/gallium/auxiliary/os/os_mman.h @@ -0,0 +1,87 @@ +/************************************************************************** + * + * Copyright 2011 LunarG, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent memory mapping (with large file support). + * + * @author Chia-I Wu <[email protected]> + */ + +#ifndef _OS_MMAN_H_ +#define _OS_MMAN_H_ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" + +#if defined(PIPE_OS_UNIX) +# ifndef _FILE_OFFSET_BITS +# error _FILE_OFFSET_BITS must be defined to 64 +# endif +# include <sys/mman.h> +#else +# error Unsupported OS +#endif + +#if defined(PIPE_OS_ANDROID) +# include <errno.h> /* for EINVAL */ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(PIPE_OS_ANDROID) + +extern void *__mmap2(void *, size_t, int, int, int, size_t); + +static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags, int fd, loff_t offset) +{ + /* offset must be aligned to 4096 (not necessarily the page size) */ + if (unlikely(offset & 4095)) { + errno = EINVAL; + return MAP_FAILED; + } + + return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12)); +} + +#else +/* assume large file support exists */ +# define os_mmap(addr, length, prot, flags, fd, offset) mmap(addr, length, prot, flags, fd, offset) +#endif + +#define os_munmap(addr, length) munmap(addr, length) + + +#ifdef __cplusplus +} +#endif + +#endif /* _OS_MMAN_H_ */ diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 8f1245bff55..d8301298b7f 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -314,7 +314,7 @@ typedef int64_t pipe_condvar; * pipe_barrier */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) +#if (defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)) && !defined(PIPE_OS_ANDROID) typedef pthread_barrier_t pipe_barrier; diff --git a/src/gallium/auxiliary/postprocess/ADDING b/src/gallium/auxiliary/postprocess/ADDING new file mode 100644 index 00000000000..3735835142a --- /dev/null +++ b/src/gallium/auxiliary/postprocess/ADDING @@ -0,0 +1,87 @@ +How to add a new post-processing filter +======================================= + +The Gallium post-processing queue works by passing the current screen to a fragment shader. +These shaders may be written in any supported language, but are added here in TGSI text +assembly. + +You can translate GLSL/ARB fairly easily via llvmpipe (LP_DEBUG=tgsi). I don't know the +status of the D3D state tracker, but if/when that works, I'd assume HLSL would be possible +too. + + + +Steps +===== + +1. Add it to PP +2. Make it known to PP +3. Make it known to driconf +4. ???? +5. Profit + + + + +1. Add it to PP +--------------- + +Once you have the shader(s) in TGSI asm, put them to static const char arrays in a header +file (see pp_colors.h). + +Add the filter's prototypes (main and init functions) to postprocess.h. This is mostly a +copy-paste job with only changing the name. + +Then create a file containing empty main and init functions, named as you specified above. +See pp_colors.c for an example. + + + +2. Make it known to PP +---------------------- + +Add your filter to filters.h, in a correct place. Placement is important, AA should usually +be the last effect in the queue for example. + +Name is the config option your filter will be enabled by, both in driconf and as an env var. + +Inner temp means an intermediate framebuffer you may use in your filter to store +results between passes. If you have a single-pass filter, request 0 of those. + +Shaders is the number of shaders your filter needs. The minimum is 2. + + +You could also write the init and main functions now. If your filter is single-pass without +a vertex shader and any other input than the main screen, you can use pp_nocolor as your +main function as is. + + + +3. Make it known to driconf +--------------------------- + +First time outside of auxiliary/postprocess. First, add a suitable description to +drivers/dri/common/xmlpool/t_options.h, and regenerate options.h by running make in that +directory. Use the name you put into filters.h as the config option name. + +With driconf aware of the option, make Gallium aware of it too. Add it to +state_trackers/dri/common/dri_screen.c in a proper section, specifying its default value and +the accepted range (if applicable). + +Do check that __driNConfigOptions is still correct after the addition. + + + +4. ???? +------- + +Testing, praying, hookers, blow, sacrificial lambs... + + + +5. Profit +--------- + +Assuming you got here, sharing is caring. Send your filter to mesa-dev. + + diff --git a/src/gallium/auxiliary/postprocess/filters.h b/src/gallium/auxiliary/postprocess/filters.h new file mode 100644 index 00000000000..2454088707d --- /dev/null +++ b/src/gallium/auxiliary/postprocess/filters.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_EXTERNAL_FILTERS_H +#define PP_EXTERNAL_FILTERS_H + +#include "postprocess/postprocess.h" + +typedef void (*pp_init_func) (struct pp_queue_t *, unsigned int, + unsigned int); + +struct pp_filter_t +{ + const char *name; /* Config name */ + unsigned int inner_tmps; /* Request how many inner temps */ + unsigned int shaders; /* Request how many shaders */ + unsigned int verts; /* How many are vertex shaders */ + pp_init_func init; /* Init function */ + pp_func main; /* Run function */ +}; + +/* Order matters. Put new filters in a suitable place. */ + +static const struct pp_filter_t pp_filters[PP_FILTERS] = { +/* name inner shaders verts init run */ + { "pp_noblue", 0, 2, 1, pp_noblue_init, pp_nocolor }, + { "pp_nogreen", 0, 2, 1, pp_nogreen_init, pp_nocolor }, + { "pp_nored", 0, 2, 1, pp_nored_init, pp_nocolor }, + { "pp_celshade", 0, 2, 1, pp_celshade_init, pp_nocolor }, + { "pp_jimenezmlaa", 2, 5, 2, pp_jimenezmlaa_init, pp_jimenezmlaa }, + { "pp_jimenezmlaa_color", 2, 5, 2, pp_jimenezmlaa_init_color, pp_jimenezmlaa_color }, +}; + +#endif diff --git a/src/gallium/auxiliary/postprocess/postprocess.h b/src/gallium/auxiliary/postprocess/postprocess.h new file mode 100644 index 00000000000..ef94f79997a --- /dev/null +++ b/src/gallium/auxiliary/postprocess/postprocess.h @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef POSTPROCESS_H +#define POSTPROCESS_H + +#include "postprocess/pp_program.h" + +#define PP_FILTERS 6 /* Increment this if you add filters */ +#define PP_MAX_PASSES 6 + +struct pp_queue_t; /* Forward definition */ + +/* Less typing later on */ +typedef void (*pp_func) (struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); +/** +* The main post-processing queue. +*/ +struct pp_queue_t +{ + pp_func *pp_queue; /* An array of pp_funcs */ + unsigned int n_filters; /* Number of enabled filters */ + + struct pipe_resource *tmp[2]; /* Two temp FBOs for the queue */ + struct pipe_resource *inner_tmp[3]; /* Three for filter use */ + + unsigned int n_tmp, n_inner_tmp; + + struct pipe_resource *depth; /* depth of original input */ + struct pipe_resource *stencil; /* stencil shared by inner_tmps */ + + struct pipe_surface *tmps[2], *inner_tmps[3], *stencils; + + void ***shaders; /* Shaders in TGSI form */ + unsigned int *verts; + struct program *p; + + bool fbos_init; +}; + +/* Main functions */ + +struct pp_queue_t *pp_init(struct pipe_screen *, const unsigned int *); +void pp_run(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, struct pipe_resource *); +void pp_free(struct pp_queue_t *); +void pp_free_fbos(struct pp_queue_t *); +void pp_debug(const char *, ...); +struct program *pp_init_prog(struct pp_queue_t *, struct pipe_screen *); +void pp_init_fbos(struct pp_queue_t *, unsigned int, unsigned int, + struct pipe_resource *); + +/* The filters */ + +void pp_nocolor(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); + +void pp_jimenezmlaa(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); +void pp_jimenezmlaa_color(struct pp_queue_t *, struct pipe_resource *, + struct pipe_resource *, unsigned int); + +/* The filter init functions */ + +void pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int); + +void pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int); + +void pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int); +void pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int, + unsigned int); + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.c b/src/gallium/auxiliary/postprocess/pp_celshade.c new file mode 100644 index 00000000000..4454764ea84 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_celshade.c @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "postprocess/pp_celshade.h" +#include "postprocess/pp_filters.h" + +/** Init function */ +void +pp_celshade_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, celshade, false, "celshade"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.h b/src/gallium/auxiliary/postprocess/pp_celshade.h new file mode 100644 index 00000000000..536ac7f1f1c --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_celshade.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELSHADE_H +#define CELSHADE_H + +static const char celshade[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..4]\n" + "IMM FLT32 { 0.2126, 0.7152, 0.0722, 4.0000}\n" + "IMM FLT32 { 0.5000, 2.0000, 1.0000, -0.1250}\n" + "IMM FLT32 { 0.2500, 0.1000, 0.1250, 3.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[0]\n" + " 2: MUL TEMP[3].x, TEMP[1].xxxx, IMM[0].wwww\n" + " 3: ROUND TEMP[2].x, TEMP[3].xxxx\n" + " 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx\n" + " 5: MOV TEMP[2].x, TEMP[3].xxxx\n" + " 6: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 7: SGT TEMP[1].w, TEMP[4].xxxx, IMM[2].yyyy\n" + " 8: IF TEMP[1].wwww :19\n" + " 9: ADD TEMP[4].y, TEMP[3].xxxx, IMM[2].yyyy\n" + " 10: ADD TEMP[1].z, TEMP[1].xxxx, -TEMP[4].yyyy\n" + " 11: ADD TEMP[1].y, TEMP[3].xxxx, IMM[2].zzzz\n" + " 12: ADD TEMP[2].x, TEMP[1].yyyy, -TEMP[4].yyyy\n" + " 13: RCP TEMP[4].y, TEMP[2].xxxx\n" + " 14: MUL TEMP[2].x, TEMP[1].zzzz, TEMP[4].yyyy\n" + " 15: MAD TEMP[1].y, -IMM[1].yyyy, TEMP[2].xxxx, IMM[2].wwww\n" + " 16: MUL TEMP[1].z, TEMP[2].xxxx, TEMP[1].yyyy\n" + " 17: MUL TEMP[1].y, TEMP[2].xxxx, TEMP[1].zzzz\n" + " 18: MAD TEMP[2].x, TEMP[1].yyyy, IMM[2].zzzz, TEMP[3].xxxx\n" + " 19: ENDIF\n" + " 20: SLT TEMP[3].x, TEMP[4].xxxx, -IMM[2].yyyy\n" + " 21: IF TEMP[3].xxxx :34\n" + " 22: ADD TEMP[3].x, TEMP[2].xxxx, -IMM[2].zzzz\n" + " 23: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 24: ADD TEMP[1].x, TEMP[2].xxxx, -IMM[2].yyyy\n" + " 25: ADD TEMP[4].y, TEMP[1].xxxx, -TEMP[3].xxxx\n" + " 26: RCP TEMP[3].x, TEMP[4].yyyy\n" + " 27: MUL TEMP[1].x, TEMP[4].xxxx, TEMP[3].xxxx\n" + " 28: MAD TEMP[4].x, -IMM[1].yyyy, TEMP[1].xxxx, IMM[2].wwww\n" + " 29: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx\n" + " 30: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx\n" + " 31: ADD TEMP[3].x, IMM[1].zzzz, -TEMP[4].xxxx\n" + " 32: MAD TEMP[1].x, TEMP[3].xxxx, -IMM[2].zzzz, TEMP[2].xxxx\n" + " 33: MOV TEMP[2].x, TEMP[1].xxxx\n" + " 34: ENDIF\n" + " 35: MAD TEMP[1].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].yyyy\n" + " 36: MUL OUT[0], TEMP[0], TEMP[1].xxxx\n" + " 37: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c new file mode 100644 index 00000000000..36bb1f552f5 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_colors.c @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "postprocess/pp_colors.h" +#include "postprocess/pp_filters.h" + +/** The run function of the color filters */ +void +pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + + struct program *p = ppq->p; + + pp_filter_setup_in(p, in); + pp_filter_setup_out(p, out); + + pp_filter_set_fb(p); + pp_filter_misc_state(p); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler_done(p->cso); + cso_set_fragment_sampler_views(p->cso, 1, &p->view); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][1]); + + pp_filter_draw(p); + pp_filter_end_pass(p); +} + + +/* Init functions */ + +void +pp_nored_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored"); +} + + +void +pp_nogreen_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, nogreen, false, "nogreen"); +} + + +void +pp_noblue_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + ppq->shaders[n][1] = + pp_tgsi_to_state(ppq->p->pipe, noblue, false, "noblue"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_colors.h b/src/gallium/auxiliary/postprocess/pp_colors.h new file mode 100644 index 00000000000..588cd2f0c52 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_colors.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_COLORS_H +#define PP_COLORS_H + +static const char nored[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].x, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + + +static const char nogreen[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].y, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + + +static const char noblue[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0]\n" + "IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[0].z, IMM[0].xxxx\n" + " 2: MOV OUT[0], TEMP[0]\n" + " 3: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_filters.h b/src/gallium/auxiliary/postprocess/pp_filters.h new file mode 100644 index 00000000000..0e34bb6d20f --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_filters.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_FILTERS_H +#define PP_FILTERS_H + +/* Internal include, mainly for the filters */ + +#include "cso_cache/cso_context.h" +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_text.h" +#include "util/u_memory.h" +#include "util/u_draw_quad.h" + +#define PP_MAX_TOKENS 2048 + + +/* Helper functions for the filters */ + +void pp_filter_setup_in(struct program *, struct pipe_resource *); +void pp_filter_setup_out(struct program *, struct pipe_resource *); +void pp_filter_end_pass(struct program *); +void *pp_tgsi_to_state(struct pipe_context *, const char *, bool, + const char *); +void pp_filter_misc_state(struct program *); +void pp_filter_draw(struct program *); +void pp_filter_set_fb(struct program *); +void pp_filter_set_clear_fb(struct program *); + + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_init.c b/src/gallium/auxiliary/postprocess/pp_init.c new file mode 100644 index 00000000000..75417999b7e --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_init.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> + +#include "postprocess/filters.h" + +#include "pipe/p_screen.h" +#include "util/u_inlines.h" +#include "util/u_blit.h" +#include "util/u_math.h" +#include "cso_cache/cso_context.h" + +/** Initialize the post-processing queue. */ +struct pp_queue_t * +pp_init(struct pipe_screen *pscreen, const unsigned int *enabled) +{ + + unsigned int curpos = 0, i, tmp_req = 0; + struct pp_queue_t *ppq; + pp_func *tmp_q; + + pp_debug("Initializing the post-processing queue.\n"); + + /* How many filters were requested? */ + for (i = 0; i < PP_FILTERS; i++) { + if (enabled[i]) + curpos++; + } + if (!curpos) + return NULL; + + ppq = calloc(1, sizeof(struct pp_queue_t)); + tmp_q = calloc(curpos, sizeof(pp_func)); + ppq->shaders = calloc(curpos, sizeof(void *)); + ppq->verts = calloc(curpos, sizeof(unsigned int)); + + if (!tmp_q || !ppq || !ppq->shaders || !ppq->verts) + goto error; + + ppq->p = pp_init_prog(ppq, pscreen); + if (!ppq->p) + goto error; + + /* Add the enabled filters to the queue, in order */ + curpos = 0; + ppq->pp_queue = tmp_q; + for (i = 0; i < PP_FILTERS; i++) { + if (enabled[i]) { + ppq->pp_queue[curpos] = pp_filters[i].main; + tmp_req = MAX2(tmp_req, pp_filters[i].inner_tmps); + + if (pp_filters[i].shaders) { + ppq->shaders[curpos] = + calloc(pp_filters[i].shaders + 1, sizeof(void *)); + ppq->verts[curpos] = pp_filters[i].verts; + if (!ppq->shaders[curpos]) + goto error; + } + pp_filters[i].init(ppq, curpos, enabled[i]); + + curpos++; + } + } + + ppq->p->blitctx = util_create_blit(ppq->p->pipe, ppq->p->cso); + if (!ppq->p->blitctx) + goto error; + + ppq->n_filters = curpos; + ppq->n_tmp = (curpos > 2 ? 2 : 1); + ppq->n_inner_tmp = tmp_req; + + ppq->fbos_init = false; + + for (i = 0; i < curpos; i++) + ppq->shaders[i][0] = ppq->p->passvs; + + pp_debug("Queue successfully allocated. %u filter(s).\n", curpos); + + return ppq; + + error: + pp_debug("Error setting up pp\n"); + + if (ppq) + free(ppq->p); + free(ppq); + free(tmp_q); + + return NULL; +} + +/** Free any allocated FBOs (temp buffers). Called after resizing for example. */ +void +pp_free_fbos(struct pp_queue_t *ppq) +{ + + unsigned int i; + + if (!ppq->fbos_init) + return; + + for (i = 0; i < ppq->n_tmp; i++) { + pipe_surface_reference(&ppq->tmps[i], NULL); + pipe_resource_reference(&ppq->tmp[i], NULL); + } + for (i = 0; i < ppq->n_inner_tmp; i++) { + pipe_surface_reference(&ppq->inner_tmps[i], NULL); + pipe_resource_reference(&ppq->inner_tmp[i], NULL); + } + pipe_surface_reference(&ppq->stencils, NULL); + pipe_resource_reference(&ppq->stencil, NULL); + + ppq->fbos_init = false; +} + +/** Free the pp queue. Called on context termination. */ +void +pp_free(struct pp_queue_t *ppq) +{ + + unsigned int i, j; + + pp_free_fbos(ppq); + + util_destroy_blit(ppq->p->blitctx); + + cso_set_fragment_sampler_views(ppq->p->cso, 0, NULL); + cso_release_all(ppq->p->cso); + + for (i = 0; i < ppq->n_filters; i++) { + for (j = 0; j < PP_MAX_PASSES && ppq->shaders[i][j]; j++) { + if (j >= ppq->verts[i]) { + ppq->p->pipe->delete_fs_state(ppq->p->pipe, ppq->shaders[i][j]); + ppq->shaders[i][j] = NULL; + } + else if (ppq->shaders[i][j] != ppq->p->passvs) { + ppq->p->pipe->delete_vs_state(ppq->p->pipe, ppq->shaders[i][j]); + ppq->shaders[i][j] = NULL; + } + } + } + + cso_destroy_context(ppq->p->cso); + ppq->p->pipe->destroy(ppq->p->pipe); + + free(ppq->p); + free(ppq->pp_queue); + free(ppq); + + pp_debug("Queue taken down.\n"); +} + +/** Internal debug function. Should be available to final users. */ +void +pp_debug(const char *fmt, ...) +{ + va_list ap; + + if (!getenv("PP_DEBUG")) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +/** Allocate the temp FBOs. Called on makecurrent and resize. */ +void +pp_init_fbos(struct pp_queue_t *ppq, const unsigned int w, + const unsigned int h, struct pipe_resource *indepth) +{ + + struct program *p = ppq->p; /* The lazy will inherit the earth */ + + unsigned int i; + struct pipe_resource tmp_res; + + if (ppq->fbos_init) + return; + + pp_debug("Initializing FBOs, size %ux%u\n", w, h); + pp_debug("Requesting %u temps and %u inner temps\n", ppq->n_tmp, + ppq->n_inner_tmp); + + memset(&tmp_res, 0, sizeof(tmp_res)); + tmp_res.target = PIPE_TEXTURE_2D; + tmp_res.format = p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM; + tmp_res.width0 = w; + tmp_res.height0 = h; + tmp_res.depth0 = 1; + tmp_res.array_size = 1; + tmp_res.last_level = 0; + tmp_res.bind = p->surf.usage = PIPE_BIND_RENDER_TARGET; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) + pp_debug("Temp buffers' format fail\n"); + + for (i = 0; i < ppq->n_tmp; i++) { + ppq->tmp[i] = p->screen->resource_create(p->screen, &tmp_res); + ppq->tmps[i] = p->pipe->create_surface(p->pipe, ppq->tmp[i], &p->surf); + + if (!ppq->tmp[i] || !ppq->tmps[i]) + goto error; + } + + for (i = 0; i < ppq->n_inner_tmp; i++) { + ppq->inner_tmp[i] = p->screen->resource_create(p->screen, &tmp_res); + ppq->inner_tmps[i] = p->pipe->create_surface(p->pipe, + ppq->inner_tmp[i], + &p->surf); + + if (!ppq->inner_tmp[i] || !ppq->inner_tmps[i]) + goto error; + } + + tmp_res.format = p->surf.format = indepth->format; + tmp_res.bind = p->surf.usage = PIPE_BIND_DEPTH_STENCIL; + ppq->depth = indepth; + if (!ppq->depth) + goto error; + + tmp_res.format = p->surf.format = PIPE_FORMAT_S8_USCALED_Z24_UNORM; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) { + + tmp_res.format = p->surf.format = PIPE_FORMAT_Z24_UNORM_S8_USCALED; + + if (!p->screen->is_format_supported(p->screen, tmp_res.format, + tmp_res.target, 1, tmp_res.bind)) + pp_debug("Temp Sbuffer format fail\n"); + } + + ppq->stencil = p->screen->resource_create(p->screen, &tmp_res); + ppq->stencils = p->pipe->create_surface(p->pipe, ppq->stencil, &p->surf); + if (!ppq->stencil || !ppq->stencils) + goto error; + + + p->framebuffer.width = w; + p->framebuffer.height = h; + + p->viewport.scale[0] = p->viewport.translate[0] = (float) w / 2.0; + p->viewport.scale[1] = p->viewport.translate[1] = (float) h / 2.0; + p->viewport.scale[3] = 1.0f; + p->viewport.translate[3] = 0.0f; + + ppq->fbos_init = true; + + return; + + error: + pp_debug("Failed to allocate temp buffers!\n"); +} diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c new file mode 100644 index 00000000000..476502fca93 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c @@ -0,0 +1,304 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#include <stdio.h> +#include <string.h> +#include "postprocess/postprocess.h" +#include "postprocess/pp_mlaa.h" +#include "postprocess/pp_filters.h" +#include "util/u_blit.h" +#include "util/u_box.h" +#include "util/u_sampler.h" +#include "util/u_inlines.h" +#include "pipe/p_screen.h" + +#define IMM_SPACE 80 + +static float constants[] = { 1, 1, 0, 0 }; +static unsigned int dimensions[2] = { 0, 0 }; + +static struct pipe_resource *constbuf, *areamaptex; + +/** Upload the constants. */ +static void +up_consts(struct pipe_context *pipe) +{ + struct pipe_box box; + + u_box_2d(0, 0, sizeof(constants), 1, &box); + pipe->transfer_inline_write(pipe, constbuf, 0, PIPE_TRANSFER_WRITE, + &box, constants, sizeof(constants), + sizeof(constants)); +} + +/** Run function of the MLAA filter. */ +static void +pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n, bool iscolor) +{ + + struct program *p = ppq->p; + + struct pipe_depth_stencil_alpha_state mstencil; + struct pipe_sampler_view v_tmp, *arr[3]; + + unsigned int w = p->framebuffer.width; + unsigned int h = p->framebuffer.height; + + const struct pipe_stencil_ref ref = { {1} }; + memset(&mstencil, 0, sizeof(mstencil)); + cso_set_stencil_ref(p->cso, &ref); + + /* Init the pixel size constant */ + if (dimensions[0] != p->framebuffer.width || + dimensions[1] != p->framebuffer.height) { + constants[0] = 1.0 / p->framebuffer.width; + constants[1] = 1.0 / p->framebuffer.height; + + up_consts(p->pipe); + dimensions[0] = p->framebuffer.width; + dimensions[1] = p->framebuffer.height; + } + + p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_VERTEX, 0, constbuf); + p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_FRAGMENT, 0, constbuf); + + mstencil.stencil[0].enabled = 1; + mstencil.stencil[0].valuemask = mstencil.stencil[0].writemask = ~0; + mstencil.stencil[0].func = PIPE_FUNC_ALWAYS; + mstencil.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP; + mstencil.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP; + mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + + p->framebuffer.zsbuf = ppq->stencils; + + /* First pass: depth edge detection */ + if (iscolor) + pp_filter_setup_in(p, in); + else + pp_filter_setup_in(p, ppq->depth); + + pp_filter_setup_out(p, ppq->inner_tmp[0]); + + pp_filter_set_fb(p); + pp_filter_misc_state(p); + cso_set_depth_stencil_alpha(p->cso, &mstencil); + p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR, + p->clear_color, 0, 0); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler_done(p->cso); + cso_set_fragment_sampler_views(p->cso, 1, &p->view); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][2]); + + pp_filter_draw(p); + pp_filter_end_pass(p); + + + /* Second pass: blend weights */ + /* Sampler order: areamap, edgesmap, edgesmapL (reversed, thx compiler) */ + mstencil.stencil[0].func = PIPE_FUNC_EQUAL; + mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; + cso_set_depth_stencil_alpha(p->cso, &mstencil); + + pp_filter_setup_in(p, areamaptex); + pp_filter_setup_out(p, ppq->inner_tmp[1]); + + u_sampler_view_default_template(&v_tmp, ppq->inner_tmp[0], + ppq->inner_tmp[0]->format); + arr[1] = arr[2] = p->pipe->create_sampler_view(p->pipe, + ppq->inner_tmp[0], &v_tmp); + + pp_filter_set_clear_fb(p); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler(p->cso, 1, &p->sampler_point); + cso_single_sampler(p->cso, 2, &p->sampler); + cso_single_sampler_done(p->cso); + + arr[0] = p->view; + cso_set_fragment_sampler_views(p->cso, 3, arr); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]); /* passvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][3]); + + pp_filter_draw(p); + pp_filter_end_pass(p); + pipe_sampler_view_reference(&arr[1], NULL); + + + /* Third pass: smoothed edges */ + /* Sampler order: colormap, blendmap (wtf compiler) */ + pp_filter_setup_in(p, ppq->inner_tmp[1]); + pp_filter_setup_out(p, out); + + pp_filter_set_fb(p); + + /* Blit the input to the output */ + util_blit_pixels(p->blitctx, in, 0, 0, 0, + w, h, 0, p->framebuffer.cbufs[0], + 0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST); + + u_sampler_view_default_template(&v_tmp, in, in->format); + arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp); + + cso_single_sampler(p->cso, 0, &p->sampler_point); + cso_single_sampler(p->cso, 1, &p->sampler_point); + cso_single_sampler_done(p->cso); + + arr[1] = p->view; + cso_set_fragment_sampler_views(p->cso, 2, arr); + + cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */ + cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][4]); + + p->blend.rt[0].blend_enable = 1; + cso_set_blend(p->cso, &p->blend); + + pp_filter_draw(p); + pp_filter_end_pass(p); + pipe_sampler_view_reference(&arr[0], NULL); + + p->blend.rt[0].blend_enable = 0; + p->framebuffer.zsbuf = NULL; +} + +/** The init function of the MLAA filter. */ +static void +pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n, + unsigned int val, bool iscolor) +{ + + struct pipe_box box; + struct pipe_resource res; + + char *tmp_text = calloc(sizeof(blend2fs_1) + sizeof(blend2fs_2) + + IMM_SPACE, sizeof(char)); + + constbuf = pipe_buffer_create(ppq->p->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, sizeof(constants)); + if (!constbuf) { + pp_debug("Failed to allocate constant buffer\n"); + return; + } + + + pp_debug("mlaa: using %u max search steps\n", val); + + if (!tmp_text) { + pp_debug("Failed to allocate shader space\n"); + return; + } + sprintf(tmp_text, "%s" + "IMM FLT32 { %.8f, 0.0000, 0.0000, 0.0000}\n" + "%s\n", blend2fs_1, (float) val, blend2fs_2); + + memset(&res, 0, sizeof(res)); + + res.target = PIPE_TEXTURE_2D; + res.format = PIPE_FORMAT_R8G8_UNORM; + res.width0 = res.height0 = 165; + res.bind = PIPE_BIND_SAMPLER_VIEW; + res.usage = PIPE_USAGE_STATIC; + res.depth0 = res.array_size = res.nr_samples = 1; + + if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format, + res.target, 1, res.bind)) + pp_debug("Areamap format not supported\n"); + + areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res); + u_box_2d(0, 0, 165, 165, &box); + + ppq->p->pipe->transfer_inline_write(ppq->p->pipe, areamaptex, 0, + PIPE_TRANSFER_WRITE, &box, + areamap, 165 * 2, sizeof(areamap)); + + + + ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true, + "offsetvs"); + if (iscolor) + ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, color1fs, + false, "color1fs"); + else + ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, depth1fs, + false, "depth1fs"); + ppq->shaders[n][3] = pp_tgsi_to_state(ppq->p->pipe, tmp_text, false, + "blend2fs"); + ppq->shaders[n][4] = pp_tgsi_to_state(ppq->p->pipe, neigh3fs, false, + "neigh3fs"); + + free(tmp_text); +} + +/** Short wrapper to init the depth version. */ +void +pp_jimenezmlaa_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val) +{ + + pp_jimenezmlaa_init_run(ppq, n, val, false); +} + +/** Short wrapper to init the color version. */ +void +pp_jimenezmlaa_init_color(struct pp_queue_t *ppq, unsigned int n, + unsigned int val) +{ + + pp_jimenezmlaa_init_run(ppq, n, val, true); +} + +/** Short wrapper to run the depth version. */ +void +pp_jimenezmlaa(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + pp_jimenezmlaa_run(ppq, in, out, n, false); +} + +/** Short wrapper to run the color version. */ +void +pp_jimenezmlaa_color(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, unsigned int n) +{ + pp_jimenezmlaa_run(ppq, in, out, n, true); +} diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.h b/src/gallium/auxiliary/postprocess/pp_mlaa.h new file mode 100644 index 00000000000..9972d59c6a6 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h @@ -0,0 +1,342 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#ifndef PP_MLAA_H +#define PP_MLAA_H + +#include "postprocess/pp_mlaa_areamap.h" + +static const char depth1fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..2]\n" + "IMM FLT32 { 0.0030, 0.0000, 1.0000, 0.0000}\n" + " 0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n" + " 1: MOV TEMP[1].x, TEMP[0].xxxx\n" + " 2: TEX TEMP[0].x, IN[1].zwww, SAMP[0], 2D\n" + " 3: MOV TEMP[1].y, TEMP[0].xxxx\n" + " 4: TEX TEMP[0].x, IN[2].xyyy, SAMP[0], 2D\n" + " 5: MOV TEMP[1].z, TEMP[0].xxxx\n" + " 6: TEX TEMP[0].x, IN[2].zwww, SAMP[0], 2D\n" + " 7: MOV TEMP[1].w, TEMP[0].xxxx\n" + " 8: TEX TEMP[0].x, IN[0].xyyy, SAMP[0], 2D\n" + " 9: ADD TEMP[2], TEMP[0].xxxx, -TEMP[1]\n" + " 10: ABS TEMP[0], TEMP[2]\n" + " 11: SGE TEMP[2], TEMP[0], IMM[0].xxxx\n" + " 12: DP4 TEMP[0].x, TEMP[2], IMM[0].zzzz\n" + " 13: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy\n" + " 14: IF TEMP[1].xxxx :16\n" + " 15: KILP\n" + " 16: ENDIF\n" + " 17: MOV OUT[0], TEMP[2]\n" + " 18: END\n"; + + +static const char color1fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL TEMP[0..2]\n" + "IMM FLT32 { 0.2126, 0.7152, 0.0722, 0.1000}\n" + "IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}\n" + " 0: TEX TEMP[1].xyz, IN[1].xyyy, SAMP[0], 2D\n" + " 1: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[0]\n" + " 2: TEX TEMP[1].xyz, IN[1].zwww, SAMP[0], 2D\n" + " 3: DP3 TEMP[0].y, TEMP[1].xyzz, IMM[0].xyzz\n" + " 4: TEX TEMP[1].xyz, IN[2].xyyy, SAMP[0], 2D\n" + " 5: DP3 TEMP[0].z, TEMP[1].xyzz, IMM[0].xyzz\n" + " 6: TEX TEMP[1].xyz, IN[2].zwww, SAMP[0], 2D\n" + " 7: DP3 TEMP[0].w, TEMP[1].xyzz, IMM[0].xyzz\n" + " 8: TEX TEMP[1].xyz, IN[0].xyyy, SAMP[0], 2D\n" + " 9: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[0].xyzz\n" + " 10: ADD TEMP[1], TEMP[2].xxxx, -TEMP[0]\n" + " 11: ABS TEMP[0], TEMP[1]\n" + " 12: SGE TEMP[2], TEMP[0], IMM[0].wwww\n" + " 13: DP4 TEMP[0].x, TEMP[2], IMM[1].xxxx\n" + " 14: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy\n" + " 15: IF TEMP[1].xxxx :17\n" + " 16: KILP\n" + " 17: ENDIF\n" + " 18: MOV OUT[0], TEMP[2]\n" + " 19: END\n"; + + +static const char neigh3fs[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL IN[1], GENERIC[10], PERSPECTIVE\n" + "DCL IN[2], GENERIC[11], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL SAMP[1]\n" + "DCL TEMP[0..8]\n" + "IMM FLT32 { 1.0000, 0.00001, 0.0000, 0.0000}\n" + " 0: TEX TEMP[0], IN[0].xyyy, SAMP[1], 2D\n" + " 1: MOV TEMP[1].x, TEMP[0].xxxx\n" + " 2: TEX TEMP[2].y, IN[2].zwww, SAMP[1], 2D\n" + " 3: MOV TEMP[1].y, TEMP[2].yyyy\n" + " 4: MOV TEMP[1].z, TEMP[0].zzzz\n" + " 5: TEX TEMP[1].w, IN[2].xyyy, SAMP[1], 2D\n" + " 6: MUL TEMP[4], TEMP[1], TEMP[1]\n" + " 7: MUL TEMP[5], TEMP[4], TEMP[1]\n" + " 8: DP4 TEMP[1].x, TEMP[5], IMM[0].xxxx\n" + " 9: SLT TEMP[4].x, TEMP[1].xxxx, IMM[0].yyyy\n" + " 10: IF TEMP[4].xxxx :12\n" + " 11: KILP\n" + " 12: ENDIF\n" + " 13: TEX TEMP[4], IN[0].xyyy, SAMP[0], 2D\n" + " 14: TEX TEMP[6], IN[1].zwww, SAMP[0], 2D\n" + " 15: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].xxxx\n" + " 16: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n" + " 17: MAD TEMP[7], TEMP[6], TEMP[0].xxxx, TEMP[8]\n" + " 18: MUL TEMP[6], TEMP[7], TEMP[5].xxxx\n" + " 19: TEX TEMP[7], IN[2].zwww, SAMP[0], 2D\n" + " 20: ADD TEMP[8].x, IMM[0].xxxx, -TEMP[2].yyyy\n" + " 21: MUL TEMP[3], TEMP[4], TEMP[8].xxxx\n" + " 22: MAD TEMP[8], TEMP[7], TEMP[2].yyyy, TEMP[3]\n" + " 23: MAD TEMP[2], TEMP[8], TEMP[5].yyyy, TEMP[6]\n" + " 24: TEX TEMP[6], IN[1].xyyy, SAMP[0], 2D\n" + " 25: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].zzzz\n" + " 26: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n" + " 27: MAD TEMP[7], TEMP[6], TEMP[0].zzzz, TEMP[8]\n" + " 28: MAD TEMP[0], TEMP[7], TEMP[5].zzzz, TEMP[2]\n" + " 29: TEX TEMP[2], IN[2].xyyy, SAMP[0], 2D\n" + " 30: ADD TEMP[6].x, IMM[0].xxxx, -TEMP[1].wwww\n" + " 31: MUL TEMP[7], TEMP[4], TEMP[6].xxxx\n" + " 32: MAD TEMP[4], TEMP[2], TEMP[1].wwww, TEMP[7]\n" + " 33: MAD TEMP[2], TEMP[4], TEMP[5].wwww, TEMP[0]\n" + " 34: RCP TEMP[0].x, TEMP[1].xxxx\n" + " 35: MUL OUT[0], TEMP[2], TEMP[0].xxxx\n" + " 36: END\n"; + + +static const char offsetvs[] = "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], GENERIC[0]\n" + "DCL OUT[2], GENERIC[10]\n" + "DCL OUT[3], GENERIC[11]\n" + "DCL CONST[0]\n" + "IMM FLT32 { 1.0000, 0.0000, -1.0000, 0.0000}\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: MOV OUT[1], IN[1]\n" + " 2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n" + " 3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n" + " 4: END\n"; + + +static const char blend2fs_1[] = "FRAG\n" + "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL SAMP[0]\n" + "DCL SAMP[1]\n" + "DCL SAMP[2]\n" + "DCL CONST[0]\n" + "DCL TEMP[0..6]\n" + "IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n" + "IMM FLT32 { -1.5000, -2.0000, 0.9000, 1.5000}\n" + "IMM FLT32 { 2.0000, 1.0000, 4.0000, 33.0000}\n"; + +static const char blend2fs_2[] = + " 0: MOV TEMP[0], IMM[0].xxxx\n" + " 1: TEX TEMP[1], IN[0].xyyy, SAMP[1], 2D\n" + " 2: MOV TEMP[2].x, TEMP[1]\n" + " 3: SNE TEMP[3].x, TEMP[1].yyyy, IMM[0].xxxx\n" + " 4: IF TEMP[3].xxxx :76\n" + " 5: MOV TEMP[1].xy, IN[0].xyxx\n" + " 6: MOV TEMP[4].x, IMM[1].xxxx\n" + " 7: BGNLOOP :24\n" + " 8: MUL TEMP[5].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 9: SLE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n" + " 10: IF TEMP[6].xxxx :12\n" + " 11: BRK\n" + " 12: ENDIF\n" + " 13: MOV TEMP[4].y, IMM[0].xxxx\n" + " 14: MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n" + " 15: MOV TEMP[3].w, IMM[0].xxxx\n" + " 16: TXL TEMP[5], TEMP[3], SAMP[2], 2D\n" + " 17: MOV TEMP[3].x, TEMP[5].yyyy\n" + " 18: SLT TEMP[6].x, TEMP[5].yyyy, IMM[1].zzzz\n" + " 19: IF TEMP[6].xxxx :21\n" + " 20: BRK\n" + " 21: ENDIF\n" + " 22: ADD TEMP[6].x, TEMP[4].xxxx, IMM[1].yyyy\n" + " 23: MOV TEMP[4].x, TEMP[6].xxxx\n" + " 24: ENDLOOP :7\n" + " 25: ADD TEMP[1].x, TEMP[4].xxxx, IMM[1].wwww\n" + " 26: MAD TEMP[6].x, -IMM[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx\n" + " 27: MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 28: MAX TEMP[4].x, TEMP[6].xxxx, TEMP[1].xxxx\n" + " 29: MOV TEMP[1].x, TEMP[4].xxxx\n" + " 30: MOV TEMP[3].xy, IN[0].xyxx\n" + " 31: MOV TEMP[5].x, IMM[1].wwww\n" + " 32: BGNLOOP :49\n" + " 33: MUL TEMP[6].x, IMM[2].xxxx, IMM[3].xxxx\n" + " 34: SGE TEMP[4].x, TEMP[5].xxxx, TEMP[6].xxxx\n" + " 35: IF TEMP[4].xxxx :37\n" + " 36: BRK\n" + " 37: ENDIF\n" + " 38: MOV TEMP[5].y, IMM[0].xxxx\n" + " 39: MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n" + " 40: MOV TEMP[4].w, IMM[0].xxxx\n" + " 41: TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n" + " 42: MOV TEMP[4].x, TEMP[6].yyyy\n" + " 43: SLT TEMP[0].x, TEMP[6].yyyy, IMM[1].zzzz\n" + " 44: IF TEMP[0].xxxx :46\n" + " 45: BRK\n" + " 46: ENDIF\n" + " 47: ADD TEMP[6].x, TEMP[5].xxxx, IMM[2].xxxx\n" + " 48: MOV TEMP[5].x, TEMP[6].xxxx\n" + " 49: ENDLOOP :32\n" + " 50: ADD TEMP[3].x, TEMP[5].xxxx, IMM[1].xxxx\n" + " 51: MAD TEMP[5].x, IMM[2].xxxx, TEMP[4].xxxx, TEMP[3].xxxx\n" + " 52: MUL TEMP[3].x, IMM[2].xxxx, IMM[3].xxxx\n" + " 53: MIN TEMP[4].x, TEMP[5].xxxx, TEMP[3].xxxx\n" + " 54: MOV TEMP[3].x, TEMP[1].xxxx\n" + " 55: MOV TEMP[3].y, TEMP[4].xxxx\n" + " 56: MOV TEMP[5].yw, IMM[0].yyyy\n" + " 57: MOV TEMP[5].x, TEMP[1].xxxx\n" + " 58: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n" + " 59: MOV TEMP[5].z, TEMP[1].xxxx\n" + " 60: MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n" + " 61: MOV TEMP[4], TEMP[1].xyyy\n" + " 62: MOV TEMP[4].w, IMM[0].xxxx\n" + " 63: TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n" + " 64: MOV TEMP[4].x, TEMP[5].xxxx\n" + " 65: MOV TEMP[5], TEMP[1].zwww\n" + " 66: MOV TEMP[5].w, IMM[0].xxxx\n" + " 67: TXL TEMP[1].x, TEMP[5], SAMP[2], 2D\n" + " 68: MOV TEMP[4].y, TEMP[1].xxxx\n" + " 69: MUL TEMP[5].xy, IMM[2].zzzz, TEMP[4].xyyy\n" + " 70: ROUND TEMP[1].xy, TEMP[5].xyyy\n" + " 71: ABS TEMP[4].xy, TEMP[3].xyyy\n" + " 72: MAD TEMP[3].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[4].xyyy\n" + " 73: MUL TEMP[5].xyz, TEMP[3].xyyy, IMM[0].zzzz\n" + " 74: MOV TEMP[5].w, IMM[0].xxxx\n" + " 75: TXL TEMP[0].xy, TEMP[5], SAMP[0], 2D\n" + " 76: ENDIF\n" + " 77: SNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx\n" + " 78: IF TEMP[1].xxxx :151\n" + " 79: MOV TEMP[1].xy, IN[0].xyxx\n" + " 80: MOV TEMP[3].x, IMM[1].xxxx\n" + " 81: BGNLOOP :98\n" + " 82: MUL TEMP[4].x, IMM[1].yyyy, IMM[3].xxxx\n" + " 83: SLE TEMP[5].x, TEMP[3].xxxx, TEMP[4].xxxx\n" + " 84: IF TEMP[5].xxxx :86\n" + " 85: BRK\n" + " 86: ENDIF\n" + " 87: MOV TEMP[3].y, IMM[0].xxxx\n" + " 88: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n" + " 89: MOV TEMP[5].w, IMM[0].xxxx\n" + " 90: TXL TEMP[4], TEMP[5], SAMP[2], 2D\n" + " 91: MOV TEMP[2].x, TEMP[4].xxxx\n" + " 92: SLT TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz\n" + " 93: IF TEMP[5].xxxx :95\n" + " 94: BRK\n" + " 95: ENDIF\n" + " 96: ADD TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy\n" + " 97: MOV TEMP[3].x, TEMP[4].xxxx\n" + " 98: ENDLOOP :81\n" + " 99: ADD TEMP[1].x, TEMP[3].xxxx, IMM[1].wwww\n" + "100: MAD TEMP[6].x, -IMM[2].xxxx, TEMP[2].xxxx, TEMP[1].xxxx\n" + "101: MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n" + "102: MAX TEMP[3].x, TEMP[6].xxxx, TEMP[1].xxxx\n" + "103: MOV TEMP[1].x, TEMP[3].xxxx\n" + "104: MOV TEMP[2].xy, IN[0].xyxx\n" + "105: MOV TEMP[4].x, IMM[1].wwww\n" + "106: BGNLOOP :123\n" + "107: MUL TEMP[5].x, IMM[2].xxxx, IMM[3].xxxx\n" + "108: SGE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n" + "109: IF TEMP[6].xxxx :111\n" + "110: BRK\n" + "111: ENDIF\n" + "112: MOV TEMP[4].y, IMM[0].xxxx\n" + "113: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n" + "114: MOV TEMP[5].w, IMM[0].xxxx\n" + "115: TXL TEMP[6], TEMP[5], SAMP[2], 2D\n" + "116: MOV TEMP[3].x, TEMP[6].xxxx\n" + "117: SLT TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz\n" + "118: IF TEMP[5].xxxx :120\n" + "119: BRK\n" + "120: ENDIF\n" + "121: ADD TEMP[6].x, TEMP[4].xxxx, IMM[2].xxxx\n" + "122: MOV TEMP[4].x, TEMP[6].xxxx\n" + "123: ENDLOOP :106\n" + "124: ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].xxxx\n" + "125: MAD TEMP[4].x, IMM[2].xxxx, TEMP[3].xxxx, TEMP[2].xxxx\n" + "126: MUL TEMP[2].x, IMM[2].xxxx, IMM[3].xxxx\n" + "127: MIN TEMP[3].x, TEMP[4].xxxx, TEMP[2].xxxx\n" + "128: MOV TEMP[2].x, TEMP[1].xxxx\n" + "129: MOV TEMP[2].y, TEMP[3].xxxx\n" + "130: MOV TEMP[4].xz, IMM[0].yyyy\n" + "131: MOV TEMP[4].y, TEMP[1].xxxx\n" + "132: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n" + "133: MOV TEMP[4].w, TEMP[1].xxxx\n" + "134: MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n" + "135: MOV TEMP[3], TEMP[1].xyyy\n" + "136: MOV TEMP[3].w, IMM[0].xxxx\n" + "137: TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n" + "138: MOV TEMP[3].x, TEMP[4].yyyy\n" + "139: MOV TEMP[4], TEMP[1].zwww\n" + "140: MOV TEMP[4].w, IMM[0].xxxx\n" + "141: TXL TEMP[1].y, TEMP[4], SAMP[2], 2D\n" + "142: MOV TEMP[3].y, TEMP[1].yyyy\n" + "143: MUL TEMP[4].xy, IMM[2].zzzz, TEMP[3].xyyy\n" + "144: ROUND TEMP[1].xy, TEMP[4].xyyy\n" + "145: ABS TEMP[3].xy, TEMP[2].xyyy\n" + "146: MAD TEMP[2].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[3].xyyy\n" + "147: MUL TEMP[3].xyz, TEMP[2].xyyy, IMM[0].zzzz\n" + "148: MOV TEMP[3].w, IMM[0].xxxx\n" + "149: TXL TEMP[1].xy, TEMP[3], SAMP[0], 2D\n" + "150: MOV TEMP[0].zw, TEMP[1].yyxy\n" + "151: ENDIF\n" + "152: MOV OUT[0], TEMP[0]\n" + "153: END\n"; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h new file mode 100644 index 00000000000..1446ff2cdf0 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h @@ -0,0 +1,2821 @@ +/** + * Copyright (C) 2010 Jorge Jimenez ([email protected]) + * Copyright (C) 2010 Belen Masia ([email protected]) + * Copyright (C) 2010 Jose I. Echevarria ([email protected]) + * Copyright (C) 2010 Fernando Navarro ([email protected]) + * Copyright (C) 2010 Diego Gutierrez ([email protected]) + * Copyright (C) 2011 Lauri Kasanen ([email protected]) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the following statement: + * + * "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia, + * Jose I. Echevarria, Fernando Navarro and Diego Gutierrez." + * + * Only for use in the Mesa project, this point 2 is filled by naming the + * technique Jimenez's MLAA in the Mesa config options. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the copyright holders. + */ + +#ifndef PP_MLAA_AREAMAP_H +#define PP_MLAA_AREAMAP_H + +static const unsigned char areamap[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, + 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, + 0, 21, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, + 0, 47, 0, 28, 0, 12, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, + 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, + 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, + 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, + 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, + 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, + 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, + 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, + 0, 21, 0, 13, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, + 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, + 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, + 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, + 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, + 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, + 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, + 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, + 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, + 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, + 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, + 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, + 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, + 0, 13, 0, 8, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, + 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, + 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, + 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, + 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, + 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, + 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, + 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, + 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, + 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, + 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, + 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, + 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, + 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, + 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, + 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, + 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, + 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, + 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, + 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, + 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, + 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, + 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, + 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, + 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, + 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, + 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, + 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, + 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, + 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, + 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, + 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, + 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, + 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, + 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, + 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, + 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, + 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, + 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, + 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, + 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, + 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, + 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, + 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, + 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, + 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, + 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, + 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, + 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, + 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, + 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, + 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, + 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, + 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, + 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, + 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, + 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, + 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, + 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, + 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, + 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, + 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, + 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, + 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, + 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, + 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, + 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, + 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, + 119, + 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, + 122, + 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 62, 0, + 63, + 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, + 116, + 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, + 121, + 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, + 123, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 31, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, + 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, + 119, + 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, + 122, + 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 31, 31, 0, 63, 0, + 85, + 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, + 117, + 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, + 121, + 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, + 123, + 0, 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, + 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, + 109, + 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, + 115, + 0, 115, 0, 115, 0, 116, 0, 63, 0, 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, + 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, + 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, + 113, + 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, + 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, + 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, + 110, + 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, + 115, + 0, 115, 0, 116, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, + 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, + 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, + 114, + 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, + 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, + 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, + 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 85, 0, + 31, + 0, 12, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, + 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, + 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, + 108, + 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 6, 0, 21, 0, 36, 0, 47, + 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, + 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, + 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 85, 0, 31, 0, 6, 6, + 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, + 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, + 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, + 109, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, + 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, + 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, + 0, 101, 0, 102, 0, 102, 0, 95, 0, 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, + 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, + 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, + 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, + 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, + 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, + 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, + 0, 102, 0, 102, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, + 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, + 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, + 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, + 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, + 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 102, 0, 63, + 0, 36, 0, 15, 0, 6, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, + 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, + 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, + 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 3, 0, 12, + 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, + 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, + 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 102, 0, 63, 0, 36, 0, + 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, + 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, + 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, + 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, + 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, + 0, 88, 0, 89, 0, 90, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, + 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, + 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, + 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, + 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, + 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, + 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, + 0, 89, 0, 90, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, + 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, + 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, + 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, + 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, + 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 109, 0, 79, + 0, 56, 0, 38, 0, 23, 0, 10, 0, 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, + 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, + 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, + 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, + 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, + 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, + 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 109, 0, 79, 0, 56, 0, + 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, + 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, + 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, + 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, + 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, + 0, 77, 0, 78, 0, 79, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, + 0, 4, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, + 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, + 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, + 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, 0, 7, 0, 15, 0, 21, + 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, + 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, + 0, 78, 0, 79, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, + 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, + 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, + 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, + 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, + 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 113, 0, 89, + 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, + 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, + 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, + 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, + 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, + 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, + 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 113, 0, 89, 0, 69, 0, + 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, + 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, + 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, + 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, + 0, 66, 0, 68, 0, 69, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, + 0, 15, 0, 7, 0, 2, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, + 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, + 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, + 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 1, 0, 6, + 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, + 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, + 0, 68, 0, 69, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, + 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, + 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, + 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, + 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, + 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 115, 0, 95, + 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, + 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, + 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, + 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, + 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, + 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, + 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 115, 0, 95, 0, 78, 0, + 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, + 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, + 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, + 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, + 0, 57, 0, 59, 0, 60, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, + 0, 26, 0, 19, 0, 12, 0, 5, 0, 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, + 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, + 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, + 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, + 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, + 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, + 0, 59, 0, 60, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, + 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, + 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, + 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, + 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, + 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 117, 0, 100, + 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, + 0, 2, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, + 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, + 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, + 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, 0, 4, 0, 9, 0, 13, + 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, + 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 117, 0, 100, 0, 85, 0, + 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, + 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, + 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, + 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, + 0, 49, 0, 51, 0, 52, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, + 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, + 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, + 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, + 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, + 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, + 0, 51, 0, 52, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, + 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, + 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, + 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, + 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 119, 0, 103, + 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, + 0, 9, 0, 4, 0, 2, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, + 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, + 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, + 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 1, 0, 4, + 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, + 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 119, 0, 103, 0, 89, 0, + 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, + 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, + 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, + 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, + 0, 41, 0, 43, 0, 45, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, + 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, + 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, + 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, + 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, + 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, + 0, 43, 0, 45, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, + 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, + 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, + 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, + 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 120, 0, 106, + 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, + 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, + 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, + 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, + 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, + 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, + 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 120, 0, 106, 0, 93, 0, + 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, + 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, + 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, + 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, + 0, 34, 0, 36, 0, 38, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, + 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, + 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, + 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, + 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 10, + 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, + 0, 36, 0, 38, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, + 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, + 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, + 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, + 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 120, 0, 108, + 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, + 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, + 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, + 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, + 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, + 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 120, 0, 108, 0, 97, 0, + 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, + 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, + 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, + 0, 28, 0, 30, 0, 31, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, + 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, + 0, 6, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, + 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, + 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, + 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, + 0, 30, 0, 31, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, + 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, + 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, + 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, + 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 121, 0, 110, + 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, + 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, + 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, + 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, + 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, + 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, + 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 121, 0, 110, 0, 99, 0, + 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, + 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, + 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, + 0, 22, 0, 24, 0, 25, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, + 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, + 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, + 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, + 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, + 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, + 0, 24, 0, 25, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, + 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, + 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, + 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, + 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 121, 0, 111, + 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, + 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, + 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, + 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, + 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, + 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 121, 0, 111, 0, 102, 0, + 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, + 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, + 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, + 0, 16, 0, 18, 0, 20, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, + 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, + 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, + 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, + 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, + 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, + 0, 18, 0, 20, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, + 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, + 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, + 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 122, 0, 112, + 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, + 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, + 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, + 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, + 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, + 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, + 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 122, 0, 112, 0, 103, 0, + 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, + 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, + 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, + 0, 11, 0, 13, 0, 15, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, + 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, + 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, + 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, + 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, + 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, + 0, 13, 0, 15, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, + 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, + 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 122, 0, 113, + 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, + 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, + 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, + 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, + 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, + 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, + 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 122, 0, 113, 0, 105, 0, + 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, + 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, + 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, + 0, 6, 0, 8, 0, 10, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, + 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, + 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, + 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, + 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, + 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, + 0, 8, 0, 10, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, + 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 123, 0, 114, + 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, + 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, + 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, + 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, + 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, + 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 123, 0, 114, 0, 106, 0, + 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, + 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 4, 0, 6, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, + 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, + 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, + 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, + 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, + 0, 4, 0, 6, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, + 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, + 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, + 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 123, 0, 115, + 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, + 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, + 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, + 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, + 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, + 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 123, 0, 115, 0, 108, 0, + 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, + 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, + 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, + 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, + 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, + 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, + 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, + 0, 0, 0, 1, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, + 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, + 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, + 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, + 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, + 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, + 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, + 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 123, 0, 116, 0, 109, 0, + 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, + 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, + 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, + 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, + 119, 0, + 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, + 122, 0, + 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, + 0, + 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, + 0, + 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, + 121, 0, + 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, + 123, 0, + 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, + 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, + 119, 0, + 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, + 122, 0, + 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, 0, 85, + 0, + 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, + 0, + 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, + 121, 0, + 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, + 123, 0, + 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, + 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, + 0, + 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, + 115, 0, + 115, 0, 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, + 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, + 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, + 113, 0, + 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, + 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, + 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, + 0, + 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, + 115, 0, + 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, + 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, + 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, + 114, 0, + 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, 0, + 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, + 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, + 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, + 31, + 6, 6, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, + 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, + 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, + 108, 0, + 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 12, 0, 21, 0, 36, 0, 47, 0, + 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, + 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, + 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, 31, 6, 6, + 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, + 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, + 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, + 109, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, + 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, + 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, + 101, 0, 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, + 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, + 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, + 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, + 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, + 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, + 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, + 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, + 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, + 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, + 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, + 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, + 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, + 0, 36, 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, + 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, + 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, + 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 6, 0, 12, 0, + 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, + 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, + 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, 0, 36, + 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, + 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, + 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, + 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, + 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, + 88, 0, 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, + 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, + 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, + 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, + 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, + 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, + 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, + 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, + 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, + 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, + 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, + 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, + 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, + 0, 56, 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, + 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, + 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, + 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, + 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, + 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, + 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, 0, 56, + 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, + 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, + 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, 0, + 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, + 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, + 77, 0, 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, + 2, 2, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, + 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, + 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, + 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 4, 0, 7, 0, 15, 0, 21, 0, + 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, + 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, + 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, + 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, + 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, + 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, + 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, + 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, + 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, + 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, + 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, + 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, + 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, + 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, + 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, 0, 69, + 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, + 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, + 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, + 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, + 66, 0, 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, + 0, 15, 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, + 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, + 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, + 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 2, 0, 6, 0, + 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, + 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, + 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, + 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, + 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, + 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, + 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, + 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, + 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, + 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, + 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, + 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, + 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, + 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, + 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, 0, 78, + 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, + 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, + 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, + 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, + 57, 0, 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, + 0, 26, 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, + 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, + 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, + 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, + 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, + 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, + 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, + 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, + 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, + 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, 0, + 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, + 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, + 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, + 1, 1, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, + 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, + 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, + 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 2, 0, 4, 0, 9, 0, 13, 0, + 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, + 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, 0, 85, + 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, + 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, + 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, + 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, + 49, 0, 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, + 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, + 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, + 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, + 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, + 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, + 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, + 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, + 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, + 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, + 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, + 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, + 0, 9, 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, + 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, + 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, + 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 2, 0, 4, 0, + 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, + 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, 0, 89, + 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, + 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, + 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, + 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, + 41, 0, 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, + 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, + 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, + 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, + 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, + 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, + 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, + 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, + 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, + 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, + 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, + 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, + 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, + 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, + 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, + 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, + 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, + 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, 0, 93, + 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, + 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, + 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, + 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, + 34, 0, 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, + 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, + 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, + 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, + 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 10, 0, + 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, + 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, + 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, + 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, + 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, + 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, + 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, + 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, + 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, + 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, + 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, + 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, + 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, 0, 97, + 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, + 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, + 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, + 28, 0, 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, + 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, + 0, 6, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, + 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, + 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, + 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, + 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, + 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, + 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, + 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, + 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, + 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, + 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, + 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, + 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, + 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, + 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, + 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, 0, 99, + 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, + 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, + 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, + 22, 0, 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, + 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, + 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, + 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, + 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, + 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, + 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, + 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, + 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, + 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, + 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, + 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, + 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, + 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, + 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, + 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, 0, 102, + 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, + 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, + 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, + 16, 0, 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, + 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, + 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, + 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, + 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, + 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, + 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, + 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, + 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, + 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, + 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, + 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, + 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, + 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, + 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, + 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, + 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, 0, 103, + 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, + 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, + 11, 0, 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, + 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, + 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, + 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, + 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, + 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, + 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, + 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, + 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, + 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, + 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, + 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, + 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, + 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, + 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, + 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, + 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, 0, 105, + 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, + 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, + 6, 0, 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, + 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, + 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, + 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, + 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, + 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, + 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, + 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, + 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, + 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, + 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, + 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, + 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, + 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, + 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, + 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, 0, 106, + 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, + 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, + 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, + 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, + 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, + 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, + 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, + 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, + 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, + 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, + 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, + 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, + 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, + 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, + 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, + 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, + 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, 0, 108, + 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, + 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, + 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, + 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, + 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, + 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, + 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, + 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, + 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, + 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, + 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, + 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, + 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, + 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, + 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, + 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, + 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 123, 0, 116, 0, 109, + 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, + 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, + 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 31, 31, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, + 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, + 120, 0, + 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, + 122, 0, + 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 31, + 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, + 115, + 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, + 121, + 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, + 123, + 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 10, 10, + 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, + 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, + 110, 0, + 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, + 115, 0, + 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, + 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, + 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, + 113, + 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 6, 6, 21, 0, 36, 0, 47, 0, 56, 0, + 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, + 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, + 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0, + 31, 0, 6, 6, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, + 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, + 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, + 108, + 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, + 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, + 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, + 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, + 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, + 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, + 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, + 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 3, 3, 12, 0, 23, 0, + 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, + 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, + 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 63, 0, 36, 0, 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, + 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, + 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, + 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, + 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, + 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, + 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, + 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, + 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, + 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, + 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 2, 2, + 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, + 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, + 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, + 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, + 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, + 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, + 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, + 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, 7, 0, 15, 0, 21, 0, 26, 0, + 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, + 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, + 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, + 9, 0, 2, 2, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, + 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, + 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, + 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, + 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, + 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0, + 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, + 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, + 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, + 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, + 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 1, 1, 6, 0, 12, 0, + 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, + 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, + 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, + 23, 0, 15, 0, 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, + 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, + 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, + 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, + 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, + 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0, + 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, + 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, + 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, + 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, + 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 1, 1, + 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, + 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, + 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, + 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, + 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, + 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, + 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, 4, 0, 9, 0, 13, 0, 17, 0, + 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, + 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0, + 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, + 5, 0, 1, 1, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, + 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, + 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, + 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, + 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, + 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, + 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, + 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, + 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, + 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 1, 1, 4, 0, 8, 0, + 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, + 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, + 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, + 14, 0, 9, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, + 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, + 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, + 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, + 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, + 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, + 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, + 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, + 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, + 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, + 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, + 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, + 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, + 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, + 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, + 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, + 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, + 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, + 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, + 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, + 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, + 3, 0, 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, + 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, + 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, + 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, + 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, + 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, + 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, + 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, + 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, + 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, + 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 6, 0, + 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, + 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, + 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, + 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, + 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, + 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, + 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, + 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, + 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, + 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, + 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, + 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, + 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, + 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, + 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, + 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, + 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, + 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, + 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, + 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, + 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, + 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, + 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, + 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, + 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, + 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, + 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, + 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, + 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, + 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, + 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, + 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, + 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, + 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, + 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, + 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, + 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, + 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, + 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, + 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, + 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, + 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, + 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, + 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, + 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, + 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, + 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, + 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, + 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, + 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, + 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, + 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, + 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, + 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, + 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, + 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, + 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, + 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, + 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, + 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, + 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, + 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, + 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, + 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, + 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, + 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, + 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, + 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, + 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, + 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, + 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, + 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, + 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, + 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, + 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, + 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, + 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, + 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, + 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, + 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, + 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, + 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, + 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, + 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, + 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, + 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c new file mode 100644 index 00000000000..b92ac80a5db --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_program.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2010 Jakob Bornecrantz + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess/postprocess.h" +#include "cso_cache/cso_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" + +/** Initialize the internal details */ +struct program * +pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen) +{ + + struct program *p = calloc(1, sizeof(struct program)); + + pp_debug("Initializing program\n"); + if (!pscreen) + return NULL; + + if (!p) + return NULL; + + p->screen = pscreen; + p->pipe = pscreen->context_create(pscreen, NULL); + p->cso = cso_create_context(p->pipe); + + { + static const float verts[4][2][4] = { + { + {1.0f, 1.0f, 0.0f, 1.0f}, + {1.0f, 1.0f, 0.0f, 1.0f} + }, + { + {-1.0f, 1.0f, 0.0f, 1.0f}, + {0.0f, 1.0f, 0.0f, 1.0f} + }, + { + {-1.0f, -1.0f, 0.0f, 1.0f}, + {0.0f, 0.0f, 0.0f, 1.0f} + }, + { + {1.0f, -1.0f, 0.0f, 1.0f}, + {1.0f, 0.0f, 0.0f, 1.0f} + } + }; + + p->vbuf = pipe_buffer_create(pscreen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(verts)); + pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(verts), verts); + } + + p->blend.rt[0].colormask = PIPE_MASK_RGBA; + p->blend.rt[0].rgb_src_factor = p->blend.rt[0].alpha_src_factor = + PIPE_BLENDFACTOR_SRC_ALPHA; + p->blend.rt[0].rgb_dst_factor = p->blend.rt[0].alpha_dst_factor = + PIPE_BLENDFACTOR_INV_SRC_ALPHA; + + p->rasterizer.cull_face = PIPE_FACE_NONE; + p->rasterizer.gl_rasterization_rules = 1; + + p->sampler.wrap_s = p->sampler.wrap_t = p->sampler.wrap_r = + PIPE_TEX_WRAP_CLAMP_TO_EDGE; + + p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + p->sampler.min_img_filter = p->sampler.mag_img_filter = + PIPE_TEX_FILTER_LINEAR; + p->sampler.normalized_coords = 1; + + p->sampler_point.wrap_s = p->sampler_point.wrap_t = + p->sampler_point.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + p->sampler_point.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + p->sampler_point.min_img_filter = p->sampler_point.mag_img_filter = + PIPE_TEX_FILTER_NEAREST; + p->sampler_point.normalized_coords = 1; + + p->velem[0].src_offset = 0; + p->velem[0].instance_divisor = 0; + p->velem[0].vertex_buffer_index = 0; + p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem[1].src_offset = 1 * 4 * sizeof(float); + p->velem[1].instance_divisor = 0; + p->velem[1].vertex_buffer_index = 0; + p->velem[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + if (!p->screen->is_format_supported(p->screen, + PIPE_FORMAT_R32G32B32A32_FLOAT, + PIPE_BUFFER, 1, + PIPE_BIND_VERTEX_BUFFER)) + pp_debug("Vertex buf format fail\n"); + + + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; + const uint semantic_indexes[] = { 0, 0 }; + p->passvs = util_make_vertex_passthrough_shader(p->pipe, 2, + semantic_names, + semantic_indexes); + } + + p->framebuffer.nr_cbufs = 1; + + p->surf.usage = PIPE_BIND_RENDER_TARGET; + p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM; + + p->pipe->set_sample_mask(p->pipe, ~0); + + return p; +} diff --git a/src/gallium/auxiliary/postprocess/pp_program.h b/src/gallium/auxiliary/postprocess/pp_program.h new file mode 100644 index 00000000000..2749b35b372 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_program.h @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2010 Jakob Bornecrantz + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PP_PROGRAM_H +#define PP_PROGRAM_H + +#include "pipe/p_state.h" + +/** +* Internal control details. +*/ +struct program +{ + struct pipe_screen *screen; + struct pipe_context *pipe; + struct cso_context *cso; + + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; /* bilinear */ + struct pipe_sampler_state sampler_point; /* point */ + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_vertex_element velem[2]; + + float clear_color[4]; + + void *passvs; + + struct pipe_resource *vbuf; + struct pipe_surface surf; + struct pipe_sampler_view *view; + + struct blit_state *blitctx; +}; + + +#endif diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c new file mode 100644 index 00000000000..ce671aea360 --- /dev/null +++ b/src/gallium/auxiliary/postprocess/pp_run.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2011 Lauri Kasanen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "postprocess.h" + +#include "postprocess/pp_filters.h" +#include "util/u_blit.h" +#include "util/u_inlines.h" +#include "util/u_sampler.h" + +/** +* Main run function of the PP queue. Called on swapbuffers/flush. +* +* Runs all requested filters in order and handles shuffling the temp +* buffers in between. +*/ +void +pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, + struct pipe_resource *out, struct pipe_resource *indepth) +{ + + unsigned int i; + + if (in->width0 != ppq->p->framebuffer.width || + in->height0 != ppq->p->framebuffer.height) { + pp_debug("Resizing the temp pp buffers\n"); + pp_free_fbos(ppq); + pp_init_fbos(ppq, in->width0, in->height0, indepth); + } + + if (in == out && ppq->n_filters == 1) { + /* Make a copy of in to tmp[0] in this case. */ + unsigned int w = ppq->p->framebuffer.width; + unsigned int h = ppq->p->framebuffer.height; + + util_blit_pixels(ppq->p->blitctx, in, 0, 0, 0, + w, h, 0, ppq->tmps[0], + 0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST); + + in = ppq->tmp[0]; + } + + switch (ppq->n_filters) { + case 1: /* No temp buf */ + ppq->pp_queue[0] (ppq, in, out, 0); + break; + case 2: /* One temp buf */ + + ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0); + ppq->pp_queue[1] (ppq, ppq->tmp[0], out, 1); + + break; + default: /* Two temp bufs */ + ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0); + + for (i = 1; i < (ppq->n_filters - 1); i++) { + if (i % 2 == 0) + ppq->pp_queue[i] (ppq, ppq->tmp[1], ppq->tmp[0], i); + + else + ppq->pp_queue[i] (ppq, ppq->tmp[0], ppq->tmp[1], i); + } + + if (i % 2 == 0) + ppq->pp_queue[i] (ppq, ppq->tmp[1], out, i); + + else + ppq->pp_queue[i] (ppq, ppq->tmp[0], out, i); + + break; + } +} + + +/* Utility functions for the filters. You're not forced to use these if */ +/* your filter is more complicated. */ + +/** Setup this resource as the filter input. */ +void +pp_filter_setup_in(struct program *p, struct pipe_resource *in) +{ + struct pipe_sampler_view v_tmp; + u_sampler_view_default_template(&v_tmp, in, in->format); + p->view = p->pipe->create_sampler_view(p->pipe, in, &v_tmp); +} + +/** Setup this resource as the filter output. */ +void +pp_filter_setup_out(struct program *p, struct pipe_resource *out) +{ + p->surf.format = out->format; + p->surf.usage = PIPE_BIND_RENDER_TARGET; + + p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, out, &p->surf); +} + +/** Clean up the input and output set with the above. */ +void +pp_filter_end_pass(struct program *p) +{ + pipe_surface_reference(&p->framebuffer.cbufs[0], NULL); + pipe_sampler_view_reference(&p->view, NULL); +} + +/** +* Convert the TGSI assembly to a runnable shader. +* +* We need not care about geometry shaders. All we have is screen quads. +*/ +void * +pp_tgsi_to_state(struct pipe_context *pipe, const char *text, bool isvs, + const char *name) +{ + struct pipe_shader_state state; + struct tgsi_token tokens[PP_MAX_TOKENS]; + + if (tgsi_text_translate(text, tokens, Elements(tokens)) == FALSE) { + pp_debug("Failed to translate %s\n", name); + return NULL; + } + + state.tokens = tokens; + + if (isvs) + return pipe->create_vs_state(pipe, &state); + else + return pipe->create_fs_state(pipe, &state); +} + +/** Setup misc state for the filter. */ +void +pp_filter_misc_state(struct program *p) +{ + cso_set_blend(p->cso, &p->blend); + cso_set_depth_stencil_alpha(p->cso, &p->depthstencil); + cso_set_rasterizer(p->cso, &p->rasterizer); + cso_set_viewport(p->cso, &p->viewport); + + cso_set_vertex_elements(p->cso, 2, p->velem); +} + +/** Draw with the filter to the set output. */ +void +pp_filter_draw(struct program *p) +{ + util_draw_vertex_buffer(p->pipe, p->cso, p->vbuf, 0, + PIPE_PRIM_QUADS, 4, 2); + p->pipe->flush(p->pipe, NULL); +} + +/** Set the framebuffer as active. */ +void +pp_filter_set_fb(struct program *p) +{ + cso_set_framebuffer(p->cso, &p->framebuffer); +} + +/** Set the framebuffer as active and clear it. */ +void +pp_filter_set_clear_fb(struct program *p) +{ + cso_set_framebuffer(p->cso, &p->framebuffer); + p->pipe->clear(p->pipe, PIPE_CLEAR_COLOR, p->clear_color, 0, 0); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 712e8aca794..38dc1efa551 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1594,6 +1594,9 @@ store_dest(struct tgsi_exec_machine *mach, #define FETCH(VAL,INDEX,CHAN)\ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) +#define IFETCH(VAL,INDEX,CHAN)\ + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) + /** * Execute ARB-style KIL which is predicated by a src register. @@ -1921,6 +1924,86 @@ exec_txd(struct tgsi_exec_machine *mach, } +static void +exec_txf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_sampler *sampler; + const uint unit = inst->Src[1].Register.Index; + union tgsi_exec_channel r[4]; + uint chan; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + int j; + + IFETCH(&r[3], 0, CHAN_W); + + switch(inst->Texture.Texture) { + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_2D_ARRAY: + IFETCH(&r[2], 0, CHAN_Z); + /* fallthrough */ + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_1D_ARRAY: + IFETCH(&r[1], 0, CHAN_Y); + /* fallthrough */ + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + IFETCH(&r[0], 0, CHAN_X); + break; + default: + assert(0); + break; + } + + sampler = mach->Samplers[unit]; + sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i, rgba); + + for (j = 0; j < QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_txq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_sampler *sampler; + const uint unit = inst->Src[1].Register.Index; + int result[4]; + union tgsi_exec_channel r[4], src; + uint chan; + int i,j; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT); + sampler = mach->Samplers[unit]; + + sampler->get_dims(sampler, src.i[0], result); + + for (i = 0; i < QUAD_SIZE; i++) { + for (j = 0; j < 4; j++) { + r[j].i[i] = result[j]; + } + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } + } +} static void exec_sample(struct tgsi_exec_machine *mach, @@ -2989,6 +3072,17 @@ micro_xor(union tgsi_exec_channel *dst, } static void +micro_mod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] % src1->i[0]; + dst->i[1] = src0->i[1] % src1->i[1]; + dst->i[2] = src0->i[2] % src1->i[2]; + dst->i[3] = src0->i[3] % src1->i[3]; +} + +static void micro_f2i(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -3691,7 +3785,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_XOR: @@ -3703,11 +3797,11 @@ exec_instruction( break; case TGSI_OPCODE_TXF: - assert (0); + exec_txf(mach, inst); break; case TGSI_OPCODE_TXQ: - assert (0); + exec_txq(mach, inst); break; case TGSI_OPCODE_EMIT: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 33f33aa82c7..3f6964c17fb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -90,6 +90,11 @@ struct tgsi_sampler const float c0[QUAD_SIZE], enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); + void (*get_dims)(struct tgsi_sampler *sampler, int level, + int dims[4]); + void (*get_texel)(struct tgsi_sampler *sampler, const int i[QUAD_SIZE], + const int j[QUAD_SIZE], const int k[QUAD_SIZE], + const int lod[QUAD_SIZE], float rgba[NUM_CHANNELS][QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 128 @@ -400,6 +405,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; default: return 0; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 83c6ac75e54..f165f8240e6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -200,19 +200,20 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); } break; + case TGSI_TOKEN_TYPE_PROPERTY: - { - const struct tgsi_full_property *fullprop - = &parse.FullToken.FullProperty; + { + const struct tgsi_full_property *fullprop + = &parse.FullToken.FullProperty; - info->properties[info->num_properties].name = - fullprop->Property.PropertyName; - memcpy(info->properties[info->num_properties].data, - fullprop->u, 8 * sizeof(unsigned));; + info->properties[info->num_properties].name = + fullprop->Property.PropertyName; + memcpy(info->properties[info->num_properties].data, + fullprop->u, 8 * sizeof(unsigned));; - ++info->num_properties; - } - break; + ++info->num_properties; + } + break; default: assert( 0 ); @@ -222,6 +223,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); + /* extract simple properties */ + for (i = 0; i < info->num_properties; ++i) { + switch (info->properties[i].name) { + case TGSI_PROPERTY_FS_COORD_ORIGIN: + info->origin_lower_left = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + info->pixel_center_integer = info->properties[i].data[0]; + break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + info->color0_writes_all_cbufs = info->properties[i].data[0]; + break; + default: + ; + } + } + tgsi_parse_free (&parse); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 53ab3d509dd..d6e593b3968 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -68,6 +68,9 @@ struct tgsi_shader_info boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_instanceid; + boolean origin_lower_left; + boolean pixel_center_integer; + boolean color0_writes_all_cbufs; /** * Bitmask indicating which register files are accessed with diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 528f344a0f7..d8e46f07c88 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -26,8 +26,8 @@ /** * @file - * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil - * resource_copy_region functions. + * Blitter utility to facilitate acceleration of the clear, clear_render_target, + * clear_depth_stencil, and resource_copy_region functions. * * @author Marek Olšák */ @@ -197,8 +197,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&velem[0], 0, sizeof(velem[0]) * 2); for (i = 0; i < 2; i++) { velem[i].src_offset = i * 4 * sizeof(float); - velem[i].instance_divisor = 0; - velem[i].vertex_buffer_index = 0; velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); @@ -288,26 +286,33 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->base.saved_fs); pipe->bind_vs_state(pipe, ctx->base.saved_vs); pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->base.saved_blend_state = INVALID_PTR; - ctx->base.saved_dsa_state = INVALID_PTR; ctx->base.saved_rs_state = INVALID_PTR; - ctx->base.saved_fs = INVALID_PTR; ctx->base.saved_vs = INVALID_PTR; ctx->base.saved_velem_state = INVALID_PTR; + /* restore the state objects which are required to be saved for clear/copy + */ + if (ctx->base.saved_blend_state != INVALID_PTR) { + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + ctx->base.saved_blend_state = INVALID_PTR; + } + if (ctx->base.saved_dsa_state != INVALID_PTR) { + pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + ctx->base.saved_dsa_state = INVALID_PTR; + } + if (ctx->base.saved_fs != INVALID_PTR) { + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + ctx->base.saved_fs = INVALID_PTR; + } + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); pipe->set_clip_state(pipe, &ctx->base.saved_clip); - /* restore the state objects which are required to be saved before copy/fill - */ if (ctx->base.saved_fb_state.nr_cbufs != ~0) { pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); util_unreference_framebuffer_state(&ctx->base.saved_fb_state); @@ -724,14 +729,14 @@ boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1; } -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const struct pipe_box *srcbox, - boolean ignore_stencil) +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 41470d92bba..df6f023a638 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -126,12 +126,15 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) } /* - * These CSOs must be saved before any of the following functions is called: + * These states must be saved before any of the following functions is called: * - blend state * - depth stencil alpha state * - rasterizer state * - vertex shader + * - any other shader??? (XXX) * - fragment shader + * - vertex buffers + * - vertex elements */ /** @@ -169,14 +172,14 @@ void util_blitter_clear_depth_custom(struct blitter_context *blitter, * - fragment sampler states * - fragment sampler textures */ -void util_blitter_copy_region(struct blitter_context *blitter, - struct pipe_resource *dst, - unsigned dstlevel, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned srclevel, - const struct pipe_box *srcbox, - boolean ignore_stencil); +void util_blitter_copy_texture(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dstlevel, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned srclevel, + const struct pipe_box *srcbox, + boolean ignore_stencil); /** * Clear a region of a (color) surface to a constant value. diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 004df439ff5..2d6193039a7 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -730,7 +730,7 @@ debug_dump_float_rgba_bmp(const char *filename, pixel.rgbRed = float_to_ubyte(ptr[x*4 + 0]); pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]); pixel.rgbBlue = float_to_ubyte(ptr[x*4 + 2]); - pixel.rgbAlpha = 255; + pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]); os_stream_write(stream, &pixel, 4); } } diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 9cbdd0a5b99..34922ab18ab 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -390,3 +390,53 @@ util_format_translate(enum pipe_format dst_format, FREE(tmp_row); } } + +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? + swz1[swz2[i]] : swz2[i]; + } +} + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + if (swz[i] <= UTIL_FORMAT_SWIZZLE_W) + dst[i] = src[swz[i]]; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_0) + dst[i] = 0; + else if (swz[i] == UTIL_FORMAT_SWIZZLE_1) + dst[i] = 1; + } +} + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + switch (swz[i]) { + case UTIL_FORMAT_SWIZZLE_X: + dst[0] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Y: + dst[1] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + dst[2] = src[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + dst[3] = src[i]; + break; + } + } +} diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 347e2beb8dd..a3d2aae62c8 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -260,10 +260,10 @@ PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, r # A.k.a. D3DDECLTYPE_DEC3N PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb -PIPE_FORMAT_YV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_YV16 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IYUV , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV12 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_NV21 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_IA44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv -PIPE_FORMAT_AI44 , subsampled, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_YV16 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IYUV , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV12 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_NV21 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_IA44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv +PIPE_FORMAT_AI44 , other, 1, 1, x8 , x8 , x8 , x8 , xyzw, yuv diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index bb3ed72e932..566fa79e781 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -815,6 +815,25 @@ util_format_translate(enum pipe_format dst_format, unsigned src_x, unsigned src_y, unsigned width, unsigned height); +/* + * Swizzle operations. + */ + +/* Compose two sets of swizzles. + * If V is a 4D vector and the function parameters represent functions that + * swizzle vector components, this holds: + * swz2(swz1(V)) = dst(V) + */ +void util_format_compose_swizzles(const unsigned char swz1[4], + const unsigned char swz2[4], + unsigned char dst[4]); + +void util_format_swizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + +void util_format_unswizzle_4f(float *dst, const float *src, + const unsigned char swz[4]); + #ifdef __cplusplus } // extern "C" { #endif diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index bb989c29d81..d8a7c0d453f 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -119,8 +119,15 @@ util_format_s3tc_init(void) library = util_dl_open(DXTN_LIBNAME); if (!library) { - debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " - "compression/decompression unavailable\n"); + if (getenv("force_s3tc_enable") && + !strcmp(getenv("force_s3tc_enable"), "true")) { + debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to " + "force_s3tc_enable=true environment variable\n"); + util_format_s3tc_enabled = TRUE; + } else { + debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " + "compression/decompression unavailable\n"); + } return; } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 0b5284428eb..46d9322932a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -199,6 +199,16 @@ roundf(float x) #endif /* _MSC_VER */ +#ifdef PIPE_OS_ANDROID + +static INLINE +double log2(double d) +{ + return log(d) * (1.0 / M_LN2); +} + +#endif + @@ -409,7 +419,7 @@ unsigned ffs( unsigned u ) return i; } -#elif defined(__MINGW32__) +#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) #define ffs __builtin_ffs #endif diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index f79a6938d1d..ac0df8c1a9c 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -52,6 +52,7 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" /** Approx number of new tokens for instructions in pstip_transform_inst() */ #define NUM_NEW_TOKENS 50 @@ -175,6 +176,7 @@ util_pstipple_create_sampler(struct pipe_context *pipe) */ struct pstip_transform_context { struct tgsi_transform_context base; + struct tgsi_shader_info info; uint tempsUsed; /**< bitmask */ int wincoordInput; int maxInput; @@ -183,12 +185,13 @@ struct pstip_transform_context { int texTemp; /**< temp registers */ int numImmed; boolean firstInstruction; + uint coordOrigin; }; /** * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. + * Track samplers used, temps used, inputs used. */ static void pstip_transform_decl(struct tgsi_transform_context *ctx, @@ -197,10 +200,11 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + /* XXX we can use tgsi_shader_info instead of some of this */ + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } @@ -211,8 +215,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->Range.First; - i <= decl->Range.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -243,8 +246,16 @@ free_bit(uint bitfield) /** * TGSI instruction transform callback. - * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. + * Before the first instruction, insert our new code to sample the + * stipple texture (using the fragment coord register) then kill the + * fragment if the stipple texture bit is off. + * + * Insert: + * declare new registers + * MUL texTemp, INPUT[wincoord], 1/32; + * TEX texTemp, texTemp, sampler; + * KIL -texTemp; # if -texTemp < 0, KILL fragment + * [...original code...] */ static void pstip_transform_inst(struct tgsi_transform_context *ctx, @@ -261,7 +272,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint i; int wincoordInput; - /* find free sampler */ + /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; @@ -271,7 +282,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, else wincoordInput = pctx->wincoordInput; - /* find one free temp reg */ + /* find one free temp register */ for (i = 0; i < 32; i++) { if ((pctx->tempsUsed & (1 << i)) == 0) { /* found a free temp */ @@ -397,6 +408,7 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, struct pipe_shader_state *new_fs; struct pstip_transform_context transform; const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS; + unsigned i; new_fs = MALLOC(sizeof(*new_fs)); if (!new_fs) @@ -408,22 +420,33 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe, return NULL; } + /* Setup shader transformation info/context. + */ memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; transform.maxInput = -1; transform.texTemp = -1; transform.firstInstruction = TRUE; + transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT; transform.base.transform_instruction = pstip_transform_inst; transform.base.transform_declaration = pstip_transform_decl; transform.base.transform_immediate = pstip_transform_immed; + tgsi_scan_shader(fs->tokens, &transform.info); + + /* find fragment coordinate origin property */ + for (i = 0; i < transform.info.num_properties; i++) { + if (transform.info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) + transform.coordOrigin = transform.info.properties[i].data[0]; + } + tgsi_transform_shader(fs->tokens, (struct tgsi_token *) new_fs->tokens, newLen, &transform.base); #if 0 /* DEBUG */ tgsi_dump(fs->tokens, 0); - tgsi_dump(pstip_fs.tokens, 0); + tgsi_dump(new_fs->tokens, 0); #endif assert(transform.freeSampler < PIPE_MAX_SAMPLERS); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 374fc336b83..d9b39e528bb 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -34,21 +34,6 @@ #include "translate/translate.h" #include "translate/translate_cache.h" -/* Hardware vertex fetcher limitations can be described by this structure. */ -struct u_vbuf_caps { - /* Vertex format CAPs. */ - /* TRUE if hardware supports it. */ - unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ - unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ - unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ - unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ - unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ - - /* Whether vertex fetches don't have to be dword-aligned. */ - /* TRUE if hardware supports it. */ - unsigned fetch_dword_unaligned:1; -}; - struct u_vbuf_mgr_elements { unsigned count; struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; @@ -69,7 +54,6 @@ struct u_vbuf_mgr_elements { struct u_vbuf_mgr_priv { struct u_vbuf_mgr b; - struct u_vbuf_caps caps; struct pipe_context *pipe; struct translate_cache *translate_cache; @@ -79,6 +63,8 @@ struct u_vbuf_mgr_priv { void *saved_ve, *fallback_ve; boolean ve_binding_lock; + unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS]; + boolean any_user_vbs; boolean incompatible_vb_layout; }; @@ -87,25 +73,25 @@ static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr) { struct pipe_screen *screen = mgr->pipe->screen; - mgr->caps.format_fixed32 = + mgr->b.caps.format_fixed32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float16 = + mgr->b.caps.format_float16 = screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_float64 = + mgr->b.caps.format_float64 = screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_norm32 = + mgr->b.caps.format_norm32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); - mgr->caps.format_scaled32 = + mgr->b.caps.format_scaled32 = screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER) && screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, @@ -128,7 +114,7 @@ u_vbuf_mgr_create(struct pipe_context *pipe, upload_buffer_alignment, upload_buffer_bind); - mgr->caps.fetch_dword_unaligned = + mgr->b.caps.fetch_dword_unaligned = fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; u_vbuf_mgr_init_format_caps(mgr); @@ -182,7 +168,7 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, /* Check for support. */ if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] && - (mgr->caps.fetch_dword_unaligned || + (mgr->b.caps.fetch_dword_unaligned || (vb->buffer_offset % 4 == 0 && vb->stride % 4 == 0 && mgr->ve->ve[i].src_offset % 4 == 0))) { @@ -363,7 +349,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, /* Choose a native format. * For now we don't care about the alignment, that's going to * be sorted out later. */ - if (!mgr->caps.format_fixed32) { + if (!mgr->b.caps.format_fixed32) { switch (format) { FORMAT_REPLACE(R32_FIXED, R32_FLOAT); FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); @@ -372,7 +358,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float16) { + if (!mgr->b.caps.format_float16) { switch (format) { FORMAT_REPLACE(R16_FLOAT, R32_FLOAT); FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT); @@ -381,7 +367,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_float64) { + if (!mgr->b.caps.format_float64) { switch (format) { FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); @@ -390,7 +376,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_norm32) { + if (!mgr->b.caps.format_norm32) { switch (format) { FORMAT_REPLACE(R32_UNORM, R32_FLOAT); FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); @@ -403,7 +389,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, default:; } } - if (!mgr->caps.format_scaled32) { + if (!mgr->b.caps.format_scaled32) { switch (format) { FORMAT_REPLACE(R32_USCALED, R32_FLOAT); FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); @@ -425,11 +411,11 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, ve->incompatible_layout = ve->incompatible_layout || ve->ve[i].src_format != ve->native_format[i] || - (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); + (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); } /* Align the formats to the size of DWORD if needed. */ - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { for (i = 0; i < count; i++) { ve->native_format_size[i] = align(ve->native_format_size[i], 4); } @@ -470,7 +456,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, mgr->any_user_vbs = FALSE; mgr->incompatible_vb_layout = FALSE; - if (!mgr->caps.fetch_dword_unaligned) { + if (!mgr->b.caps.fetch_dword_unaligned) { /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { if (bufs[i].buffer) { @@ -488,6 +474,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer); pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + mgr->saved_buffer_offset[i] = vb->buffer_offset; if (!vb->buffer) { continue; @@ -647,6 +634,13 @@ u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) { struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + /* buffer offsets were modified in u_vbuf_upload_buffers */ + if (mgr->any_user_vbs) { + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) + mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i]; + } if (mgr->fallback_ve) { u_vbuf_translate_end(mgr); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 4e6372435d8..c653ca4346d 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -37,6 +37,21 @@ #include "pipe/p_state.h" #include "util/u_transfer.h" +/* Hardware vertex fetcher limitations can be described by this structure. */ +struct u_vbuf_caps { + /* Vertex format CAPs. */ + /* TRUE if hardware supports it. */ + unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ + unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ + unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ + unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ + unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + + /* Whether vertex fetches don't have to be dword-aligned. */ + /* TRUE if hardware supports it. */ + unsigned fetch_dword_unaligned:1; +}; + /* The manager. * This structure should also be used to access vertex buffers * from a driver. */ @@ -63,6 +78,8 @@ struct u_vbuf_mgr { * - u_upload_buffer * - u_upload_flush */ struct u_upload_mgr *uploader; + + struct u_vbuf_caps caps; }; struct u_vbuf_resource { diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 3bd4af2e3e0..c73f9769446 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -231,6 +231,8 @@ init_pipe_state(struct vl_compositor *c) struct pipe_rasterizer_state rast; struct pipe_sampler_state sampler; struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned i; assert(c); @@ -289,6 +291,24 @@ init_pipe_state(struct vl_compositor *c) c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast); + memset(&dsa, 0, sizeof dsa); + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa); + c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa); return true; } @@ -296,6 +316,11 @@ static void cleanup_pipe_state(struct vl_compositor *c) { assert(c); + /* Asserted in softpipe_delete_fs_state() for some reason */ + c->pipe->bind_vs_state(c->pipe, NULL); + c->pipe->bind_fs_state(c->pipe, NULL); + + c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa); c->pipe->delete_sampler_state(c->pipe, c->sampler_linear); c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest); c->pipe->delete_blend_state(c->pipe, c->blend); @@ -648,7 +673,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *c, void vl_compositor_render(struct vl_compositor *c, - enum pipe_mpeg12_picture_type picture_type, struct pipe_surface *dst_surface, struct pipe_video_rect *dst_area, struct pipe_video_rect *dst_clip) diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 87ad39be1be..207510092a0 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -68,6 +68,7 @@ struct vl_compositor void *sampler_nearest; void *blend; void *rast; + void *dsa; void *vertex_elems_state; void *vs; @@ -155,7 +156,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *compositor, */ void vl_compositor_render(struct vl_compositor *compositor, - enum pipe_mpeg12_picture_type picture_type, struct pipe_surface *dst_surface, struct pipe_video_rect *dst_area, struct pipe_video_rect *dst_clip); diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c index fac03359a0f..b23827d300a 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.c +++ b/src/gallium/auxiliary/vl/vl_decoder.c @@ -44,6 +44,19 @@ vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile } } +unsigned +vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile) +{ + assert(screen); + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return 4; + + default: + return 1; + } +} + struct pipe_video_decoder * vl_create_decoder(struct pipe_context *pipe, enum pipe_video_profile profile, diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h index 0e9280dbfa2..fed529c9bc7 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.h +++ b/src/gallium/auxiliary/vl/vl_decoder.h @@ -38,6 +38,12 @@ bool vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile); /** + * the desired number of buffers for optimal operation + */ +unsigned +vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile); + +/** * standard implementation of pipe->create_video_decoder */ struct pipe_video_decoder * diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 645d06a0925..ad786145392 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -143,7 +143,7 @@ static void * create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src vrect, vpos; + struct ureg_src vpos; struct ureg_src scale; struct ureg_dst t_tex; struct ureg_dst o_vpos, o_addr[2]; @@ -152,7 +152,6 @@ create_mismatch_vert_shader(struct vl_idct *idct) if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index bd05205b52d..0b3723c9792 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -103,16 +103,15 @@ create_ref_vert_shader(struct vl_mc *r) { struct ureg_program *shader; struct ureg_src mv_scale; - struct ureg_src vrect, vmv[2]; + struct ureg_src vmv[2]; struct ureg_dst t_vpos; - struct ureg_dst o_vpos, o_vmv[2]; + struct ureg_dst o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); @@ -121,7 +120,6 @@ create_ref_vert_shader(struct vl_mc *r) (float)MACROBLOCK_HEIGHT / r->buffer_height) ); - o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index 9dd032e911d..db05b151f95 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -1,6 +1,7 @@ /************************************************************************** * - * Copyright 2011 Christian König. + * Copyright 2011 Maarten Lankhorst + * Copyright 2011 Christian König * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,1813 +26,967 @@ * **************************************************************************/ -/** - * This file is based uppon slice_xvmc.c and vlc.h from the xine project, - * which in turn is based on mpeg2dec. The following is the original copyright: - * - * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> - * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <stdint.h> - -#include <pipe/p_compiler.h> -#include <pipe/p_video_state.h> +#include <pipe/p_video_decoder.h> +#include <util/u_memory.h> #include "vl_vlc.h" #include "vl_mpeg12_bitstream.h" -/* take num bits from the high part of bit_buf and zero extend them */ -#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num))) - -/* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num))) - -/* macroblock modes */ -#define MACROBLOCK_INTRA 1 -#define MACROBLOCK_PATTERN 2 -#define MACROBLOCK_MOTION_BACKWARD 4 -#define MACROBLOCK_MOTION_FORWARD 8 -#define MACROBLOCK_QUANT 16 - -/* motion_type */ -#define MOTION_TYPE_MASK (3*64) -#define MOTION_TYPE_BASE 64 -#define MC_FIELD (1*64) -#define MC_FRAME (2*64) -#define MC_16X8 (2*64) -#define MC_DMV (3*64) - -/* picture structure */ -#define TOP_FIELD 1 -#define BOTTOM_FIELD 2 -#define FRAME_PICTURE 3 - -/* picture coding type (mpeg2 header) */ -#define I_TYPE 1 -#define P_TYPE 2 -#define B_TYPE 3 -#define D_TYPE 4 - -typedef struct { - uint8_t modes; - uint8_t len; -} MBtab; - -typedef struct { - uint8_t delta; - uint8_t len; -} MVtab; - -typedef struct { - int8_t dmv; - uint8_t len; -} DMVtab; - -typedef struct { - uint8_t cbp; - uint8_t len; -} CBPtab; - -typedef struct { - uint8_t size; - uint8_t len; -} DCtab; - -typedef struct { - uint8_t run; - uint8_t level; - uint8_t len; -} DCTtab; - -typedef struct { - uint8_t mba; - uint8_t len; -} MBAtab; - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - -static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} -}; - -static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +enum { + dct_End_of_Block = 0xFF, + dct_Escape = 0xFE, + dct_DC = 0xFD, + dct_AC = 0xFC }; -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +struct dct_coeff +{ + uint8_t length; + uint8_t run; + int16_t level; }; -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +struct dct_coeff_compressed +{ + uint32_t bitcode; + struct dct_coeff coeff; }; -static const DCtab DC_long [] = { - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +/* coding table as found in the spec annex B.5 table B-1 */ +static const struct vl_vlc_compressed macroblock_address_increment[] = { + { 0x8000, { 1, 1 } }, + { 0x6000, { 3, 2 } }, + { 0x4000, { 3, 3 } }, + { 0x3000, { 4, 4 } }, + { 0x2000, { 4, 5 } }, + { 0x1800, { 5, 6 } }, + { 0x1000, { 5, 7 } }, + { 0x0e00, { 7, 8 } }, + { 0x0c00, { 7, 9 } }, + { 0x0b00, { 8, 10 } }, + { 0x0a00, { 8, 11 } }, + { 0x0900, { 8, 12 } }, + { 0x0800, { 8, 13 } }, + { 0x0700, { 8, 14 } }, + { 0x0600, { 8, 15 } }, + { 0x05c0, { 10, 16 } }, + { 0x0580, { 10, 17 } }, + { 0x0540, { 10, 18 } }, + { 0x0500, { 10, 19 } }, + { 0x04c0, { 10, 20 } }, + { 0x0480, { 10, 21 } }, + { 0x0460, { 11, 22 } }, + { 0x0440, { 11, 23 } }, + { 0x0420, { 11, 24 } }, + { 0x0400, { 11, 25 } }, + { 0x03e0, { 11, 26 } }, + { 0x03c0, { 11, 27 } }, + { 0x03a0, { 11, 28 } }, + { 0x0380, { 11, 29 } }, + { 0x0360, { 11, 30 } }, + { 0x0340, { 11, 31 } }, + { 0x0320, { 11, 32 } }, + { 0x0300, { 11, 33 } } }; -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; +#define Q PIPE_MPEG12_MB_TYPE_QUANT +#define F PIPE_MPEG12_MB_TYPE_MOTION_FORWARD +#define B PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD +#define P PIPE_MPEG12_MB_TYPE_PATTERN +#define I PIPE_MPEG12_MB_TYPE_INTRA -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +/* coding table as found in the spec annex B.5 table B-2 */ +static const struct vl_vlc_compressed macroblock_type_i[] = { + { 0x8000, { 1, I } }, + { 0x4000, { 2, Q|I } } }; -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +/* coding table as found in the spec annex B.5 table B-3 */ +static const struct vl_vlc_compressed macroblock_type_p[] = { + { 0x8000, { 1, F|P } }, + { 0x4000, { 2, P } }, + { 0x2000, { 3, F } }, + { 0x1800, { 5, I } }, + { 0x1000, { 5, Q|F|P } }, + { 0x0800, { 5, Q|P } }, + { 0x0400, { 6, Q|I } } }; -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +/* coding table as found in the spec annex B.5 table B-4 */ +static const struct vl_vlc_compressed macroblock_type_b[] = { + { 0x8000, { 2, F|B } }, + { 0xC000, { 2, F|B|P } }, + { 0x4000, { 3, B } }, + { 0x6000, { 3, B|P } }, + { 0x2000, { 4, F } }, + { 0x3000, { 4, F|P } }, + { 0x1800, { 5, I } }, + { 0x1000, { 5, Q|F|B|P } }, + { 0x0C00, { 6, Q|F|P } }, + { 0x0800, { 6, Q|B|P } }, + { 0x0400, { 6, Q|I } } }; -static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +#undef Q +#undef F +#undef B +#undef P +#undef I + +/* coding table as found in the spec annex B.5 table B-9 */ +static const struct vl_vlc_compressed coded_block_pattern[] = { + { 0xE000, { 3, 60 } }, + { 0xD000, { 4, 4 } }, + { 0xC000, { 4, 8 } }, + { 0xB000, { 4, 16 } }, + { 0xA000, { 4, 32 } }, + { 0x9800, { 5, 12 } }, + { 0x9000, { 5, 48 } }, + { 0x8800, { 5, 20 } }, + { 0x8000, { 5, 40 } }, + { 0x7800, { 5, 28 } }, + { 0x7000, { 5, 44 } }, + { 0x6800, { 5, 52 } }, + { 0x6000, { 5, 56 } }, + { 0x5800, { 5, 1 } }, + { 0x5000, { 5, 61 } }, + { 0x4800, { 5, 2 } }, + { 0x4000, { 5, 62 } }, + { 0x3C00, { 6, 24 } }, + { 0x3800, { 6, 36 } }, + { 0x3400, { 6, 3 } }, + { 0x3000, { 6, 63 } }, + { 0x2E00, { 7, 5 } }, + { 0x2C00, { 7, 9 } }, + { 0x2A00, { 7, 17 } }, + { 0x2800, { 7, 33 } }, + { 0x2600, { 7, 6 } }, + { 0x2400, { 7, 10 } }, + { 0x2200, { 7, 18 } }, + { 0x2000, { 7, 34 } }, + { 0x1F00, { 8, 7 } }, + { 0x1E00, { 8, 11 } }, + { 0x1D00, { 8, 19 } }, + { 0x1C00, { 8, 35 } }, + { 0x1B00, { 8, 13 } }, + { 0x1A00, { 8, 49 } }, + { 0x1900, { 8, 21 } }, + { 0x1800, { 8, 41 } }, + { 0x1700, { 8, 14 } }, + { 0x1600, { 8, 50 } }, + { 0x1500, { 8, 22 } }, + { 0x1400, { 8, 42 } }, + { 0x1300, { 8, 15 } }, + { 0x1200, { 8, 51 } }, + { 0x1100, { 8, 23 } }, + { 0x1000, { 8, 43 } }, + { 0x0F00, { 8, 25 } }, + { 0x0E00, { 8, 37 } }, + { 0x0D00, { 8, 26 } }, + { 0x0C00, { 8, 38 } }, + { 0x0B00, { 8, 29 } }, + { 0x0A00, { 8, 45 } }, + { 0x0900, { 8, 53 } }, + { 0x0800, { 8, 57 } }, + { 0x0700, { 8, 30 } }, + { 0x0600, { 8, 46 } }, + { 0x0500, { 8, 54 } }, + { 0x0400, { 8, 58 } }, + { 0x0380, { 9, 31 } }, + { 0x0300, { 9, 47 } }, + { 0x0280, { 9, 55 } }, + { 0x0200, { 9, 59 } }, + { 0x0180, { 9, 27 } }, + { 0x0100, { 9, 39 } }, + { 0x0080, { 9, 0 } } }; -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +/* coding table as found in the spec annex B.5 table B-10 */ +static const struct vl_vlc_compressed motion_code[] = { + { 0x0320, { 11, -16 } }, + { 0x0360, { 11, -15 } }, + { 0x03a0, { 11, -14 } }, + { 0x03e0, { 11, -13 } }, + { 0x0420, { 11, -12 } }, + { 0x0460, { 11, -11 } }, + { 0x04c0, { 10, -10 } }, + { 0x0540, { 10, -9 } }, + { 0x05c0, { 10, -8 } }, + { 0x0700, { 8, -7 } }, + { 0x0900, { 8, -6 } }, + { 0x0b00, { 8, -5 } }, + { 0x0e00, { 7, -4 } }, + { 0x1800, { 5, -3 } }, + { 0x3000, { 4, -2 } }, + { 0x6000, { 3, -1 } }, + { 0x8000, { 1, 0 } }, + { 0x4000, { 3, 1 } }, + { 0x2000, { 4, 2 } }, + { 0x1000, { 5, 3 } }, + { 0x0c00, { 7, 4 } }, + { 0x0a00, { 8, 5 } }, + { 0x0800, { 8, 6 } }, + { 0x0600, { 8, 7 } }, + { 0x0580, { 10, 8 } }, + { 0x0500, { 10, 9 } }, + { 0x0480, { 10, 10 } }, + { 0x0440, { 11, 11 } }, + { 0x0400, { 11, 12 } }, + { 0x03c0, { 11, 13 } }, + { 0x0380, { 11, 14 } }, + { 0x0340, { 11, 15 } }, + { 0x0300, { 11, 16 } } }; -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +/* coding table as found in the spec annex B.5 table B-11 */ +static const struct vl_vlc_compressed dmvector[] = { + { 0x0000, { 1, 0 } }, + { 0x8000, { 2, 1 } }, + { 0xc000, { 2, -1 } } }; -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +/* coding table as found in the spec annex B.5 table B-12 */ +static const struct vl_vlc_compressed dct_dc_size_luminance[] = { + { 0x8000, { 3, 0 } }, + { 0x0000, { 2, 1 } }, + { 0x4000, { 2, 2 } }, + { 0xA000, { 3, 3 } }, + { 0xC000, { 3, 4 } }, + { 0xE000, { 4, 5 } }, + { 0xF000, { 5, 6 } }, + { 0xF800, { 6, 7 } }, + { 0xFC00, { 7, 8 } }, + { 0xFE00, { 8, 9 } }, + { 0xFF00, { 9, 10 } }, + { 0xFF80, { 9, 11 } } }; -static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +/* coding table as found in the spec annex B.5 table B-13 */ +static const struct vl_vlc_compressed dct_dc_size_chrominance[] = { + { 0x0000, { 2, 0 } }, + { 0x4000, { 2, 1 } }, + { 0x8000, { 2, 2 } }, + { 0xC000, { 3, 3 } }, + { 0xE000, { 4, 4 } }, + { 0xF000, { 5, 5 } }, + { 0xF800, { 6, 6 } }, + { 0xFC00, { 7, 7 } }, + { 0xFE00, { 8, 8 } }, + { 0xFF00, { 9, 9 } }, + { 0xFF80, { 10, 10 } }, + { 0xFFC0, { 10, 11 } } }; -static const MBAtab MBA_5 [] = { - {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +/* coding table as found in the spec annex B.5 table B-14 */ +static const struct dct_coeff_compressed dct_coeff_tbl_zero[] = { + { 0x8000, { 2, dct_End_of_Block, 0 } }, + { 0x8000, { 1, dct_DC, 1 } }, + { 0xC000, { 2, dct_AC, 1 } }, + { 0x6000, { 3, 1, 1 } }, + { 0x4000, { 4, 0, 2 } }, + { 0x5000, { 4, 2, 1 } }, + { 0x2800, { 5, 0, 3 } }, + { 0x3800, { 5, 3, 1 } }, + { 0x3000, { 5, 4, 1 } }, + { 0x1800, { 6, 1, 2 } }, + { 0x1C00, { 6, 5, 1 } }, + { 0x1400, { 6, 6, 1 } }, + { 0x1000, { 6, 7, 1 } }, + { 0x0C00, { 7, 0, 4 } }, + { 0x0800, { 7, 2, 2 } }, + { 0x0E00, { 7, 8, 1 } }, + { 0x0A00, { 7, 9, 1 } }, + { 0x0400, { 6, dct_Escape, 0 } }, + { 0x2600, { 8, 0, 5 } }, + { 0x2100, { 8, 0, 6 } }, + { 0x2500, { 8, 1, 3 } }, + { 0x2400, { 8, 3, 2 } }, + { 0x2700, { 8, 10, 1 } }, + { 0x2300, { 8, 11, 1 } }, + { 0x2200, { 8, 12, 1 } }, + { 0x2000, { 8, 13, 1 } }, + { 0x0280, { 10, 0, 7 } }, + { 0x0300, { 10, 1, 4 } }, + { 0x02C0, { 10, 2, 3 } }, + { 0x03C0, { 10, 4, 2 } }, + { 0x0240, { 10, 5, 2 } }, + { 0x0380, { 10, 14, 1 } }, + { 0x0340, { 10, 15, 1 } }, + { 0x0200, { 10, 16, 1 } }, + { 0x01D0, { 12, 0, 8 } }, + { 0x0180, { 12, 0, 9 } }, + { 0x0130, { 12, 0, 10 } }, + { 0x0100, { 12, 0, 11 } }, + { 0x01B0, { 12, 1, 5 } }, + { 0x0140, { 12, 2, 4 } }, + { 0x01C0, { 12, 3, 3 } }, + { 0x0120, { 12, 4, 3 } }, + { 0x01E0, { 12, 6, 2 } }, + { 0x0150, { 12, 7, 2 } }, + { 0x0110, { 12, 8, 2 } }, + { 0x01F0, { 12, 17, 1 } }, + { 0x01A0, { 12, 18, 1 } }, + { 0x0190, { 12, 19, 1 } }, + { 0x0170, { 12, 20, 1 } }, + { 0x0160, { 12, 21, 1 } }, + { 0x00D0, { 13, 0, 12 } }, + { 0x00C8, { 13, 0, 13 } }, + { 0x00C0, { 13, 0, 14 } }, + { 0x00B8, { 13, 0, 15 } }, + { 0x00B0, { 13, 1, 6 } }, + { 0x00A8, { 13, 1, 7 } }, + { 0x00A0, { 13, 2, 5 } }, + { 0x0098, { 13, 3, 4 } }, + { 0x0090, { 13, 5, 3 } }, + { 0x0088, { 13, 9, 2 } }, + { 0x0080, { 13, 10, 2 } }, + { 0x00F8, { 13, 22, 1 } }, + { 0x00F0, { 13, 23, 1 } }, + { 0x00E8, { 13, 24, 1 } }, + { 0x00E0, { 13, 25, 1 } }, + { 0x00D8, { 13, 26, 1 } }, + { 0x007C, { 14, 0, 16 } }, + { 0x0078, { 14, 0, 17 } }, + { 0x0074, { 14, 0, 18 } }, + { 0x0070, { 14, 0, 19 } }, + { 0x006C, { 14, 0, 20 } }, + { 0x0068, { 14, 0, 21 } }, + { 0x0064, { 14, 0, 22 } }, + { 0x0060, { 14, 0, 23 } }, + { 0x005C, { 14, 0, 24 } }, + { 0x0058, { 14, 0, 25 } }, + { 0x0054, { 14, 0, 26 } }, + { 0x0050, { 14, 0, 27 } }, + { 0x004C, { 14, 0, 28 } }, + { 0x0048, { 14, 0, 29 } }, + { 0x0044, { 14, 0, 30 } }, + { 0x0040, { 14, 0, 31 } }, + { 0x0030, { 15, 0, 32 } }, + { 0x002E, { 15, 0, 33 } }, + { 0x002C, { 15, 0, 34 } }, + { 0x002A, { 15, 0, 35 } }, + { 0x0028, { 15, 0, 36 } }, + { 0x0026, { 15, 0, 37 } }, + { 0x0024, { 15, 0, 38 } }, + { 0x0022, { 15, 0, 39 } }, + { 0x0020, { 15, 0, 40 } }, + { 0x003E, { 15, 1, 8 } }, + { 0x003C, { 15, 1, 9 } }, + { 0x003A, { 15, 1, 10 } }, + { 0x0038, { 15, 1, 11 } }, + { 0x0036, { 15, 1, 12 } }, + { 0x0034, { 15, 1, 13 } }, + { 0x0032, { 15, 1, 14 } }, + { 0x0013, { 16, 1, 15 } }, + { 0x0012, { 16, 1, 16 } }, + { 0x0011, { 16, 1, 17 } }, + { 0x0010, { 16, 1, 18 } }, + { 0x0014, { 16, 6, 3 } }, + { 0x001A, { 16, 11, 2 } }, + { 0x0019, { 16, 12, 2 } }, + { 0x0018, { 16, 13, 2 } }, + { 0x0017, { 16, 14, 2 } }, + { 0x0016, { 16, 15, 2 } }, + { 0x0015, { 16, 16, 2 } }, + { 0x001F, { 16, 27, 1 } }, + { 0x001E, { 16, 28, 1 } }, + { 0x001D, { 16, 29, 1 } }, + { 0x001C, { 16, 30, 1 } }, + { 0x001B, { 16, 31, 1 } } }; -static const MBAtab MBA_11 [] = { - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +/* coding table as found in the spec annex B.5 table B-15 */ +static const struct dct_coeff_compressed dct_coeff_tbl_one[] = { + { 0x6000, { 4, dct_End_of_Block, 0 } }, + { 0x8000, { 2, 0, 1 } }, + { 0x4000, { 3, 1, 1 } }, + { 0xC000, { 3, 0, 2 } }, + { 0x2800, { 5, 2, 1 } }, + { 0x7000, { 4, 0, 3 } }, + { 0x3800, { 5, 3, 1 } }, + { 0x1800, { 6, 4, 1 } }, + { 0x3000, { 5, 1, 2 } }, + { 0x1C00, { 6, 5, 1 } }, + { 0x0C00, { 7, 6, 1 } }, + { 0x0800, { 7, 7, 1 } }, + { 0xE000, { 5, 0, 4 } }, + { 0x0E00, { 7, 2, 2 } }, + { 0x0A00, { 7, 8, 1 } }, + { 0xF000, { 7, 9, 1 } }, + { 0x0400, { 6, dct_Escape, 0 } }, + { 0xE800, { 5, 0, 5 } }, + { 0x1400, { 6, 0, 6 } }, + { 0xF200, { 7, 1, 3 } }, + { 0x2600, { 8, 3, 2 } }, + { 0xF400, { 7, 10, 1 } }, + { 0x2100, { 8, 11, 1 } }, + { 0x2500, { 8, 12, 1 } }, + { 0x2400, { 8, 13, 1 } }, + { 0x1000, { 6, 0, 7 } }, + { 0x2700, { 8, 1, 4 } }, + { 0xFC00, { 8, 2, 3 } }, + { 0xFD00, { 8, 4, 2 } }, + { 0x0200, { 9, 5, 2 } }, + { 0x0280, { 9, 14, 1 } }, + { 0x0380, { 9, 15, 1 } }, + { 0x0340, { 10, 16, 1 } }, + { 0xF600, { 7, 0, 8 } }, + { 0xF800, { 7, 0, 9 } }, + { 0x2300, { 8, 0, 10 } }, + { 0x2200, { 8, 0, 11 } }, + { 0x2000, { 8, 1, 5 } }, + { 0x0300, { 10, 2, 4 } }, + { 0x01C0, { 12, 3, 3 } }, + { 0x0120, { 12, 4, 3 } }, + { 0x01E0, { 12, 6, 2 } }, + { 0x0150, { 12, 7, 2 } }, + { 0x0110, { 12, 8, 2 } }, + { 0x01F0, { 12, 17, 1 } }, + { 0x01A0, { 12, 18, 1 } }, + { 0x0190, { 12, 19, 1 } }, + { 0x0170, { 12, 20, 1 } }, + { 0x0160, { 12, 21, 1 } }, + { 0xFA00, { 8, 0, 12 } }, + { 0xFB00, { 8, 0, 13 } }, + { 0xFE00, { 8, 0, 14 } }, + { 0xFF00, { 8, 0, 15 } }, + { 0x00B0, { 13, 1, 6 } }, + { 0x00A8, { 13, 1, 7 } }, + { 0x00A0, { 13, 2, 5 } }, + { 0x0098, { 13, 3, 4 } }, + { 0x0090, { 13, 5, 3 } }, + { 0x0088, { 13, 9, 2 } }, + { 0x0080, { 13, 10, 2 } }, + { 0x00F8, { 13, 22, 1 } }, + { 0x00F0, { 13, 23, 1 } }, + { 0x00E8, { 13, 24, 1 } }, + { 0x00E0, { 13, 25, 1 } }, + { 0x00D8, { 13, 26, 1 } }, + { 0x007C, { 14, 0, 16 } }, + { 0x0078, { 14, 0, 17 } }, + { 0x0074, { 14, 0, 18 } }, + { 0x0070, { 14, 0, 19 } }, + { 0x006C, { 14, 0, 20 } }, + { 0x0068, { 14, 0, 21 } }, + { 0x0064, { 14, 0, 22 } }, + { 0x0060, { 14, 0, 23 } }, + { 0x005C, { 14, 0, 24 } }, + { 0x0058, { 14, 0, 25 } }, + { 0x0054, { 14, 0, 26 } }, + { 0x0050, { 14, 0, 27 } }, + { 0x004C, { 14, 0, 28 } }, + { 0x0048, { 14, 0, 29 } }, + { 0x0044, { 14, 0, 30 } }, + { 0x0040, { 14, 0, 31 } }, + { 0x0030, { 15, 0, 32 } }, + { 0x002E, { 15, 0, 33 } }, + { 0x002C, { 15, 0, 34 } }, + { 0x002A, { 15, 0, 35 } }, + { 0x0028, { 15, 0, 36 } }, + { 0x0026, { 15, 0, 37 } }, + { 0x0024, { 15, 0, 38 } }, + { 0x0022, { 15, 0, 39 } }, + { 0x0020, { 15, 0, 40 } }, + { 0x003E, { 15, 1, 8 } }, + { 0x003C, { 15, 1, 9 } }, + { 0x003A, { 15, 1, 10 } }, + { 0x0038, { 15, 1, 11 } }, + { 0x0036, { 15, 1, 12 } }, + { 0x0034, { 15, 1, 13 } }, + { 0x0032, { 15, 1, 14 } }, + { 0x0013, { 16, 1, 15 } }, + { 0x0012, { 16, 1, 16 } }, + { 0x0011, { 16, 1, 17 } }, + { 0x0010, { 16, 1, 18 } }, + { 0x0014, { 16, 6, 3 } }, + { 0x001A, { 16, 11, 2 } }, + { 0x0019, { 16, 12, 2 } }, + { 0x0018, { 16, 13, 2 } }, + { 0x0017, { 16, 14, 2 } }, + { 0x0016, { 16, 15, 2 } }, + { 0x0015, { 16, 16, 2 } }, + { 0x001F, { 16, 27, 1 } }, + { 0x001E, { 16, 28, 1 } }, + { 0x001D, { 16, 29, 1 } }, + { 0x001C, { 16, 30, 1 } }, + { 0x001B, { 16, 31, 1 } } }; -static const int non_linear_quantizer_scale[] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 +/* q_scale_type */ +static const unsigned quant_scale[2][32] = { + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 104, 112 } }; -static INLINE int -get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) -{ - int macroblock_modes; - const MBtab * tab; - - switch (picture->picture_coding_type) { - case I_TYPE: - - tab = MB_I + vl_vlc_ubits(&bs->vlc, 1); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - return macroblock_modes; - - case P_TYPE: - - tab = MB_P + vl_vlc_ubits(&bs->vlc, 5); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - } - - case B_TYPE: - - tab = MB_B + vl_vlc_ubits(&bs->vlc, 6); - vl_vlc_dumpbits(&bs->vlc, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (! (macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - } else if (picture->frame_pred_frame_dct) { - macroblock_modes |= MC_FRAME; - } else if (!(macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE; - vl_vlc_dumpbits(&bs->vlc, 2); - } - return macroblock_modes; - - case D_TYPE: +static struct vl_vlc_entry tbl_B1[1 << 11]; +static struct vl_vlc_entry tbl_B2[1 << 2]; +static struct vl_vlc_entry tbl_B3[1 << 6]; +static struct vl_vlc_entry tbl_B4[1 << 6]; +static struct vl_vlc_entry tbl_B9[1 << 9]; +static struct vl_vlc_entry tbl_B10[1 << 11]; +static struct vl_vlc_entry tbl_B11[1 << 2]; +static struct vl_vlc_entry tbl_B12[1 << 10]; +static struct vl_vlc_entry tbl_B13[1 << 10]; +static struct dct_coeff tbl_B14_DC[1 << 17]; +static struct dct_coeff tbl_B14_AC[1 << 17]; +static struct dct_coeff tbl_B15[1 << 17]; - vl_vlc_dumpbits(&bs->vlc, 1); - return MACROBLOCK_INTRA; - - default: - return 0; - } -} - -static INLINE enum pipe_mpeg12_dct_type -get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes) +static INLINE void +init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src, + unsigned size, bool is_DC) { - enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME; - - if ((picture->picture_structure == FRAME_PICTURE) && - (!picture->frame_pred_frame_dct) && - (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) { + unsigned i; - dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); + for (i=0;i<(1<<17);++i) { + dst[i].length = 0; + dst[i].level = 0; + dst[i].run = dct_End_of_Block; } - return dct_type; -} - -static INLINE int -get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture) -{ - int quantizer_scale_code; - quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5); - vl_vlc_dumpbits(&bs->vlc, 5); + for(; size > 0; --size, ++src) { + struct dct_coeff coeff = src->coeff; + bool has_sign = true; - if (picture->q_scale_type) - return non_linear_quantizer_scale[quantizer_scale_code]; - else - return quantizer_scale_code << 1; -} - -static INLINE int -get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code) -{ - int delta; - int sign; - const MVtab * tab; + switch (coeff.run) { + case dct_End_of_Block: + if (is_DC) + continue; - if (bs->vlc.buf & 0x80000000) { - vl_vlc_dumpbits(&bs->vlc, 1); - return 0; - } else if (bs->vlc.buf >= 0x0c000000) { + has_sign = false; + break; - tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4); - delta = (tab->delta << f_code) + 1; - bs->vlc.bits += tab->len + f_code + 1; - bs->vlc.buf <<= tab->len; + case dct_Escape: + has_sign = false; + break; - sign = vl_vlc_sbits(&bs->vlc, 1); - bs->vlc.buf <<= 1; + case dct_DC: + if (!is_DC) + continue; - if (f_code) - delta += vl_vlc_ubits(&bs->vlc, f_code); - bs->vlc.buf <<= f_code; + coeff.length += 1; + coeff.run = 1; + break; - return (delta ^ sign) - sign; + case dct_AC: + if (is_DC) + continue; - } else { + coeff.length += 1; + coeff.run = 1; + break; - tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10); - delta = (tab->delta << f_code) + 1; - bs->vlc.bits += tab->len + 1; - bs->vlc.buf <<= tab->len; + default: + coeff.length += 1; + coeff.run += 1; + break; + } - sign = vl_vlc_sbits(&bs->vlc, 1); - bs->vlc.buf <<= 1; + for(i=0; i<(1 << (17 - coeff.length)); ++i) + dst[src->bitcode << 1 | i] = coeff; - if (f_code) { - vl_vlc_needbits(&bs->vlc); - delta += vl_vlc_ubits(&bs->vlc, f_code); - vl_vlc_dumpbits(&bs->vlc, f_code); + if (has_sign) { + coeff.level = -coeff.level; + for(; i<(1 << (18 - coeff.length)); ++i) + dst[src->bitcode << 1 | i] = coeff; } - - return (delta ^ sign) - sign; } } -static INLINE int -bound_motion_vector(int vec, unsigned f_code) +static INLINE void +init_tables() { -#if 1 - unsigned int limit; - int sign; - - limit = 16 << f_code; - - if ((unsigned int)(vec + limit) < 2 * limit) - return vec; - else { - sign = ((int32_t)vec) >> 31; - return vec - ((2 * limit) ^ sign) + sign; - } -#else - return ((int32_t)vec << (28 - f_code)) >> (28 - f_code); -#endif + vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment)); + vl_vlc_init_table(tbl_B2, Elements(tbl_B2), macroblock_type_i, Elements(macroblock_type_i)); + vl_vlc_init_table(tbl_B3, Elements(tbl_B3), macroblock_type_p, Elements(macroblock_type_p)); + vl_vlc_init_table(tbl_B4, Elements(tbl_B4), macroblock_type_b, Elements(macroblock_type_b)); + vl_vlc_init_table(tbl_B9, Elements(tbl_B9), coded_block_pattern, Elements(coded_block_pattern)); + vl_vlc_init_table(tbl_B10, Elements(tbl_B10), motion_code, Elements(motion_code)); + vl_vlc_init_table(tbl_B11, Elements(tbl_B11), dmvector, Elements(dmvector)); + vl_vlc_init_table(tbl_B12, Elements(tbl_B12), dct_dc_size_luminance, Elements(dct_dc_size_luminance)); + vl_vlc_init_table(tbl_B13, Elements(tbl_B13), dct_dc_size_chrominance, Elements(dct_dc_size_chrominance)); + init_dct_coeff_table(tbl_B14_DC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), true); + init_dct_coeff_table(tbl_B14_AC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), false); + init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false); } static INLINE int -get_dmv(struct vl_mpg12_bs *bs) +DIV2DOWN(int todiv) { - const DMVtab * tab; - - tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->dmv; + return (todiv&~1)/2; } static INLINE int -get_coded_block_pattern(struct vl_mpg12_bs *bs) +DIV2UP(int todiv) { - const CBPtab * tab; - - vl_vlc_needbits(&bs->vlc); - - if (bs->vlc.buf >= 0x20000000) { - - tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->cbp; - - } else { - - tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9); - vl_vlc_dumpbits(&bs->vlc, tab->len); - return tab->cbp; - } + return (todiv+1)/2; } -static INLINE int -get_luma_dc_dct_diff(struct vl_mpg12_bs *bs) +static INLINE void +motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2]) { - const DCtab * tab; - int size; - int dc_diff; - - if (bs->vlc.buf < 0xf8000000) { - tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5); - size = tab->size; - if (size) { - bs->vlc.bits += tab->len + size; - bs->vlc.buf <<= tab->len; - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - bs->vlc.buf <<= size; - return dc_diff; - } else { - vl_vlc_dumpbits(&bs->vlc, 3); - return 0; - } - } else { - tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0); - size = tab->size; - vl_vlc_dumpbits(&bs->vlc, tab->len); - vl_vlc_needbits(&bs->vlc); - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - vl_vlc_dumpbits(&bs->vlc, size); - return dc_diff; + int t; + for (t = 0; t < 2; ++t) { + int motion_code; + int r_size = bs->desc.f_code[s][t]; + + vl_vlc_fillbits(&bs->vlc); + motion_code = vl_vlc_get_vlclbf(&bs->vlc, tbl_B10, 11); + + assert(r_size >= 0); + if (r_size && motion_code) { + int residual = vl_vlc_get_uimsbf(&bs->vlc, r_size) + 1; + delta[t] = ((abs(motion_code) - 1) << r_size) + residual; + if (motion_code < 0) + delta[t] = -delta[t]; + } else + delta[t] = motion_code; + if (dmv) + dmvector[t] = vl_vlc_get_vlclbf(&bs->vlc, tbl_B11, 2); } } static INLINE int -get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs) +wrap(short f, int shift) { - const DCtab * tab; - int size; - int dc_diff; - - if (bs->vlc.buf < 0xf8000000) { - tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5); - size = tab->size; - if (size) { - bs->vlc.bits += tab->len + size; - bs->vlc.buf <<= tab->len; - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - bs->vlc.buf <<= size; - return dc_diff; - } else { - vl_vlc_dumpbits(&bs->vlc, 2); - return 0; - } - } else { - tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0); - size = tab->size; - vl_vlc_dumpbits(&bs->vlc, tab->len + 1); - vl_vlc_needbits(&bs->vlc); - dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); - vl_vlc_dumpbits(&bs->vlc, size); - return dc_diff; - } + if (f < (-16 << shift)) + return f + (32 << shift); + else if (f >= 16 << shift) + return f - (32 << shift); + else + return f; } static INLINE void -get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb) { - int i, val; - const DCTtab *tab; - - i = 0; - - vl_vlc_needbits(&bs->vlc); - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ + int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME; + short dmvector[2], delta[2]; - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; + if (mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_FIELD) { + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); + mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[0][s][1] = wrap(DIV2DOWN(mb->PMV[0][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2; - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2); + motion_vector(bs, 1, s, dmv, delta, dmvector); + mb->PMV[1][s][0] = wrap(mb->PMV[1][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[1][s][1] = wrap(DIV2DOWN(mb->PMV[1][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2; - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ + } else { + motion_vector(bs, 0, s, dmv, delta, dmvector); + mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]); + mb->PMV[0][s][1] = wrap(mb->PMV[0][s][1] + delta[1], bs->desc.f_code[s][1]); } - - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ } static INLINE void -get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb) { - int i, val; - const DCTtab * tab; - - i = 0; - - vl_vlc_needbits(&bs->vlc); - - while (1) { - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) { - - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; - - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); + int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME; + short dmvector[2], delta[2]; - dest[i] = val; + if (mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_16x8) { + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else { - - /* end of block. I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - - /* if (i >= 128) break; */ /* end of block */ - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check against buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2); + motion_vector(bs, 1, s, dmv, delta, dmvector); + } else { + if (!dmv) + mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s; + motion_vector(bs, 0, s, dmv, delta, dmvector); } - - vl_vlc_dumpbits(&bs->vlc, 4); /* dump end of block code */ } static INLINE void -get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) -{ - int i, val; - const DCTtab *tab; - - i = -1; - - vl_vlc_needbits(&bs->vlc); - if (bs->vlc.buf >= 0x28000000) { - tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale) >> 1; - - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } - - entry_2: - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; - val = (val * quantizer_scale) / 2; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ +reset_predictor(struct vl_mpg12_bs *bs) { + bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0; } static INLINE void -get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) +decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale) { - int i, val; - const DCTtab * tab; - - i = 0; + static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 }; + static const struct vl_vlc_entry *blk2dcsize[] = { + tbl_B12, tbl_B12, tbl_B12, tbl_B12, tbl_B13, tbl_B13 + }; - vl_vlc_needbits(&bs->vlc); + bool intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA; + const struct dct_coeff *table = intra ? bs->intra_dct_tbl : tbl_B14_AC; + const struct dct_coeff *entry; + int i, cbp, blk = 0; + short *dst = mb->blocks; - while (1) { - if (bs->vlc.buf >= 0x28000000) { + vl_vlc_fillbits(&bs->vlc); + mb->coded_block_pattern = cbp = intra ? 0x3F : vl_vlc_get_vlclbf(&bs->vlc, tbl_B9, 9); - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); + goto entry; - i += tab->run; - if (i >= 64) - break; /* end of block */ + while(1) { + vl_vlc_eatbits(&bs->vlc, entry->length); + if (entry->run == dct_End_of_Block) { - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = tab->level * quantizer_scale; + dst += 64; + cbp <<= 1; + cbp &= 0x3F; + blk++; - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ +entry: + if (!cbp) + break; - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 8); - if (! (val & 0x7f)) { - vl_vlc_dumpbits(&bs->vlc, 8); - val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + while(!(cbp & 0x20)) { + cbp <<= 1; + blk++; } - val = val * quantizer_scale; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 8); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ -} - -static INLINE void -get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) -{ - int i, val; - const DCTtab * tab; - - i = -1; - - vl_vlc_needbits(&bs->vlc); - if (bs->vlc.buf >= 0x28000000) { - tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bs->vlc.buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - normal_code: - bs->vlc.buf <<= tab->len; - bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale) >> 1; + vl_vlc_fillbits(&bs->vlc); - /* oddification */ - val = (val - 1) | 1; + if (intra) { + unsigned cc = blk2cc[blk]; + unsigned size = vl_vlc_get_vlclbf(&bs->vlc, blk2dcsize[blk], 10); - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - - dest[i] = val; - - bs->vlc.buf <<= 1; - vl_vlc_needbits(&bs->vlc); - - continue; - - } - - entry_2: - if (bs->vlc.buf >= 0x04000000) { - - tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ + if (size) { + int dct_diff = vl_vlc_get_uimsbf(&bs->vlc, size); + int half_range = 1 << (size - 1); + if (dct_diff < half_range) + dct_diff = (dct_diff + 1) - (2 * half_range); + bs->pred_dc[cc] += dct_diff; + } - i += UBITS(bs->vlc.buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ + dst[0] = bs->pred_dc[cc]; + i = 0; - vl_vlc_dumpbits(&bs->vlc, 12); - vl_vlc_needbits(&bs->vlc); - val = vl_vlc_sbits(&bs->vlc, 8); - if (! (val & 0x7f)) { - vl_vlc_dumpbits(&bs->vlc, 8); - val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; + } else { + entry = tbl_B14_DC + vl_vlc_peekbits(&bs->vlc, 17); + i = -1; + continue; } - val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale) / 2; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - dest[i] = val; - - vl_vlc_dumpbits(&bs->vlc, 8); - vl_vlc_needbits(&bs->vlc); - - continue; - - } else if (bs->vlc.buf >= 0x02000000) { - tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00800000) { - tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bs->vlc.buf >= 0x00200000) { - tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16); - bs->vlc.buf <<= 16; - vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */ -} - -static INLINE void -slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, - unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3]) -{ - short dest[64]; - - bs->ycbcr_stream[cc]->x = x; - bs->ycbcr_stream[cc]->y = y; - bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA; - bs->ycbcr_stream[cc]->coding = coding; - - vl_vlc_needbits(&bs->vlc); - - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - dc_dct_pred[0] += get_luma_dc_dct_diff(bs); - else - dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs); - - memset(dest, 0, sizeof(int16_t) * 64); - dest[0] = dc_dct_pred[cc]; - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { - if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block(bs, quantizer_scale, dest); - } else if (picture->intra_vlc_format) - get_intra_block_B15(bs, quantizer_scale, dest); - else - get_intra_block_B14(bs, quantizer_scale, dest); - - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); - - bs->num_ycbcr_blocks[cc]++; - bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; -} - -static INLINE void -slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, - unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale) -{ - short dest[64]; - - bs->ycbcr_stream[cc]->x = x; - bs->ycbcr_stream[cc]->y = y; - bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA; - bs->ycbcr_stream[cc]->coding = coding; - - memset(dest, 0, sizeof(int16_t) * 64); - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) - get_mpeg1_non_intra_block(bs, quantizer_scale, dest); - else - get_non_intra_block(bs, quantizer_scale, dest); - - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); - - bs->num_ycbcr_blocks[cc]++; - bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; -} - -static INLINE void -motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = (mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1])); - motion_x = bound_motion_vector (motion_x, f_code[0] + f_code[1]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1])); - motion_y = bound_motion_vector (motion_y, f_code[0] + f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector(motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector(motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - mv->top.field_select = vl_vlc_ubits(&bs->vlc, 1) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->top.y = motion_y << 1; - - vl_vlc_needbits(&bs->vlc); - mv->bottom.field_select = vl_vlc_ubits(&bs->vlc, 1) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->bottom.y = motion_y << 1; -} - -static INLINE void -motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - // TODO Implement dmv - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector(motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]); - /* motion_y = bound_motion_vector (motion_y, f_code[1]); */ - mv->top.y = mv->bottom.y = motion_y << 1; -} - -/* like motion_frame, but parsing without actual motion compensation */ -static INLINE void -motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int tmp; - - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); - tmp = bound_motion_vector (tmp, f_code[0]); - mv->top.x = mv->bottom.x = tmp; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); - tmp = bound_motion_vector (tmp, f_code[1]); - mv->top.y = mv->bottom.y = tmp; - - vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ -} - -static INLINE void -motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here for bob (weave ok) - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - -static INLINE void -motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - vl_vlc_needbits(&bs->vlc); - - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here bob (weave ok) - mv->top.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = motion_y; - - vl_vlc_needbits(&bs->vlc); - // ref_field - //vl_vlc_ubits(&bs->vlc, 1); - - // TODO field select may need to do something here for bob (weave ok) - mv->bottom.field_select = PIPE_VIDEO_FRAME; - vl_vlc_dumpbits(&bs->vlc, 1); - - motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->bottom.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->bottom.y = motion_y; -} - -static INLINE void -motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int motion_x, motion_y; - - // TODO field select may need to do something here for bob (weave ok) - mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME; - - vl_vlc_needbits(&bs->vlc); - motion_x = mv->top.x + get_motion_delta(bs, f_code[0]); - motion_x = bound_motion_vector (motion_x, f_code[0]); - mv->top.x = mv->bottom.x = motion_x; - - vl_vlc_needbits(&bs->vlc); - motion_y = mv->top.y + get_motion_delta(bs, f_code[1]); - motion_y = bound_motion_vector (motion_y, f_code[1]); - mv->top.y = mv->bottom.y = motion_y; -} - - -static INLINE void -motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv) -{ - int tmp; - vl_vlc_needbits(&bs->vlc); - vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */ - - tmp = (mv->top.x + get_motion_delta(bs, f_code[0])); - tmp = bound_motion_vector(tmp, f_code[0]); - mv->top.x = mv->bottom.x = tmp; - - vl_vlc_needbits(&bs->vlc); - tmp = (mv->top.y + get_motion_delta(bs, f_code[1])); - tmp = bound_motion_vector(tmp, f_code[1]); - mv->top.y = mv->bottom.y = tmp; - - vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */ -} - -#define MOTION_CALL(routine, macroblock_modes) \ -do { \ - if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD) \ - routine(bs, picture->f_code[0], &mv_fwd); \ - if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD) \ - routine(bs, picture->f_code[1], &mv_bwd); \ -} while (0) - -static INLINE void -store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos, - struct pipe_motionvector *mv_fwd, - struct pipe_motionvector *mv_bwd) -{ - bs->mv_stream[0][*mv_pos].top = mv_fwd->top; - bs->mv_stream[0][*mv_pos].bottom = - mv_fwd->top.field_select == PIPE_VIDEO_FRAME ? - mv_fwd->top : mv_fwd->bottom; - - bs->mv_stream[1][*mv_pos].top = mv_bwd->top; - bs->mv_stream[1][*mv_pos].bottom = - mv_bwd->top.field_select == PIPE_VIDEO_FRAME ? - mv_bwd->top : mv_bwd->bottom; - - (*mv_pos)++; -} - -static INLINE bool -slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos) -{ - const MBAtab * mba; - - vl_vlc_need32bits(&bs->vlc); - while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) { - if(!vl_vlc_getbyte(&bs->vlc)) - return false; - } - *y = (bs->vlc.buf & 0xFF) - 1; - vl_vlc_restart(&bs->vlc); + } else if (entry->run == dct_Escape) { + i += vl_vlc_get_uimsbf(&bs->vlc, 6) + 1; + if (i > 64) + break; - *quantizer_scale = get_quantizer_scale(bs, picture); + dst[i] = vl_vlc_get_simsbf(&bs->vlc, 12) * scale; - /* ignore intra_slice and all the extra data */ - while (bs->vlc.buf & 0x80000000) { - vl_vlc_dumpbits(&bs->vlc, 9); - vl_vlc_needbits(&bs->vlc); - } + } else { + i += entry->run; + if (i > 64) + break; - /* decode initial macroblock address increment */ - *x = 0; - while (1) { - if (bs->vlc.buf >= 0x08000000) { - mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2); - break; - } else if (bs->vlc.buf >= 0x01800000) { - mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24); - break; - } else switch (vl_vlc_ubits(&bs->vlc, 12)) { - case 8: /* macroblock_escape */ - *x += 33; - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - case 15: /* macroblock_stuffing (MPEG1 only) */ - bs->vlc.buf &= 0xfffff; - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - default: /* error */ - return false; + dst[i] = entry->level * scale; } - } - vl_vlc_dumpbits(&bs->vlc, mba->len + 1); - *x += mba->mba; - while (*x >= bs->width) { - *x -= bs->width; - (*y)++; + vl_vlc_fillbits(&bs->vlc); + entry = table + vl_vlc_peekbits(&bs->vlc, 17); } - if (*y > bs->height) - return false; - - *mv_pos = *x + *y * bs->width; - - return true; } static INLINE bool -decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) +decode_slice(struct vl_mpg12_bs *bs) { - enum pipe_video_field_select default_field_select; - struct pipe_motionvector mv_fwd, mv_bwd; - enum pipe_mpeg12_dct_type dct_type; - - /* predictor for DC coefficients in intra blocks */ - int dc_dct_pred[3] = { 0, 0, 0 }; - int quantizer_scale; + struct pipe_mpeg12_macroblock mb; + short dct_blocks[64*6]; + unsigned dct_scale; + signed x = -1; - unsigned x, y, mv_pos; + memset(&mb, 0, sizeof(mb)); + mb.base.codec = PIPE_VIDEO_CODEC_MPEG12; + mb.y = vl_vlc_get_uimsbf(&bs->vlc, 8) - 1; + mb.blocks = dct_blocks; - switch(picture->picture_structure) { - case TOP_FIELD: - default_field_select = PIPE_VIDEO_TOP_FIELD; - break; + reset_predictor(bs); + dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)]; - case BOTTOM_FIELD: - default_field_select = PIPE_VIDEO_BOTTOM_FIELD; - break; + if (vl_vlc_get_uimsbf(&bs->vlc, 1)) + while (vl_vlc_get_uimsbf(&bs->vlc, 9) & 1) + vl_vlc_fillbits(&bs->vlc); - default: - default_field_select = PIPE_VIDEO_FRAME; - break; - } - - if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos)) - return false; + do { + int inc = 0; - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; + vl_vlc_fillbits(&bs->vlc); - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + while (vl_vlc_peekbits(&bs->vlc, 11) == 15) { + vl_vlc_eatbits(&bs->vlc, 11); + vl_vlc_fillbits(&bs->vlc); + } - while (1) { - int macroblock_modes; - int mba_inc; - const MBAtab * mba; + while (vl_vlc_peekbits(&bs->vlc, 11) == 8) { + vl_vlc_eatbits(&bs->vlc, 11); + vl_vlc_fillbits(&bs->vlc); + inc += 33; + } + inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11); + if (x != -1) { + mb.num_skipped_macroblocks = inc - 1; + bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + } + mb.x = x += inc; - vl_vlc_needbits(&bs->vlc); + switch (bs->desc.picture_coding_type) { + case PIPE_MPEG12_PICTURE_CODING_TYPE_I: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B2, 2); + break; - macroblock_modes = get_macroblock_modes(bs, picture); - dct_type = get_dct_type(bs, picture, macroblock_modes); + case PIPE_MPEG12_PICTURE_CODING_TYPE_P: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B3, 6); + break; - switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) { - case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD): - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF; + case PIPE_MPEG12_PICTURE_CODING_TYPE_B: + mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B4, 6); break; default: - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; + mb.macroblock_type = 0; + /* dumb gcc */ + assert(0); + } - /* fall through */ - case MACROBLOCK_MOTION_FORWARD: - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - break; + mb.macroblock_modes.value = 0; + if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) { + if (bs->desc.frame_pred_frame_dct == 0) + mb.macroblock_modes.bits.frame_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2); + else + mb.macroblock_modes.bits.frame_motion_type = 2; + } else + mb.macroblock_modes.bits.field_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2); - case MACROBLOCK_MOTION_BACKWARD: - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - break; + } else if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) && bs->desc.concealment_motion_vectors) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + mb.macroblock_modes.bits.frame_motion_type = 2; + else + mb.macroblock_modes.bits.field_motion_type = 1; } - /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ - if (macroblock_modes & MACROBLOCK_QUANT) - quantizer_scale = get_quantizer_scale(bs, picture); - - if (macroblock_modes & MACROBLOCK_INTRA) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME && + bs->desc.frame_pred_frame_dct == 0 && + mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) + mb.macroblock_modes.bits.dct_type = vl_vlc_get_uimsbf(&bs->vlc, 1); - if (picture->concealment_motion_vectors) { - if (picture->picture_structure == FRAME_PICTURE) - motion_fr_conceal(bs, picture->f_code[0], &mv_fwd); - else - motion_fi_conceal(bs, picture->f_code[0], &mv_fwd); + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_QUANT) + dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)]; - } else { - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - } - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - - // unravaled loop of 6 block(i) calls in macroblock() - slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - - if (picture->picture_coding_type == D_TYPE) { - vl_vlc_needbits(&bs->vlc); - vl_vlc_dumpbits(&bs->vlc, 1); - } + if (inc > 1 && bs->desc.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_P) + memset(mb.PMV, 0, sizeof(mb.PMV)); - } else { - if (picture->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) { - MOTION_CALL(motion_mp1, macroblock_modes); - } else { - MOTION_CALL(motion_fr_frame, macroblock_modes); - } - break; - - case MC_FIELD: - MOTION_CALL (motion_fr_field, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - break; - } + mb.motion_vertical_field_select = 0; + if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD) || + (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors)) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + motion_vector_frame(bs, 0, &mb); else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0; - break; - } - - if (macroblock_modes & MACROBLOCK_PATTERN) { - int coded_block_pattern = get_coded_block_pattern(bs); - - // TODO optimize not fully used for idct accel only mc. - if (coded_block_pattern & 0x20) - slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 - if (coded_block_pattern & 0x10) - slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 - if (coded_block_pattern & 0x08) - slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 - if (coded_block_pattern & 0x04) - slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 - if (coded_block_pattern & 0x2) - slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma - if (coded_block_pattern & 0x1) - slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma - } - - dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; + motion_vector_field(bs, 0, &mb); } - store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); - if (++x >= bs->width) { - ++y; - if (y >= bs->height) - return false; - x -= bs->width; + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD) { + if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) + motion_vector_frame(bs, 1, &mb); + else + motion_vector_field(bs, 1, &mb); } - vl_vlc_needbits(&bs->vlc); - mba_inc = 0; - while (1) { - if (bs->vlc.buf >= 0x10000000) { - mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2); - break; - } else if (bs->vlc.buf >= 0x03000000) { - mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24); - break; - } else switch (vl_vlc_ubits(&bs->vlc, 11)) { - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - vl_vlc_dumpbits(&bs->vlc, 11); - vl_vlc_needbits(&bs->vlc); - continue; - default: /* end of slice, or error */ - return true; - } + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors) { + unsigned extra = vl_vlc_get_uimsbf(&bs->vlc, 1); + mb.PMV[1][0][0] = mb.PMV[0][0][0]; + mb.PMV[1][0][1] = mb.PMV[0][0][1]; + assert(extra); + } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA || + !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | + PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) { + memset(mb.PMV, 0, sizeof(mb.PMV)); } - vl_vlc_dumpbits(&bs->vlc, mba->len); - mba_inc += mba->mba; - if (mba_inc) { - //TODO conversion to signed format signed format - dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; - - mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select; - mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select; - - if (picture->picture_coding_type == P_TYPE) { - mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0; - mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - } - x += mba_inc; - do { - store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd); - } while (--mba_inc); + if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD && + mb.macroblock_modes.bits.frame_motion_type == 2) || + (mb.macroblock_modes.bits.frame_motion_type == 3)) { + mb.PMV[1][0][0] = mb.PMV[0][0][0]; + mb.PMV[1][0][1] = mb.PMV[0][0][1]; } - while (x >= bs->width) { - ++y; - if (y >= bs->height) - return false; - x -= bs->width; + + if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD && + mb.macroblock_modes.bits.frame_motion_type == 2) { + mb.PMV[1][1][0] = mb.PMV[0][1][0]; + mb.PMV[1][1][1] = mb.PMV[0][1][1]; } - } + + if (inc > 1 || !(mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA)) + reset_predictor(bs); + + if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) { + memset(dct_blocks, 0, sizeof(dct_blocks)); + decode_dct(bs, &mb, dct_scale); + } else + mb.coded_block_pattern = 0; + + } while (vl_vlc_bytes_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23)); + + mb.num_skipped_macroblocks = 0; + bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + return true; } void -vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height) +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder) { + static bool tables_initialized = false; + assert(bs); memset(bs, 0, sizeof(struct vl_mpg12_bs)); - bs->width = width; - bs->height = height; + bs->decoder = decoder; + + if (!tables_initialized) { + init_tables(); + tables_initialized = true; + } } void -vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]) +vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) { - unsigned i; - - assert(bs); - assert(ycbcr_stream && ycbcr_buffer); - assert(mv_stream); - - for (i = 0; i < VL_MAX_PLANES; ++i) { - bs->ycbcr_stream[i] = ycbcr_stream[i]; - bs->ycbcr_buffer[i] = ycbcr_buffer[i]; - } - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) - bs->mv_stream[i] = mv_stream[i]; - - // TODO - for (i = 0; i < bs->width*bs->height; ++i) { - bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0; - bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - bs->mv_stream[0][i].bottom.x = bs->mv_stream[0][i].bottom.y = 0; - bs->mv_stream[0][i].bottom.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[0][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX; - - bs->mv_stream[1][i].top.x = bs->mv_stream[1][i].top.y = 0; - bs->mv_stream[1][i].top.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[1][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - bs->mv_stream[1][i].bottom.x = bs->mv_stream[1][i].bottom.y = 0; - bs->mv_stream[1][i].bottom.field_select = PIPE_VIDEO_FRAME; - bs->mv_stream[1][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; - } + bs->desc = *picture; + bs->intra_dct_tbl = picture->intra_vlc_format ? tbl_B15 : tbl_B14_AC; } void -vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, - struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]) +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer) { assert(bs); - assert(num_ycbcr_blocks); assert(buffer && num_bytes); - bs->num_ycbcr_blocks = num_ycbcr_blocks; + while(num_bytes > 2) { + if (buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0x01 && + buffer[3] >= 0x01 && buffer[3] < 0xAF) { + unsigned consumed; + + buffer += 3; + num_bytes -= 3; + + vl_vlc_init(&bs->vlc, buffer, num_bytes); + + if (!decode_slice(bs)) + return; - vl_vlc_init(&bs->vlc, buffer, num_bytes); + /* it's possible for the vlc to consume up to eight extra bytes */ + consumed = num_bytes - vl_vlc_bytes_left(&bs->vlc); + consumed = consumed > 8 ? consumed - 8 : 0; - while(decode_slice(bs, picture)); + /* crap, this is a bug we have consumed more bytes than left in the buffer */ + assert(consumed <= num_bytes); + + num_bytes -= consumed; + buffer += consumed; + + } else { + ++buffer; + --num_bytes; + } + } } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h index 4e48a9faa2f..c3f14a17932 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h @@ -33,27 +33,22 @@ struct vl_mpg12_bs { - unsigned width, height; + struct pipe_video_decoder *decoder; - struct vl_vlc vlc; - - unsigned *num_ycbcr_blocks; + struct pipe_mpeg12_picture_desc desc; + struct dct_coeff *intra_dct_tbl; - struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; - short *ycbcr_buffer[VL_MAX_PLANES]; - - struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; + struct vl_vlc vlc; + short pred_dc[3]; }; void -vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height); +vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder); void -vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]); +vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture); void -vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, - struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]); +vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer); #endif /* vl_mpeg12_bitstream_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 61d947ca4c8..7d53168afe5 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -30,6 +30,7 @@ #include <util/u_memory.h> #include <util/u_rect.h> +#include <util/u_sampler.h> #include <util/u_video.h> #include "vl_mpeg12_decoder.h" @@ -75,36 +76,44 @@ static const struct format_config mc_format_config[] = { static const unsigned num_mc_format_configs = sizeof(mc_format_config) / sizeof(struct format_config); +static const unsigned const_empty_block_mask_420[3][2][2] = { + { { 0x20, 0x10 }, { 0x08, 0x04 } }, + { { 0x02, 0x02 }, { 0x02, 0x02 } }, + { { 0x01, 0x01 }, { 0x01, 0x01 } } +}; + static bool -init_zscan_buffer(struct vl_mpeg12_buffer *buffer) +init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { - enum pipe_format formats[3]; - - struct pipe_sampler_view **source; + struct pipe_resource *res, res_tmpl; + struct pipe_sampler_view sv_tmpl; struct pipe_surface **destination; - struct vl_mpeg12_decoder *dec; - unsigned i; - assert(buffer); - - dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + assert(dec && buffer); - formats[0] = formats[1] = formats[2] = dec->zscan_source_format; - buffer->zscan_source = vl_video_buffer_create_ex - ( - dec->base.context, - dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, - align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line, - 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC - ); + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_2D; + res_tmpl.format = dec->zscan_source_format; + res_tmpl.width0 = dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + res_tmpl.height0 = align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line; + res_tmpl.depth0 = 1; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_STREAM; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; - if (!buffer->zscan_source) + res = dec->base.context->screen->resource_create(dec->base.context->screen, &res_tmpl); + if (!res) goto error_source; - source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source); - if (!source) + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = PIPE_SWIZZLE_RED; + buffer->zscan_source = dec->base.context->create_sampler_view(dec->base.context, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->zscan_source) goto error_sampler; if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) @@ -117,7 +126,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c, - &buffer->zscan[i], source[i], destination[i])) + &buffer->zscan[i], buffer->zscan_source, destination[i])) goto error_plane; return true; @@ -128,7 +137,7 @@ error_plane: error_surface: error_sampler: - buffer->zscan_source->destroy(buffer->zscan_source); + pipe_sampler_view_reference(&buffer->zscan_source, NULL); error_source: return false; @@ -143,21 +152,18 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) vl_zscan_cleanup_buffer(&buffer->zscan[i]); - buffer->zscan_source->destroy(buffer->zscan_source); + + pipe_sampler_view_reference(&buffer->zscan_source, NULL); } static bool -init_idct_buffer(struct vl_mpeg12_buffer *buffer) +init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { struct pipe_sampler_view **idct_source_sv, **mc_source_sv; - struct vl_mpeg12_decoder *dec; - unsigned i; - assert(buffer); - - dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + assert(dec && buffer); idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source); if (!idct_source_sv) @@ -187,27 +193,18 @@ error_source_sv: static void cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) { - struct vl_mpeg12_decoder *dec; unsigned i; assert(buf); - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); - for (i = 0; i < 3; ++i) vl_idct_cleanup_buffer(&buf->idct[0]); } static bool -init_mc_buffer(struct vl_mpeg12_buffer *buf) +init_mc_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buf) { - struct vl_mpeg12_decoder *dec; - - assert(buf); - - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); + assert(dec && buf); if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) goto error_mc_y; @@ -241,183 +238,148 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf) vl_mc_cleanup_buffer(&buf->mc[i]); } -static void -vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer) +static INLINE void +MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2]) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; + assert(mb); - assert(buf); - - dec = (struct vl_mpeg12_decoder*)buf->base.decoder; - assert(dec); - - cleanup_zscan_buffer(buf); - - if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - cleanup_idct_buffer(buf); - - cleanup_mc_buffer(buf); - - vl_vb_cleanup(&buf->vertex_stream); - - FREE(buf); -} - -static void -vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; - - struct pipe_sampler_view **sampler_views; - unsigned i; - - assert(buf); - - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); - - vl_vb_map(&buf->vertex_stream, dec->base.context); - - sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source); - - assert(sampler_views); - - for (i = 0; i < VL_MAX_PLANES; ++i) { - struct pipe_resource *tex = sampler_views[i]->texture; - struct pipe_box rect = - { - 0, 0, 0, - tex->width0, - tex->height0, - 1 - }; - - buf->tex_transfer[i] = dec->base.context->get_transfer - ( - dec->base.context, tex, - 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - - buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]); - } - - if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { - struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; - struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; - - for (i = 0; i < VL_MAX_PLANES; ++i) - ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + switch (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + case PIPE_MPEG12_MB_TYPE_MOTION_FORWARD: + weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + break; - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) - mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + case (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD): + weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF; + weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF; + break; - vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream); - } else { + case PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD: + weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX; + break; - for (i = 0; i < VL_MAX_PLANES; ++i) - vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + default: + if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_PATTERN) { + /* patern without a motion vector, just copy the old frame content */ + weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + } else { + weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; + weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; + } + break; } } -static void -vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer, - const uint8_t intra_matrix[64], - const uint8_t non_intra_matrix[64]) +static INLINE struct vl_motionvector +MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector, + unsigned field_select_mask, unsigned weight) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - unsigned i; + struct vl_motionvector mv; + + assert(mb); + + if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) { + switch (mb->macroblock_modes.bits.frame_motion_type) { + case PIPE_MPEG12_MO_TYPE_FRAME: + mv.top.x = mb->PMV[0][vector][0]; + mv.top.y = mb->PMV[0][vector][1]; + mv.top.field_select = PIPE_VIDEO_FRAME; + mv.top.weight = weight; + + mv.bottom.x = mb->PMV[0][vector][0]; + mv.bottom.y = mb->PMV[0][vector][1]; + mv.bottom.weight = weight; + mv.bottom.field_select = PIPE_VIDEO_FRAME; + break; + + case PIPE_MPEG12_MO_TYPE_FIELD: + mv.top.x = mb->PMV[0][vector][0]; + mv.top.y = mb->PMV[0][vector][1]; + mv.top.field_select = (mb->motion_vertical_field_select & field_select_mask) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + mv.top.weight = weight; + + mv.bottom.x = mb->PMV[1][vector][0]; + mv.bottom.y = mb->PMV[1][vector][1]; + mv.bottom.field_select = (mb->motion_vertical_field_select & (field_select_mask << 2)) ? + PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; + mv.bottom.weight = weight; + break; + + default: // TODO: Support DUALPRIME and 16x8 + break; + } + } else { + mv.top.x = mv.top.y = 0; + mv.top.field_select = PIPE_VIDEO_FRAME; + mv.top.weight = weight; - for (i = 0; i < VL_MAX_PLANES; ++i) { - vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, true); - vl_zscan_upload_quant(&buf->zscan[i], non_intra_matrix, false); + mv.bottom.x = mv.bottom.y = 0; + mv.bottom.field_select = PIPE_VIDEO_FRAME; + mv.bottom.weight = weight; } + return mv; } -static struct pipe_ycbcr_block * -vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); - - return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component); -} - -static short * -vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); - assert(component < VL_MAX_PLANES); - - return buf->texels[component]; -} - -static unsigned -vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer) +static INLINE void +UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, + struct vl_mpeg12_buffer *buf, + const struct pipe_mpeg12_macroblock *mb) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + unsigned intra; + unsigned tb, x, y, num_blocks = 0; - assert(buf); + assert(dec && buf); + assert(mb); - return vl_vb_get_mv_stream_stride(&buf->vertex_stream); -} - -static struct pipe_motionvector * -vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - - assert(buf); + if (!mb->coded_block_pattern) + return; - return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame); -} + intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0; -static void -vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer, - unsigned num_bytes, const void *data, - struct pipe_picture_desc *picture, - unsigned num_ycbcr_blocks[3]) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; - - struct vl_mpeg12_decoder *dec; - unsigned i; + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x, ++tb) { + if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) { - assert(buf); + struct vl_ycbcr_block *stream = buf->ycbcr_stream[0]; + stream->x = mb->x * 2 + x; + stream->y = mb->y * 2 + y; + stream->intra = intra; + stream->coding = mb->macroblock_modes.bits.dct_type; + stream->block_num = buf->block_num++; - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + buf->num_ycbcr_blocks[0]++; + buf->ycbcr_stream[0]++; - for (i = 0; i < VL_MAX_PLANES; ++i) - vl_zscan_set_layout(&buf->zscan[i], pic->alternate_scan ? dec->zscan_alternate : dec->zscan_normal); - - vl_mpg12_bs_decode(&buf->bs, num_bytes, data, pic, num_ycbcr_blocks); -} + num_blocks++; + } + } + } -static void -vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer) -{ - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; - struct vl_mpeg12_decoder *dec; - unsigned i; + /* TODO: Implement 422, 444 */ + //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - assert(buf); + for (tb = 1; tb < 3; ++tb) { + if (mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) { - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + struct vl_ycbcr_block *stream = buf->ycbcr_stream[tb]; + stream->x = mb->x; + stream->y = mb->y; + stream->intra = intra; + stream->coding = 0; + stream->block_num = buf->block_num++; - vl_vb_unmap(&buf->vertex_stream, dec->base.context); + buf->num_ycbcr_blocks[tb]++; + buf->ycbcr_stream[tb]++; - for (i = 0; i < VL_MAX_PLANES; ++i) { - dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]); - dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]); + num_blocks++; + } } + + memcpy(buf->texels, mb->blocks, 64 * sizeof(short) * num_blocks); + buf->texels += 64 * num_blocks; } static void @@ -452,7 +414,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) pipe_resource_reference(&dec->quads.buffer, NULL); pipe_resource_reference(&dec->pos.buffer, NULL); - pipe_resource_reference(&dec->block_num.buffer, NULL); pipe_sampler_view_reference(&dec->zscan_linear, NULL); pipe_sampler_view_reference(&dec->zscan_normal, NULL); @@ -461,7 +422,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) FREE(dec); } -static struct pipe_video_decode_buffer * +static void * vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; @@ -473,38 +434,25 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) if (buffer == NULL) return NULL; - buffer->base.decoder = decoder; - buffer->base.destroy = vl_mpeg12_buffer_destroy; - buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame; - buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix; - buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream; - buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer; - buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride; - buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream; - buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream; - buffer->base.end_frame = vl_mpeg12_buffer_end_frame; - if (!vl_vb_init(&buffer->vertex_stream, dec->base.context, dec->base.width / MACROBLOCK_WIDTH, dec->base.height / MACROBLOCK_HEIGHT)) goto error_vertex_buffer; - if (!init_mc_buffer(buffer)) + if (!init_mc_buffer(dec, buffer)) goto error_mc; if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - if (!init_idct_buffer(buffer)) + if (!init_idct_buffer(dec, buffer)) goto error_idct; - if (!init_zscan_buffer(buffer)) + if (!init_zscan_buffer(dec, buffer)) goto error_zscan; if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - vl_mpg12_bs_init(&buffer->bs, - dec->base.width / MACROBLOCK_WIDTH, - dec->base.height / MACROBLOCK_HEIGHT); + vl_mpg12_bs_init(&buffer->bs, decoder); - return &buffer->base; + return buffer; error_zscan: if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) @@ -522,76 +470,307 @@ error_vertex_buffer: } static void -vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, - unsigned num_ycbcr_blocks[3], - struct pipe_video_buffer *refs[2], - struct pipe_video_buffer *dst) +vl_mpeg12_destroy_buffer(struct pipe_video_decoder *decoder, void *buffer) { - struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer; - struct vl_mpeg12_decoder *dec; + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; + struct vl_mpeg12_buffer *buf = buffer; + + assert(dec && buf); + + cleanup_zscan_buffer(buf); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + cleanup_idct_buffer(buf); + + cleanup_mc_buffer(buf); + + vl_vb_cleanup(&buf->vertex_stream); + + FREE(buf); +} + +static void +vl_mpeg12_set_decode_buffer(struct pipe_video_decoder *decoder, void *buffer) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + + assert(dec && buffer); + + dec->current_buffer = buffer; +} + +static void +vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder, + struct pipe_picture_desc *picture) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; + + assert(dec && pic); + + dec->picture_desc = *pic; +} + +static void +vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder, + const struct pipe_quant_matrix *matrix) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + const struct pipe_mpeg12_quant_matrix *m = (const struct pipe_mpeg12_quant_matrix *)matrix; + + assert(dec); + assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12); + + memcpy(dec->intra_matrix, m->intra_matrix, 64); + memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64); +} - struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv; +static void +vl_mpeg12_set_decode_target(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; struct pipe_surface **surfaces; + unsigned i; + + assert(dec); + + surfaces = target->get_surfaces(target); + for (i = 0; i < VL_MAX_PLANES; ++i) + pipe_surface_reference(&dec->target_surfaces[i], surfaces[i]); +} + +static void +vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder, + struct pipe_video_buffer **ref_frames, + unsigned num_ref_frames) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_sampler_view **sv; + unsigned i,j; + + assert(dec); + assert(num_ref_frames <= VL_MAX_REF_FRAMES); + + for (i = 0; i < num_ref_frames; ++i) { + sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]); + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); + } + + for (; i < VL_MAX_REF_FRAMES; ++i) + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL); +} + +static void +vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct vl_mpeg12_buffer *buf; + + struct pipe_resource *tex; + struct pipe_box rect = { 0, 0, 0, 1, 1, 1 }; + + unsigned i; + + assert(dec); + + buf = dec->current_buffer; + assert(buf); + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true); + vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false); + } + + vl_vb_map(&buf->vertex_stream, dec->base.context); + + tex = buf->zscan_source->texture; + rect.width = tex->width0; + rect.height = tex->height0; + + buf->tex_transfer = dec->base.context->get_transfer + ( + dec->base.context, tex, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + + buf->block_num = 0; + buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + buf->num_ycbcr_blocks[i] = 0; + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) + buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc); + + } else { + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + } +} + +static void +vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks; + struct vl_mpeg12_buffer *buf; + + unsigned i, j, mv_weights[2]; + + assert(dec && dec->current_buffer); + assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + + buf = dec->current_buffer; + assert(buf); + + for (; num_macroblocks > 0; --num_macroblocks) { + unsigned mb_addr = mb->y * dec->width_in_macroblocks + mb->x; + + if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_PATTERN | PIPE_MPEG12_MB_TYPE_INTRA)) + UploadYcbcrBlocks(dec, buf, mb); + + MacroBlockTypeToPipeWeights(mb, mv_weights); + + for (i = 0; i < 2; ++i) { + if (!dec->ref_frames[i][0]) continue; + + buf->mv_stream[i][mb_addr] = MotionVectorToPipe + ( + mb, i, + i ? PIPE_MPEG12_FS_FIRST_BACKWARD : PIPE_MPEG12_FS_FIRST_FORWARD, + mv_weights[i] + ); + } + + /* see section 7.6.6 of the spec */ + if (mb->num_skipped_macroblocks > 0) { + struct vl_motionvector skipped_mv[2]; + + if (dec->ref_frames[0][0] && !dec->ref_frames[1][0]) { + skipped_mv[0].top.x = skipped_mv[0].top.y = 0; + skipped_mv[0].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX; + } else { + skipped_mv[0] = buf->mv_stream[0][mb_addr]; + skipped_mv[1] = buf->mv_stream[1][mb_addr]; + } + skipped_mv[0].top.field_select = PIPE_VIDEO_FRAME; + skipped_mv[1].top.field_select = PIPE_VIDEO_FRAME; + + skipped_mv[0].bottom = skipped_mv[0].top; + skipped_mv[1].bottom = skipped_mv[1].top; + + ++mb_addr; + for (i = 0; i < mb->num_skipped_macroblocks; ++i, ++mb_addr) { + for (j = 0; j < 2; ++j) { + if (!dec->ref_frames[j][0]) continue; + buf->mv_stream[j][mb_addr] = skipped_mv[j]; + + } + } + } + + ++mb; + } +} + +static void +vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder, + unsigned num_bytes, const void *data) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct vl_mpeg12_buffer *buf; + + unsigned i; + + assert(dec && dec->current_buffer); + buf = dec->current_buffer; + assert(buf); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ? + dec->zscan_alternate : dec->zscan_normal); + + vl_mpg12_bs_decode(&buf->bs, num_bytes, data); +} + +static void +vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) +{ + struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct pipe_sampler_view **mc_source_sv; struct pipe_vertex_buffer vb[3]; + struct vl_mpeg12_buffer *buf; unsigned i, j, component; unsigned nr_components; - assert(buf); + assert(dec && dec->current_buffer); - dec = (struct vl_mpeg12_decoder *)buf->base.decoder; - assert(dec); + buf = dec->current_buffer; + + vl_vb_unmap(&buf->vertex_stream, dec->base.context); - for (i = 0; i < 2; ++i) - sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL; + dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer); + dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer); vb[0] = dec->quads; vb[1] = dec->pos; - surfaces = dst->get_surfaces(dst); - dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv); for (i = 0; i < VL_MAX_PLANES; ++i) { - if (!surfaces[i]) continue; + if (!dec->target_surfaces[i]) continue; - vl_mc_set_surface(&buf->mc[i], surfaces[i]); + vl_mc_set_surface(&buf->mc[i], dec->target_surfaces[i]); for (j = 0; j < VL_MAX_REF_FRAMES; ++j) { - if (!sv[j]) continue; + if (!dec->ref_frames[j][i]) continue; vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);; dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); - vl_mc_render_ref(&buf->mc[i], sv[j][i]); + vl_mc_render_ref(&buf->mc[i], dec->ref_frames[j][i]); } } - vb[2] = dec->block_num; - dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr); for (i = 0; i < VL_MAX_PLANES; ++i) { - if (!num_ycbcr_blocks[i]) continue; + if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); - vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]); + vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]); + vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]); } mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) { - if (!surfaces[i]) continue; + if (!dec->target_surfaces[i]) continue; - nr_components = util_format_get_nr_components(surfaces[i]->texture->format); + nr_components = util_format_get_nr_components(dec->target_surfaces[i]->texture->format); for (j = 0; j < nr_components; ++j, ++component) { - if (!num_ycbcr_blocks[i]) continue; + if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) vl_idct_prepare_stage2(&buf->idct[component]); @@ -599,11 +778,19 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]); dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr); } - vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]); + vl_mc_render_ycbcr(&buf->mc[i], j, buf->num_ycbcr_blocks[component]); } } } +static void +vl_mpeg12_flush(struct pipe_video_decoder *decoder) +{ + assert(decoder); + + //Noop, for shaders it is much faster to flush everything in end_frame +} + static bool init_pipe_state(struct vl_mpeg12_decoder *dec) { @@ -870,21 +1057,21 @@ vl_create_mpeg12_decoder(struct pipe_context *context, dec->base.destroy = vl_mpeg12_destroy; dec->base.create_buffer = vl_mpeg12_create_buffer; - dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer; + dec->base.destroy_buffer = vl_mpeg12_destroy_buffer; + dec->base.set_decode_buffer = vl_mpeg12_set_decode_buffer; + dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters; + dec->base.set_quant_matrix = vl_mpeg12_set_quant_matrix; + dec->base.set_decode_target = vl_mpeg12_set_decode_target; + dec->base.set_reference_frames = vl_mpeg12_set_reference_frames; + dec->base.begin_frame = vl_mpeg12_begin_frame; + dec->base.decode_macroblock = vl_mpeg12_decode_macroblock; + dec->base.decode_bitstream = vl_mpeg12_decode_bitstream; + dec->base.end_frame = vl_mpeg12_end_frame; + dec->base.flush = vl_mpeg12_flush; dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4); dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels; - - dec->quads = vl_vb_upload_quads(dec->base.context); - dec->pos = vl_vb_upload_pos( - dec->base.context, - dec->base.width / MACROBLOCK_WIDTH, - dec->base.height / MACROBLOCK_HEIGHT - ); - dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks); - - dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); - dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); @@ -892,14 +1079,27 @@ vl_create_mpeg12_decoder(struct pipe_context *context, if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { dec->chroma_width = dec->base.width / 2; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2; } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks; } else { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height; + dec->num_blocks = dec->num_blocks * 3; } + dec->quads = vl_vb_upload_quads(dec->base.context); + dec->pos = vl_vb_upload_pos( + dec->base.context, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT + ); + + dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); + dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + switch (entrypoint) { case PIPE_VIDEO_ENTRYPOINT_BITSTREAM: format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs); @@ -946,6 +1146,9 @@ vl_create_mpeg12_decoder(struct pipe_context *context, if (!init_pipe_state(dec)) goto error_pipe_state; + memset(dec->intra_matrix, 0x10, 64); + memset(dec->non_intra_matrix, 0x10, 64); + return &dec->base; error_pipe_state: diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index 01265e368a3..4a8d65335f6 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -49,12 +49,12 @@ struct vl_mpeg12_decoder unsigned blocks_per_line; unsigned num_blocks; + unsigned width_in_macroblocks; enum pipe_format zscan_source_format; struct pipe_vertex_buffer quads; struct pipe_vertex_buffer pos; - struct pipe_vertex_buffer block_num; void *ves_ycbcr; void *ves_mv; @@ -73,23 +73,34 @@ struct vl_mpeg12_decoder struct vl_mc mc_y, mc_c; void *dsa; + + struct vl_mpeg12_buffer *current_buffer; + struct pipe_mpeg12_picture_desc picture_desc; + uint8_t intra_matrix[64]; + uint8_t non_intra_matrix[64]; + struct pipe_sampler_view *ref_frames[VL_MAX_REF_FRAMES][VL_MAX_PLANES]; + struct pipe_surface *target_surfaces[VL_MAX_PLANES]; }; struct vl_mpeg12_buffer { - struct pipe_video_decode_buffer base; - struct vl_vertex_buffer vertex_stream; - struct pipe_video_buffer *zscan_source; + unsigned block_num; + unsigned num_ycbcr_blocks[3]; + + struct pipe_sampler_view *zscan_source; struct vl_mpg12_bs bs; struct vl_zscan_buffer zscan[VL_MAX_PLANES]; struct vl_idct_buffer idct[VL_MAX_PLANES]; struct vl_mc_buffer mc[VL_MAX_PLANES]; - struct pipe_transfer *tex_transfer[VL_MAX_PLANES]; - short *texels[VL_MAX_PLANES]; + struct pipe_transfer *tex_transfer; + short *texels; + + struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; + struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]; }; /** diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index c0f1449bf80..281db8018eb 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -125,49 +125,6 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) return pos; } -struct pipe_vertex_buffer -vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks) -{ - struct pipe_vertex_buffer buf; - struct pipe_transfer *buf_transfer; - struct vertex2s *v; - unsigned i; - - assert(pipe); - - /* create buffer */ - buf.stride = sizeof(struct vertex2s); - buf.buffer_offset = 0; - buf.buffer = pipe_buffer_create - ( - pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, - sizeof(struct vertex2s) * num_blocks - ); - - if(!buf.buffer) - return buf; - - /* and fill it */ - v = pipe_buffer_map - ( - pipe, - buf.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer - ); - - for ( i = 0; i < num_blocks; ++i, ++v) { - v->x = i; - v->y = i; - } - - pipe_buffer_unmap(pipe, buf_transfer); - - return buf; -} - static struct pipe_vertex_element vl_vb_get_quad_vertex_element(void) { @@ -211,12 +168,10 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe) /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); - /* block num element */ - vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED; + vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT; - vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2); + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1); return pipe->create_vertex_elements_state(pipe, 3, vertex_elems); } @@ -266,7 +221,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct pipe_ycbcr_block) * size * 4 + sizeof(struct vl_ycbcr_block) * size * 4 ); if (!buffer->ycbcr[i].resource) goto error_ycbcr; @@ -278,7 +233,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct pipe_motionvector) * size + sizeof(struct vl_motionvector) * size ); if (!buffer->mv[i].resource) goto error_mv; @@ -310,7 +265,7 @@ vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component) assert(buffer); - buf.stride = sizeof(struct pipe_ycbcr_block); + buf.stride = sizeof(struct vl_ycbcr_block); buf.buffer_offset = 0; buf.buffer = buffer->ycbcr[component].resource; @@ -324,7 +279,7 @@ vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector) assert(buffer); - buf.stride = sizeof(struct pipe_motionvector); + buf.stride = sizeof(struct vl_motionvector); buf.buffer_offset = 0; buf.buffer = buffer->mv[motionvector].resource; @@ -360,7 +315,7 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) } -struct pipe_ycbcr_block * +struct vl_ycbcr_block * vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component) { assert(buffer); @@ -377,7 +332,7 @@ vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer) return buffer->width; } -struct pipe_motionvector * +struct vl_motionvector * vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame) { assert(buffer); diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 74845a42b69..874ecce9041 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -52,20 +52,56 @@ enum VS_INPUT NUM_VS_INPUTS = 4 }; +enum vl_mv_weight +{ + PIPE_VIDEO_MV_WEIGHT_MIN = 0, + PIPE_VIDEO_MV_WEIGHT_HALF = 128, + PIPE_VIDEO_MV_WEIGHT_MAX = 256 +}; + +enum vl_field_select +{ + PIPE_VIDEO_FRAME = 0, + PIPE_VIDEO_TOP_FIELD = 1, + PIPE_VIDEO_BOTTOM_FIELD = 3, + + /* TODO + PIPE_VIDEO_DUALPRIME + PIPE_VIDEO_16x8 + */ +}; + +struct vl_motionvector +{ + struct { + int16_t x, y; + int16_t field_select; /**< enum pipe_video_field_select */ + int16_t weight; /**< enum pipe_video_mv_weight */ + } top, bottom; +}; + +struct vl_ycbcr_block +{ + uint8_t x, y; + uint8_t intra; + uint8_t coding; + float block_num; +}; + struct vl_vertex_buffer { unsigned width, height; struct { - struct pipe_resource *resource; - struct pipe_transfer *transfer; - struct pipe_ycbcr_block *vertex_stream; + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct vl_ycbcr_block *vertex_stream; } ycbcr[VL_MAX_PLANES]; struct { - struct pipe_resource *resource; - struct pipe_transfer *transfer; - struct pipe_motionvector *vertex_stream; + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct vl_motionvector *vertex_stream; } mv[VL_MAX_REF_FRAMES]; }; @@ -73,8 +109,6 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); -struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks); - void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe); void *vl_vb_get_ves_mv(struct pipe_context *pipe); @@ -89,13 +123,13 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component); -struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component); +struct vl_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component); struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame); unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer); -struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame); +struct vl_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame); void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h index e81b1e9afd2..4db1334d6a4 100644 --- a/src/gallium/auxiliary/vl/vl_vlc.h +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -25,116 +25,148 @@ * **************************************************************************/ -/** - * This file is based uppon slice_xvmc.c and vlc.h from the xine project, - * which in turn is based on mpeg2dec. The following is the original copyright: - * - * Copyright (C) 2000-2002 Michel Lespinasse <[email protected]> - * Copyright (C) 1999-2000 Aaron Holtzman <[email protected]> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - #ifndef vl_vlc_h #define vl_vlc_h -#include "pipe/p_compiler.h" +#include <assert.h> + +#include <pipe/p_compiler.h> + +#include <util/u_math.h> +#include "util/u_pointer.h" struct vl_vlc { - uint32_t buf; /* current 32 bit working set of buffer */ - int bits; /* used bits in working set */ - const uint8_t *ptr; /* buffer with stream data */ - const uint8_t *max; /* ptr+len of buffer */ + uint64_t buffer; + unsigned valid_bits; + uint32_t *data; + uint32_t *end; +}; + +struct vl_vlc_entry +{ + int8_t length; + int8_t value; +}; + +struct vl_vlc_compressed +{ + uint16_t bitcode; + struct vl_vlc_entry entry; }; static INLINE void -vl_vlc_restart(struct vl_vlc *vlc) +vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size) { - vlc->buf = (vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3]; - vlc->bits = -16; - vlc->ptr += 4; + unsigned i, bits = util_logbase2(dst_size); + + for (i=0;i<dst_size;++i) { + dst[i].length = 0; + dst[i].value = 0; + } + + for(; src_size > 0; --src_size, ++src) { + for(i=0; i<(1 << (bits - src->entry.length)); ++i) + dst[src->bitcode >> (16 - bits) | i] = src->entry; + } +} + +static INLINE void +vl_vlc_fillbits(struct vl_vlc *vlc) +{ + if (vlc->valid_bits < 32) { + uint32_t value = *vlc->data; + + //assert(vlc->data <= vlc->end); + +#ifndef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits); + ++vlc->data; + vlc->valid_bits += 32; + } } static INLINE void vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len) { - vlc->ptr = data; - vlc->max = data + len; - vl_vlc_restart(vlc); + assert(vlc); + assert(data && len); + + vlc->buffer = 0; + vlc->valid_bits = 0; + + /* align the data pointer */ + while (pointer_to_uintptr(data) & 3) { + vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits); + ++data; + --len; + vlc->valid_bits += 8; + } + vlc->data = (uint32_t*)data; + vlc->end = (uint32_t*)(data + len); + + vl_vlc_fillbits(vlc); + vl_vlc_fillbits(vlc); +} + +static INLINE unsigned +vl_vlc_bytes_left(struct vl_vlc *vlc) +{ + return ((uint8_t*)vlc->end)-((uint8_t*)vlc->data); } -static INLINE bool -vl_vlc_getbyte(struct vl_vlc *vlc) +static INLINE unsigned +vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits) { - vlc->buf <<= 8; - vlc->buf |= vlc->ptr[0]; - vlc->ptr++; - return vlc->ptr < vlc->max; + //assert(vlc->valid_bits >= num_bits); + + return vlc->buffer >> (64 - num_bits); } -#define vl_vlc_getword(vlc, shift) \ -do { \ - (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift); \ - (vlc)->ptr += 2; \ -} while (0) - -/* make sure that there are at least 16 valid bits in bit_buf */ -#define vl_vlc_needbits(vlc) \ -do { \ - if ((vlc)->bits >= 0) { \ - vl_vlc_getword(vlc, (vlc)->bits); \ - (vlc)->bits -= 16; \ - } \ -} while (0) - -/* make sure that the full 32 bit of the buffer are valid */ static INLINE void -vl_vlc_need32bits(struct vl_vlc *vlc) +vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits) { - vl_vlc_needbits(vlc); - if (vlc->bits > -8) { - unsigned n = -vlc->bits; - vlc->buf <<= n; - vlc->buf |= *vlc->ptr << 8; - vlc->bits = -8; - vlc->ptr++; - } - if (vlc->bits > -16) { - unsigned n = -vlc->bits - 8; - vlc->buf <<= n; - vlc->buf |= *vlc->ptr; - vlc->bits = -16; - vlc->ptr++; - } + //assert(vlc->valid_bits > num_bits); + + vlc->buffer <<= num_bits; + vlc->valid_bits -= num_bits; } -/* remove num valid bits from bit_buf */ -#define vl_vlc_dumpbits(vlc, num) \ -do { \ - (vlc)->buf <<= (num); \ - (vlc)->bits += (num); \ -} while (0) +static INLINE unsigned +vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits) +{ + unsigned value; + + //assert(vlc->valid_bits >= num_bits); + + value = vlc->buffer >> (64 - num_bits); + vl_vlc_eatbits(vlc, num_bits); + + return value; +} + +static INLINE signed +vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits) +{ + signed value; + + //assert(vlc->valid_bits >= num_bits); + + value = ((int64_t)vlc->buffer) >> (64 - num_bits); + vl_vlc_eatbits(vlc, num_bits); -/* take num bits from the high part of bit_buf and zero extend them */ -#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num))) + return value; +} -/* take num bits from the high part of bit_buf and sign extend them */ -#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num))) +static INLINE int8_t +vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits) +{ + tbl += vl_vlc_peekbits(vlc, num_bits); + vl_vlc_eatbits(vlc, tbl->length); + return tbl->value; +} #endif /* vl_vlc_h */ diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 25a3245066c..3faf801b4b1 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -329,8 +329,15 @@ textured quad blitter.. The source and destination may be the same resource, but overlapping blits are not permitted. ``resource_resolve`` resolves a multisampled resource into a non-multisampled -one. Formats and dimensions must match. This function must be present if a driver +one. Their formats must match. This function must be present if a driver supports multisampling. +The region that is to be resolved is described by ``pipe_resolve_info``, which +provides a source and a destination rectangle. +The source rectangle may be vertically flipped, but otherwise the dimensions +of the rectangles must match, unless PIPE_CAP_SCALED_RESOLVE is supported, +in which case scaling and horizontal flipping are allowed as well. +The result of resolving depth/stencil values may be any function of the values at +the sample points, but returning the value of the centermost sample is preferred. The interfaces to these calls are likely to change to make it easier for a driver to batch multiple blits with the same source and diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 4debcc6ecc4..039cb1c03d5 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1031,9 +1031,21 @@ XXX so let's discuss it, yeah? TBD -.. opcode:: TXQ - Texture Size Query +.. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4) + retrieve the dimensions of the texture + depending on the target. For 1D (width), 2D/RECT/CUBE + (width, height), 3D (width, height, depth), + 1D array (width, layers), 2D array (width, height, layers) - TBD +.. math:: + + lod = src0 + + dst.x = texture_width(unit, lod) + + dst.y = texture_height(unit, lod) + + dst.z = texture_depth(unit, lod) .. opcode:: CONT - Continue diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index a1f8bcae802..56d331f3e7a 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -64,11 +64,16 @@ static INLINE void i915_flush_heuristically(struct i915_context* i915, int num_vertex) { struct i915_winsys *iws = i915->iws; - i915->vertices_since_last_flush += num_vertex; - if ( i915->vertices_since_last_flush > 4096 - || ( i915->vertices_since_last_flush > 256 && - !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) ) + + i915->queued_vertices += num_vertex; + + /* fire if we have more than 1/20th of the last frame's vertices */ + if (i915->queued_vertices > i915->last_fired_vertices / 20) { FLUSH_BATCH(NULL); + i915->fired_vertices += i915->queued_vertices; + i915->queued_vertices = 0; + return; + } } diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index e1d6a749cdc..4f9aa2c3120 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -125,6 +125,9 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, * This is not required, just a heuristic */ FLUSH_BATCH(NULL); + + i915->last_fired_vertices = i915->fired_vertices; + i915->fired_vertices = 0; } /** diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 84862351ffe..fca8688a526 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -264,7 +264,10 @@ struct i915_context { struct util_slab_mempool transfer_pool; struct util_slab_mempool texture_transfer_pool; - int vertices_since_last_flush; + /* state for tracking flushes */ + int last_fired_vertices; + int fired_vertices; + int queued_vertices; /** blitter/hw-clear */ struct blitter_context* blitter; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 6d76afa9dbc..5d8e3c8274f 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -77,5 +77,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; - i915->vertices_since_last_flush = 0; + i915->fired_vertices += i915->queued_vertices; + i915->queued_vertices = 0; } diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index b760bc461a1..b2683c82033 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -39,9 +39,9 @@ #define I915_PROGRAM_SIZE 192 -/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */ -#define I915_SEMANTIC_POS 10 -#define I915_SEMANTIC_FACE 11 +/* Use those indices for pos/face routing, must be >= num of inputs */ +#define I915_SEMANTIC_POS 100 +#define I915_SEMANTIC_FACE 101 /** diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index e743f6031eb..c108c702983 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -221,6 +221,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); return 0; diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 204cee6fe9e..1a21433eb9e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -268,8 +268,8 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) { unsigned x1 = i915->scissor.minx; unsigned y1 = i915->scissor.miny; - unsigned x2 = i915->scissor.maxx; - unsigned y2 = i915->scissor.maxy; + unsigned x2 = i915->scissor.maxx - 1; + unsigned y2 = i915->scissor.maxy - 1; unsigned sc[3]; sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ac6e94500c8..41146be9311 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -80,7 +80,7 @@ i915_surface_copy_render(struct pipe_context *pipe, i915->saved_nr_sampler_views, i915->saved_sampler_views); - util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + util_blitter_copy_texture(i915->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); } diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9178dfa8f69..39e9e2fa6ac 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -243,6 +243,8 @@ brw_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shad return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ba9705bebee..f9301354fc5 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -51,6 +51,7 @@ C_SOURCES = \ CPP_SOURCES = \ PROGS := lp_test_format \ + lp_test_arit \ lp_test_blend \ lp_test_conv \ lp_test_printf \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index d6b20ceb5ce..129de297824 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -88,11 +88,12 @@ if not env['embedded']: 'format', 'blend', 'conv', - 'printf', - 'sincos', + 'printf', + 'sincos', ] if not env['msvc']: + tests.append('arit') tests.append('round') for test in tests: diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 268f0fa034b..ce92a80721a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -68,10 +68,17 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = LLVMArrayType(LLVMFloatTypeInContext(lc), 4); +#if HAVE_LLVM >= 0x0300 + texture_type = LLVMStructCreateNamed(gallivm->context, "texture"); + LLVMStructSetBody(texture_type, elem_types, + Elements(elem_types), 0); +#else texture_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); + LLVMAddTypeName(gallivm->module, "texture", texture_type); LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, gallivm->target, texture_type, @@ -112,8 +119,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, gallivm->target, texture_type); - - LLVMAddTypeName(gallivm->module, "texture", texture_type); } /* struct lp_jit_context */ @@ -129,11 +134,19 @@ lp_jit_create_types(struct llvmpipe_context *lp) elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); +#if HAVE_LLVM >= 0x0300 + context_type = LLVMStructCreateNamed(gallivm->context, "context"); + LLVMStructSetBody(context_type, elem_types, + Elements(elem_types), 0); +#else context_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); LLVMInvalidateStructLayout(gallivm->target, context_type); + LLVMAddTypeName(gallivm->module, "context", context_type); +#endif + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, gallivm->target, context_type, LP_JIT_CTX_CONSTANTS); @@ -155,8 +168,6 @@ lp_jit_create_types(struct llvmpipe_context *lp) LP_CHECK_STRUCT_SIZE(struct lp_jit_context, gallivm->target, context_type); - LLVMAddTypeName(gallivm->module, "context", context_type); - lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 4b2ae1436ea..9e2a45caad6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -30,6 +30,7 @@ #include "util/u_math.h" #include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "util/u_string.h" #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -93,7 +94,9 @@ llvmpipe_get_vendor(struct pipe_screen *screen) static const char * llvmpipe_get_name(struct pipe_screen *screen) { - return "llvmpipe"; + static char buf[100]; + util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM); + return buf; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c new file mode 100644 index 00000000000..ea2a659142f --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -0,0 +1,295 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#include "util/u_pointer.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +typedef float (*unary_func_t)(float); + + +/** + * Describe a test case of one unary function. + */ +struct unary_test_t +{ + /* + * Test name -- name of the mathematical function under test. + */ + + const char *name; + + LLVMValueRef + (*builder)(struct lp_build_context *bld, LLVMValueRef a); + + /* + * Reference (pure-C) function. + */ + float + (*ref)(float a); + + /* + * Test values. + */ + const float *values; + unsigned num_values; +}; + + +const float exp2_values[] = { + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 1, + 2, + 4, + 60 +}; + + +const float log2_values[] = { +#if 0 + /* + * Smallest denormalized number; meant just for experimentation, but not + * validation. + */ + 1.4012984643248171e-45, +#endif + 1e-007, + 0.5, + 1, + 2, + 4, + 100000, + 1e+018 +}; + + +static float rsqrtf(float x) +{ + return 1.0/sqrt(x); +} + + +const float rsqrt_values[] = { + -1, -1e-007, + 1e-007, 1, + -4, -1, + 1, 4, + -1e+035, -100000, + 100000, 1e+035, +}; + + +const float sincos_values[] = { + -5*M_PI/4, + -4*M_PI/4, + -4*M_PI/4, + -3*M_PI/4, + -2*M_PI/4, + -1*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, +}; + + +/* + * Unary test cases. + */ + +static const struct unary_test_t unary_tests[] = { + {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)}, + {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)}, + {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)}, + {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)}, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)}, + {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)}, + {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)}, +}; + + +/* + * Build LLVM function that exercises the unary operator builder. + */ +static LLVMValueRef +build_unary_test_func(struct gallivm_state *gallivm, + LLVMModuleRef module, + LLVMContextRef context, + const struct unary_test_t *test) +{ + LLVMTypeRef i32t = LLVMInt32TypeInContext(context); + LLVMTypeRef f32t = LLVMFloatTypeInContext(context); + LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4); + LLVMTypeRef args[1] = { f32t }; + LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); + LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + LLVMValueRef ret; + + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + /* scalar to vector */ + arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, ""); + + ret = test->builder(&bld, arg1); + + /* vector to scalar */ + ret = LLVMBuildExtractElement(builder, ret, index0, ""); + + LLVMBuildRet(builder, ret); + return func; +} + + +/* + * Test one LLVM unary arithmetic builder function. + */ +static boolean +test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +{ + LLVMModuleRef module = gallivm->module; + LLVMValueRef test_func; + LLVMExecutionEngineRef engine = gallivm->engine; + LLVMContextRef context = gallivm->context; + char *error = NULL; + unary_func_t test_func_jit; + boolean success = TRUE; + int i; + + test_func = build_unary_test_func(gallivm, module, context, test); + + if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + printf("LLVMVerifyModule: %s\n", error); + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + + for (i = 0; i < test->num_values; ++i) { + float value = test->values[i]; + float ref = test->ref(value); + float src = test_func_jit(value); + + double error = fabs(src - ref); + double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + bool pass = precision >= 20.0; + + if (isnan(ref)) { + continue; + } + + if (!pass || verbose) { + printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", + test->name, value, ref, src, precision, + pass ? "PASS" : "FAIL"); + } + + if (!pass) { + success = FALSE; + } + } + + LLVMFreeMachineCodeForFunction(engine, test_func); + + return success; +} + + +boolean +test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + int i; + + for (i = 0; i < Elements(unary_tests); ++i) { + if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { + success = FALSE; + } + } + + return success; +} + + +boolean +test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, + unsigned long n) +{ + /* + * Not randomly generated test cases, so test all. + */ + + return test_all(gallivm, verbose, fp); +} + + +boolean +test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +{ + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index f4324e69971..a2795b604d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -75,7 +75,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static void' - print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -501,7 +501,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): print 'void' print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type) print '{' - print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type + print ' void (*func)(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type print '#ifdef DEBUG' print ' lp_tile_swizzle_count += 1;' print '#endif' @@ -539,7 +539,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type) print '{' - print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type + print ' void (*func)(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type print '#ifdef DEBUG' print ' lp_tile_unswizzle_count += 1;' print '#endif' diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index 18308b796f3..ffc444e37d1 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -324,131 +324,34 @@ static const char *noop_get_name(struct pipe_screen* pscreen) static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_GLSL: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: - - return 1; - case PIPE_CAP_DUAL_SOURCE_BLEND: - - case PIPE_CAP_SM3: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - case PIPE_CAP_DEPTH_CLAMP: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TIMER_QUERY: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 16; - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - - /* Fragment coordinate conventions. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - - default: - return 0; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->get_param(screen, param); } static float noop_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - default: - return 0.0f; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->get_paramf(screen, param); } static int noop_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - break; - default: - return 0; - } + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 8; - case PIPE_SHADER_CAP_MAX_INPUTS: - return 16; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; - case PIPE_SHADER_CAP_MAX_ADDRS: - return 1; - case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - default: - return 0; - } + return screen->get_shader_param(screen, shader, param); } -static boolean noop_is_format_supported(struct pipe_screen* screen, +static boolean noop_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage) { - return true; + struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen; + + return screen->is_format_supported(screen, format, target, sample_count, usage); } static void noop_destroy_screen(struct pipe_screen *screen) diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 3210d1ff77b..aae6d9889bb 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -10,6 +10,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = nouveau_screen.c \ nouveau_fence.c \ nouveau_mm.c \ - nouveau_buffer.c + nouveau_buffer.c \ + nouveau_video.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 696e0d3f24e..19bf7c84ac7 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -23,4 +23,7 @@ nouveau_context(struct pipe_context *pipe) return (struct nouveau_context *)pipe; } +void +nouveau_context_init_vdec(struct nouveau_context *); + #endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index d910809a0ec..cf291c6c595 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -76,6 +76,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); +void nouveau_screen_init_vdec(struct nouveau_screen *); #ifndef NOUVEAU_NVC0 diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c new file mode 100644 index 00000000000..620c030e112 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -0,0 +1,41 @@ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +static int +nouveau_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(pscreen, profile); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(pscreen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(pscreen, profile); + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} + +void +nouveau_screen_init_vdec(struct nouveau_screen *screen) +{ + screen->base.get_video_param = nouveau_screen_get_video_param; + screen->base.is_video_format_supported = vl_video_buffer_is_format_supported; +} + +void +nouveau_context_init_vdec(struct nouveau_context *nv) +{ + nv->pipe.create_video_decoder = vl_create_decoder; + nv->pipe.create_video_buffer = vl_video_buffer_create; +} diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index ac3e361a446..0d464063b5b 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -149,6 +149,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv) assert(nv50->draw); draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + nouveau_context_init_vdec(&nv50->base); + return pipe; } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c1226d5eb26..284db69e312 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -171,7 +171,8 @@ void nv50_validate_derived_rs(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *); +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, unsigned words); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c index be43147468a..34502d0a397 100644 --- a/src/gallium/drivers/nv50/nv50_formats.c +++ b/src/gallium/drivers/nv50/nv50_formats.c @@ -116,7 +116,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM, - B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), + B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM, @@ -124,7 +124,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM, - B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), + B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0), SAMPLER_VIEW | DEPTH_STENCIL }, [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT, diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index d72b23c137a..4271731efa7 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -462,6 +462,10 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) mi->saturate = 1; mi->def[0] = nvi->def[0]; mi->def[0]->insn = mi; + if (nvi->flags_def) { + mi->flags_def = nvi->flags_def; + mi->flags_def->insn = mi; + } nv_nvi_delete(nvi); } } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index a697ff5ecf7..581aad19627 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -91,6 +91,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D; @@ -182,6 +183,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -315,6 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) @@ -602,6 +607,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + if (!nv50_blitctx_create(screen)) + goto fail; + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); return pscreen; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 64ad209a728..315ca80c0d2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -21,6 +21,8 @@ struct nv50_context; #define NV50_SCREEN_RESIDENT_BO_COUNT 5 +struct nv50_blitctx; + struct nv50_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; @@ -39,6 +41,8 @@ struct nv50_screen { struct nouveau_resource *gp_code_heap; struct nouveau_resource *fp_code_heap; + struct nv50_blitctx *blitctx; + struct { void **entries; int next; @@ -71,6 +75,8 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +boolean nv50_blitctx_create(struct nv50_screen *); + void nv50_screen_make_buffers_resident(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index e5b10c37bef..d73f7c7f213 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -130,13 +130,14 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) int ret; unsigned size; - if (prog->translated) + if (!prog->translated) { + prog->translated = nv50_program_translate(prog); + if (!prog->translated) + return FALSE; + } else + if (prog->res) return TRUE; - prog->translated = nv50_program_translate(prog); - if (!prog->translated) - return FALSE; - if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; else if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8b0b08f8e93..44f2d25c1a7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -350,7 +350,7 @@ static struct state_validate { #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) boolean -nv50_state_validate(struct nv50_context *nv50) +nv50_state_validate(struct nv50_context *nv50, unsigned words) { unsigned i; @@ -367,6 +367,8 @@ nv50_state_validate(struct nv50_context *nv50) nv50->dirty = 0; } + MARK_RING(nv50->screen->base.channel, words, 0); + nv50_bufctx_emit_relocs(nv50); return TRUE; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index eefbaad6483..8bca900e1ff 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -198,6 +198,7 @@ nv50_resource_copy_region(struct pipe_context *pipe, { struct nv50_screen *screen = nv50_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. */ @@ -207,9 +208,15 @@ nv50_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); @@ -368,7 +375,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ nv50->dirty &= NV50_NEW_FRAMEBUFFER; - if (!nv50_state_validate(nv50)) + if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2))) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -405,12 +412,546 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER; } + +struct nv50_blitctx +{ + struct nv50_screen *screen; + struct { + struct pipe_framebuffer_state fb; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture; + struct nv50_tsc_entry *sampler; + unsigned dirty; + unsigned clip_nr; + } saved; + struct nv50_program vp; + struct nv50_program fp; + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + uint32_t fp_offset; + uint16_t color_mask; + uint8_t filter; +}; + +static void +nv50_blitctx_make_vp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + 0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x0423c788, + 0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x0423c788, + 0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x0423c788, + 0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x0423c788, + 0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + 0x0423c789, + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr = 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +static void +nv50_blitctx_make_fp(struct nv50_blitctx *blit) +{ + static const uint32_t code[] = + { + /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */ + 0x80000000, /* interp $r0 v[0x0] */ + 0x80010004, /* interp $r1 v[0x4] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */ + 0x0000c785, /* exit */ + + /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000201, /* cvt f32 $r0 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc0010001, /* mul f32 $r0 $r0 1/0xff */ + 0x03b8080b, + 0xd03f0405, /* and b32 $r1 $r2 0x0000ff */ + 0x0000000f, + 0xd000040d, /* and b32 $r3 $r2 0xff0000 */ + 0x000ff003, + 0xd0000409, /* and b32 $r2 $r2 0x00ff00 */ + 0x00000ff3, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa000060d, /* cvt f32 $r3 s32 $r3 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */ + 0x03b8080b, + 0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */ + 0x0338080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0378080b, + 0xf0000001, /* exit never nop */ + 0xe0000001, + + /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */ + 0x80000000, /* interp $r0 v[0x00] */ + 0x80010004, /* interp $r1 v[0x04] */ + 0x80020009, /* interp $r2 flat v[0x8] */ + 0x00040780, + 0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */ + 0x00000784, + 0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */ + 0x04b7ffff, + 0xa0000281, /* cvt f32 $r3 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt rni s32 $r2 f32 $r2 */ + 0x8c004780, + 0xc001060d, /* mul f32 $r3 $r3 1/0xff */ + 0x03b8080b, + 0xd03f0401, /* and b32 $r0 $r2 0x0000ff */ + 0x0000000f, + 0xd0000405, /* and b32 $r1 $r2 0x00ff00 */ + 0x00000ff3, + 0xd0000409, /* and b32 $r2 $r2 0xff0000 */ + 0x000ff003, + 0xa0000001, /* cvt f32 $r0 s32 $r0 */ + 0x44014780, + 0xa0000205, /* cvt f32 $r1 s32 $r1 */ + 0x44014780, + 0xa0000409, /* cvt f32 $r2 s32 $r2 */ + 0x44014780, + 0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */ + 0x03b8080b, + 0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */ + 0x0378080b, + 0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */ + 0x0338080b, + 0xf0000001, /* exit never nop */ + 0xe0000001 + }; + + blit->fp.type = PIPE_SHADER_FRAGMENT; + blit->fp.translated = TRUE; + blit->fp.code = (uint32_t *)code; /* const_cast */ + blit->fp.code_size = sizeof(code); + blit->fp.max_gpr = 4; + blit->fp.max_out = 4; + blit->fp.in_nr = 1; + blit->fp.in[0].mask = 0x7; /* last component flat */ + blit->fp.in[0].linear = 1; + blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC; + blit->fp.out_nr = 1; + blit->fp.out[0].mask = 0xf; + blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR; + blit->fp.fp.interp = 0x00020403; + blit->fp.gp.primid = 0x80; +} + +static void +nv50_blitctx_make_sampler(struct nv50_blitctx *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = 0x00000092; + blit->sampler[0].tsc[1] = 0x00000051; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = 0x00000092; + blit->sampler[1].tsc[1] = 0x00000062; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. + */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +static void +nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit, + enum pipe_format format, uint8_t mask) +{ + blit->color_mask = 0; + + switch (format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + blit->fp_offset = 160; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x0111; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x1000; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + blit->fp_offset = 24; + if (mask & PIPE_MASK_Z) + blit->color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + blit->color_mask |= 0x0001; + break; + default: + blit->fp_offset = 0; + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010; + if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100; + if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000; + break; + } +} + +static void +nv50_blit_set_dst(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(res->format)) + templ.format = nv50_blit_zeta_to_colour_format(res->format); + else + templ.format = res->format; + + templ.usage = PIPE_USAGE_STREAM; + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + +static INLINE void +nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view) +{ + struct nv50_tic_entry *ent = nv50_tic_entry(view); + + ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */ + + /* magic: */ + + ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */ +} + +static void +nv50_blit_set_src(struct nv50_context *nv50, + struct pipe_resource *res, unsigned level, unsigned layer) +{ + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + + templ.format = res->format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); + + nv50_blit_fixup_tic_entry(nv50->textures[2][0]); + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_channel *chan = blit->screen->base.channel; + + /* blend state */ + BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1); + OUT_RING (chan, blit->color_mask); + BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1); + OUT_RING (chan, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1); + OUT_RING (chan, 1); +#endif + BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + OUT_RING (chan, 0xffff); + BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3); + OUT_RING (chan, NV50_3D_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV50_3D_POLYGON_MODE_BACK_FILL); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + OUT_RING (chan, 0); + + /* zsa state */ + BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1); + OUT_RING (chan, 0); +} + +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50) +{ + int s; + + blit->saved.fb.width = nv50->framebuffer.width; + blit->saved.fb.height = nv50->framebuffer.height; + blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + blit->saved.vp = nv50->vertprog; + blit->saved.gp = nv50->gmtyprog; + blit->saved.fp = nv50->fragprog; + + nv50->vertprog = &blit->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = &blit->fp; + + blit->saved.clip_nr = nv50->clip.nr; + + nv50->clip.nr = 0; + + for (s = 0; s < 3; ++s) { + blit->saved.num_textures[s] = nv50->num_textures[s]; + blit->saved.num_samplers[s] = nv50->num_samplers[s]; + } + blit->saved.texture = nv50->textures[2][0]; + blit->saved.sampler = nv50->samplers[2][0]; + + nv50->samplers[2][0] = &blit->sampler[blit->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 1; + + blit->saved.dirty = nv50->dirty; + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct nv50_context *nv50, struct nv50_blitctx *blit) +{ + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + nv50->clip.nr = blit->saved.clip_nr; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture; + nv50->samplers[2][0] = blit->saved.sampler; + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + +static void +nv50_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_screen *screen = nv50->screen; + struct nv50_blitctx *blit = screen->blitctx; + struct nouveau_channel *chan = screen->base.channel; + struct pipe_resource *src = info->src.res; + struct pipe_resource *dst = info->dst.res; + float x0, x1, y0, y1, z; + float x_range, y_range; + + nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask); + + blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1; + + nv50_blitctx_pre_blit(blit, nv50); + + nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer); + nv50_blit_set_src(nv50, src, 0, info->src.layer); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, 36); + + x_range = + (float)(info->src.x1 - info->src.x0) / + (float)(info->dst.x1 - info->dst.x0); + y_range = + (float)(info->src.y1 - info->src.y0) / + (float)(info->dst.y1 - info->dst.y0); + + x0 = (float)info->src.x0 - x_range * (float)info->dst.x0; + y0 = (float)info->src.y0 - y_range * (float)info->dst.y0; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + z = (float)info->src.layer; + + BEGIN_RING(chan, RING_3D(FP_START_ID), 1); + OUT_RING (chan, + blit->fp.code_base + blit->fp_offset); + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 0); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. + */ + + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + OUT_RING (chan, (info->dst.x1 << 16) | info->dst.x0); + OUT_RING (chan, (info->dst.y1 << 16) | info->dst.y0); + + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x1); + OUT_RINGf (chan, y0); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x); + OUT_RINGf (chan, 0.0f); + BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3); + OUT_RINGf (chan, x0); + OUT_RINGf (chan, y1); + OUT_RINGf (chan, z); + BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2); + OUT_RINGf (chan, 0.0f); + OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1); + OUT_RING (chan, 0); + + /* re-enable normally constant state */ + + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); + OUT_RING (chan, 1); + + nv50_blitctx_post_blit(nv50, blit); +} + +boolean +nv50_blitctx_create(struct nv50_screen *screen) +{ + screen->blitctx = CALLOC_STRUCT(nv50_blitctx); + if (!screen->blitctx) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + screen->blitctx->screen = screen; + + nv50_blitctx_make_vp(screen->blitctx); + nv50_blitctx_make_fp(screen->blitctx); + + nv50_blitctx_make_sampler(screen->blitctx); + + screen->blitctx->color_mask = 0x1111; + + return TRUE; +} + void nv50_init_surface_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; pipe->resource_copy_region = nv50_resource_copy_region; + pipe->resource_resolve = nv50_resource_resolve; pipe->clear_render_target = nv50_clear_render_target; pipe->clear_depth_stencil = nv50_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f23008ae4cf..1c8347a793a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -647,7 +647,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); - nv50_state_validate(nv50); + nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */ chan->flush_notify = nv50_draw_vbo_flush_notify; diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 983db23eedb..360afbb943e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -150,6 +150,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) assert(nvc0->draw); draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); + nouveau_context_init_vdec(&nvc0->base); + return pipe; } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 605a0b04018..c79256a6ba2 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -24,6 +24,9 @@ #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + #include "nvc0_context.h" #include "nvc0_screen.h" @@ -167,6 +170,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -373,6 +378,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nvc0_screen_init_resource_functions(pscreen); + nouveau_screen_init_vdec(&screen->base); + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &screen->fence.bo); if (ret) diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index 67bba3c6cc3..a4fd17e5324 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -205,6 +205,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, { struct nvc0_screen *screen = nvc0_context(pipe)->screen; int ret; + boolean m2mf; unsigned dst_layer = dstz, src_layer = src_box->z; /* Fallback for buffers. */ @@ -214,9 +215,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe, return; } + assert(src->nr_samples == dst->nr_samples); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; - if (src->format == dst->format && src->nr_samples == dst->nr_samples) { + if (m2mf) { struct nv50_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 8cb6cd0938e..3b77c9600c6 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -179,6 +179,8 @@ nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } @@ -224,6 +226,8 @@ nvfx_screen_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 339906e6a63..04b0304b44f 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -288,7 +288,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe, * TODO: perhaps support reinterpreting the formats */ struct blitter_context* blitter = nvfx_get_blitter(pipe, 1); - util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); + util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE); nvfx_put_blitter(pipe, blitter); } else diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 4088216adcb..4f021276a8f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -26,19 +26,51 @@ C_SOURCES = \ r300_texture.c \ r300_texture_desc.c \ r300_tgsi_to_rc.c \ - r300_transfer.c + r300_transfer.c \ + \ + compiler/radeon_code.c \ + compiler/radeon_compiler.c \ + compiler/radeon_compiler_util.c \ + compiler/radeon_emulate_branches.c \ + compiler/radeon_emulate_loops.c \ + compiler/radeon_program.c \ + compiler/radeon_program_print.c \ + compiler/radeon_opcodes.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/radeon_program_tex.c \ + compiler/radeon_pair_translate.c \ + compiler/radeon_pair_schedule.c \ + compiler/radeon_pair_regalloc.c \ + compiler/radeon_pair_dead_sources.c \ + compiler/radeon_dataflow.c \ + compiler/radeon_dataflow_deadcode.c \ + compiler/radeon_dataflow_swizzles.c \ + compiler/radeon_list.c \ + compiler/radeon_optimize.c \ + compiler/radeon_remove_constants.c \ + compiler/radeon_rename_regs.c \ + compiler/radeon_variable.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + compiler/r3xx_vertprog.c \ + compiler/r3xx_vertprog_dump.c \ + compiler/memory_pool.c \ + \ + $(TOP)/src/glsl/ralloc.c \ + $(TOP)/src/mesa/program/register_allocate.c -LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/include - -COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a -EXTRA_OBJECTS = \ - $(COMPILER_ARCHIVE) +LIBRARY_INCLUDES = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl include ../../Makefile.template -.PHONY: $(COMPILER_ARCHIVE) -$(COMPILER_ARCHIVE): - $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler +test: default + @$(MAKE) -s -C compiler/tests/ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 3af157a7956..7ffd1c27c96 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,13 +1,11 @@ Import('*') -r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') - env = env.Clone() -# add the paths for r300compiler env.Append(CPPPATH = [ - '#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa', + '#/src/glsl', + '#/src/mapi', ]) r300 = env.ConvenienceLibrary( @@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary( 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', - ] + r300compiler) + r300compiler + 'compiler/radeon_code.c', + 'compiler/radeon_compiler.c', + 'compiler/radeon_compiler_util.c', + 'compiler/radeon_program.c', + 'compiler/radeon_program_print.c', + 'compiler/radeon_opcodes.c', + 'compiler/radeon_program_alu.c', + 'compiler/radeon_program_pair.c', + 'compiler/radeon_program_tex.c', + 'compiler/radeon_pair_translate.c', + 'compiler/radeon_pair_schedule.c', + 'compiler/radeon_pair_regalloc.c', + 'compiler/radeon_pair_dead_sources.c', + 'compiler/radeon_optimize.c', + 'compiler/radeon_remove_constants.c', + 'compiler/radeon_rename_regs.c', + 'compiler/radeon_emulate_branches.c', + 'compiler/radeon_emulate_loops.c', + 'compiler/radeon_dataflow.c', + 'compiler/radeon_dataflow_deadcode.c', + 'compiler/radeon_dataflow_swizzles.c', + 'compiler/radeon_variable.c', + 'compiler/radeon_list.c', + 'compiler/r3xx_fragprog.c', + 'compiler/r300_fragprog.c', + 'compiler/r300_fragprog_swizzle.c', + 'compiler/r300_fragprog_emit.c', + 'compiler/r500_fragprog.c', + 'compiler/r500_fragprog_emit.c', + 'compiler/r3xx_vertprog.c', + 'compiler/r3xx_vertprog_dump.c', + 'compiler/memory_pool.c', + '#/src/glsl/ralloc.c', + '#/src/mesa/program/register_allocate.c' + ]) env.Alias('r300', r300) diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c new file mode 100644 index 00000000000..ddcdddf9e3c --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.c @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 8 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h new file mode 100644 index 00000000000..42344d0e3ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/memory_pool.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accomodate up to num new element. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k < Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + +#endif /* MEMORY_POOL_H */ diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c new file mode 100644 index 00000000000..deba9ca834d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c @@ -0,0 +1,338 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +static void presub_string(char out[10], unsigned int inst) +{ + switch(inst & 0x600000){ + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + sprintf(out, "inv "); + break; + } +} + +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[4][10], dstc[20]; + char srca[4][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', (rega & 31) | msba); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + + sprintf(dstc, "t%i.%s ", + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> 29) & 3, + flags); + strcat(dstc, tmp); + } + /* Presub */ + presub_string(srcc[3], code->alu.inst[i].rgb_inst); + presub_string(srca[3], code->alu.inst[i].alpha_inst); + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dsta, "t%i.w ", + ((code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> 25) & 3); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" + " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], srcc[3], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], srca[3], dsta, + code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d < 20 ) { + switch(d) { + case R300_ALU_ARGC_SRCP_XYZ: + sprintf(buf, "srcp.xyz"); + break; + case R300_ALU_ARGC_SRCP_XXX: + sprintf(buf, "srcp.xxx"); + break; + case R300_ALU_ARGC_SRCP_YYY: + sprintf(buf, "srcp.yyy"); + break; + case R300_ALU_ARGC_SRCP_ZZZ: + sprintf(buf, "srcp.zzz"); + break; + case R300_ALU_ARGC_SRCP_WWW: + sprintf(buf, "srcp.www"); + break; + } + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d < 16) { + switch(d) { + case R300_ALU_ARGA_SRCP_X: + sprintf(buf, "srcp.x"); + break; + case R300_ALU_ARGA_SRCP_Y: + sprintf(buf, "srcp.y"); + break; + case R300_ALU_ARGA_SRCP_Z: + sprintf(buf, "srcp.z"); + break; + case R300_ALU_ARGA_SRCP_W: + sprintf(buf, "srcp.w"); + break; + } + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, + code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? + "NOP" : "", + arga[0], arga[1],arga[2], + code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h new file mode 100644 index 00000000000..0c88bab2f33 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 00000000000..e6fd1fde62d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + +static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + if (!src.Used) + return 0; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index & 0x1f; + } + + return 0; +} + + +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTC_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_CND: return R300_ALU_OUTA_CND; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +{ + int ip; + int j; + PROG_CODE; + + if (code->alu.length >= c->Base.max_alu_insts) { + error("Too many ALU instructions"); + return 0; + } + + ip = code->alu.length++; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + /* Set the RGB address */ + unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + + code->alu.inst[ip].rgb_addr |= src << (6*j); + + /* Set the Alpha address */ + src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + + code->alu.inst[ip].alpha_addr |= src << (6*j); + + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + /* Presubtract */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; + code->alu.inst[ip].rgb_addr |= + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; + code->alu.inst[ip].alpha_addr |= + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; + + return 1; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static int finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; + + unsigned int alu_offset_msbs, alu_end_msbs; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } + + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; + tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } + return 1; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static int begin_tex(struct r300_emit_state * emit) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return 1; + } + + if (emit->current_node == 3) { + error("Too many texture indirections"); + return 0; + } + + if (!finish_node(emit)) + return 0; + + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; +} + + +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +{ + unsigned int unit; + unsigned int dest; + unsigned int opcode; + PROG_CODE; + + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { + error("Too many TEX instructions"); + return 0; + } + + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; + + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } + + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->U.I.SrcReg[0].Index); + + code->tex.inst[code->tex.length++] = + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; + return 1; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; + code->code_offset = + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 00000000000..b7bca8c0cfa --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include <stdio.h> + +#include "../r300_reg.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) + +struct swizzle_data { + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/scrp */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + +/** + * Determines if the given swizzle is valid for r300/r400. In most situations + * it is better to use r300_swizzle_is_native() which can be accesed via + * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). + */ +int r300_swizzle_is_native_basic(unsigned int swizzle) +{ + if(lookup_native_swizzle(swizzle)) + return 1; + else + return 0; +} + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + const struct swizzle_data* sd; + unsigned int relevant; + int j; + + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + if (reg.Abs || reg.Negate) + return 0; + + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; + } + + return 1; + } + + relevant = 0; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) + return 0; + + return 1; +} + + +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) +{ + split->NumPhases = 0; + + while(mask) { + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz; + if (!GET_BIT(mask, comp)) + continue; + swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; + } + } + + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; + + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; + } +} + +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; + } + + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src*sd->stride; + } +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +{ + unsigned int swz = GET_SWZ(swizzle, 0); + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swz; + } + if (swz < 3) + return swz + 3*src; + + switch(swz) { + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 00000000000..f2635be140d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "radeon_swizzle.h" + +extern struct rc_swizzle_caps r300_swizzle_caps; + +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); +int r300_swizzle_is_native_basic(unsigned int swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c new file mode 100644 index 00000000000..bb6c010e8e3 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,172 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); +} + +static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); + + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); + } + } +} + +static int radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + return 1; +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_tex[] = { + { &radeonTransformTEX, c }, + { 0, 0 } + }; + + struct radeon_program_transformation rewrite_if[] = { + { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation native_rewrite_r300[] = { + { &radeonTransformALU, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass fs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, + /* This transformation needs to be done before any of the IF + * instructions are modified. */ + {"transform KILP", 1, 1, rc_transform_KILP, NULL}, + {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, + {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, + {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, + {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, + {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + /* This pass makes it easier for the scheduler to group TEX + * instructions and reduces the chances of creating too + * many texture indirections.*/ + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"pair translate", 1, 1, rc_pair_translate, NULL}, + {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, + {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, &opt}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, + {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_FRAGMENT_PROGRAM; + c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; + + rc_run_compiler(&c->Base, fs_list); + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c new file mode 100644 index 00000000000..654f9a070d5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_program_alu.h" +#include "radeon_swizzle.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" + +struct loop { + int BgnLoop; + +}; + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(unsigned int mask) +{ + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; +} + +static unsigned long t_dst_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } +} + +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return 0; + if (aclass == PVS_SRC_REG_TEMPORARY) + return 0; + + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; + + return 0; +} + +static inline unsigned long t_swizzle(unsigned int swizzle) +{ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4) | (src->Abs << 3); +} + +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } + + return 1; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + unsigned int i; + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + /* Arguments with constant swizzles still count as a unique + * temporary, so we should make sure these arguments share a + * register index with one of the other arguments. */ + for (i = 0; i < 3; i++) { + unsigned int j; + if (vpi->SrcReg[i].File != RC_FILE_NONE) + continue; + + for (j = 0; j < 3; j++) { + if (i != j) { + vpi->SrcReg[i].Index = + vpi->SrcReg[j].Index; + break; + } + } + } + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[RC_REGISTER_MAX_INDEX]; + struct rc_instruction * inst; + unsigned int i; + memset(writemasks, 0, sizeof(writemasks)); + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < compiler->Base.max_temp_regs; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used fo the predicate + * stack counter. */ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void translate_vertex_program(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *rci; + + struct loop * loops = NULL; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + compiler->code->num_temporaries = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (info->HasDstReg) { + /* Neither is Saturate. */ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + + if (compiler->code->length >= c->max_alu_insts * 4) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); + + switch (vpi->Opcode) { + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); + return; + } + + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. */ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + + /* Update the number of temporaries. */ + if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && + vpi->DstReg.Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->DstReg.Index + 1; + + for (unsigned i = 0; i < info->NumSrcRegs; i++) + if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && + vpi->SrcReg[i].Index >= compiler->code->num_temporaries) + compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; + + if (compiler->PredicateMask) + if (compiler->PredicateIndex >= compiler->code->num_temporaries) + compiler->code->num_temporaries = compiler->PredicateIndex + 1; + + if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { + rc_error(&compiler->Base, "Too many temporaries.\n"); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + unsigned int Allocated:1; + unsigned int HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; + struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; + unsigned int num_orig_temps = 0; + char hwtemps[RC_REGISTER_MAX_INDEX]; + struct temporary_allocation * ta; + unsigned int i, j; + + memset(hwtemps, 0, sizeof(hwtemps)); + + rc_recompute_ips(c); + + /* Pass 1: Count original temporaries. */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; + } + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < c->max_temp_regs; ++j) { + if (!hwtemps[j]) + break; + } + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[ta[orig].HwTemp] = 1; + } + + inst->U.I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). */ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static int transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + } + } + + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + } + } + + return 1; +} + +static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) +{ + struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_vertex_program_compiler * c = userdata; + int i; + + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); + } +} + +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + (void) opcode; + (void) reg; + + return 1; +} + +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = RC_MASK_X; + add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. */ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) +{ + int is_r500 = c->Base.is_r500; + int opt = !c->Base.disable_optimizations; + + /* Lists of instruction transformations. */ + struct radeon_program_transformation alu_rewrite_r500[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation alu_rewrite_r300[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_simple, 0 }, + { 0, 0 } + }; + + /* Note: These passes have to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * or non-native modifiers will not be treated properly. + */ + struct radeon_program_transformation emulate_modifiers[] = { + { &transform_nonnative_modifiers, 0 }, + { 0, 0 } + }; + + struct radeon_program_transformation resolve_src_conflicts[] = { + { &transform_source_conflicts, 0 }, + { 0, 0 } + }; + + /* List of compiler passes. */ + struct radeon_compiler_pass vs_list[] = { + /* NAME DUMP PREDICATE FUNCTION PARAM */ + {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, + {"transform loops", 1, 1, rc_transform_loops, NULL}, + {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, + {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, + {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, + {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, + {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, + {"dataflow optimize", 1, opt, rc_optimize, NULL}, + /* This pass must be done after optimizations. */ + {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, + {"register allocation", 1, opt, allocate_temporary_registers, NULL}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"final code validation", 0, 1, rc_validate_final_shader, NULL}, + {"machine code generation", 0, 1, translate_vertex_program, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, + {NULL, 0, 0, NULL, NULL} + }; + + c->Base.type = RC_VERTEX_PROGRAM; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + + rc_run_compiler(&c->Base, vs_list); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 00000000000..2bc0a87eed8 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" +#include "radeon_code.h" +#include "../r300_reg.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) +{ + struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; + struct r300_vertex_program_code * vs = c->code; + unsigned instrcount = vs->length / 4; + unsigned i; + + fprintf(stderr, "Final vertex program code:\n"); + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c new file mode 100644 index 00000000000..cf99f5e4538 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -0,0 +1,539 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include <stdio.h> + +#include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" +#include "../r300_reg.h" + +/** + * Rewrite IF instructions to use the ALU result special register. + */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void *data) +{ + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; + + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { + return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } + + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; + + return 1; +} + +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + unsigned int relevant; + int i; + + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_TXD || + opcode == RC_OPCODE_TXL || + opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; + + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } + + if (reg.Negate) + return 0; + + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; + + return 0; + } else { + /* ALU instructions support almost everything */ + relevant = 0; + for(i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + return 1; + } +} + +/** + * Split source register access. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. + */ +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) +{ + unsigned int negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + split->NumPhases = 0; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + split->Phase[split->NumPhases++] = negatebase[i]; + } +} + +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + int n, i; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, + (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch(inst & 0x7){ + case R500_FC_OP_JUMP: + fprintf(stderr, "JUMP"); + break; + case R500_FC_OP_LOOP: + fprintf(stderr, "LOOP"); + break; + case R500_FC_OP_ENDLOOP: + fprintf(stderr, "ENDLOOP"); + break; + case R500_FC_OP_REP: + fprintf(stderr, "REP"); + break; + case R500_FC_OP_ENDREP: + fprintf(stderr, "ENDREP"); + break; + case R500_FC_OP_BREAKLOOP: + fprintf(stderr, "BREAKLOOP"); + break; + case R500_FC_OP_BREAKREP: + fprintf(stderr, "BREAKREP"); + break; + case R500_FC_OP_CONTINUE: + fprintf(stderr, "CONTINUE"); + break; + } + fprintf(stderr," "); + /* A_OP */ + switch(inst & (0x3 << 6)){ + case R500_FC_A_OP_NONE: + fprintf(stderr, "NONE"); + break; + case R500_FC_A_OP_POP: + fprintf(stderr, "POP"); + break; + case R500_FC_A_OP_PUSH: + fprintf(stderr, "PUSH"); + break; + } + /* B_OP0 B_OP1 */ + for(i=0; i<2; i++){ + fprintf(stderr, " "); + switch(inst & (0x3 << (24 + (i * 2)))){ + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", + inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); + break; + case R500_INST_TYPE_TEX: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h new file mode 100644 index 00000000000..6aa448cc6f7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + +extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); + +extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); + +extern struct rc_swizzle_caps r500_swizzle_caps; + +extern int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst_if, + void* data); + +#endif diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 00000000000..c30cd753d15 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <[email protected]> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + * + * \author Corbin Simpson <[email protected]> + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + +#define PROG_CODE \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct r500_loop_info { + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + struct r500_loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + + unsigned int MaxBranchDepth; + +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_CND: return R500_ALPHA_OP_CND; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static unsigned int fix_hw_swizzle(unsigned int swz) +{ + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + + return swz; +} + +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +{ + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +{ + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) +{ + /* From docs: + * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. + * MSB = 1 << 7 */ + if (!src.Used) + return 1 << 7; + + if (src.File == RC_FILE_CONSTANT) { + return src.Index | R500_RGB_ADDR0_CONST; + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} + +/** + * Emit a paired ALU instruction. + */ +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_alu: Too many instructions"); + return; + } + + ip = ++code->inst_end; + + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("Cannot write output and ALU result at the same time"); + return; + } + } else { + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); + code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + /* Set the presubtract operation. */ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } +} + +static unsigned int translate_strq_swizzle(unsigned int swizzle) +{ + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +{ + int ip; + PROG_CODE; + + if (code->inst_end >= c->Base.max_alu_insts-1) { + error("emit_tex: Too many instructions"); + return 0; + } + + ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE; + + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + case RC_OPCODE_TXD: + code->inst[ip].inst1 |= R500_TEX_INST_DXDY; + break; + case RC_OPCODE_TXL: + code->inst[ip].inst1 |= R500_TEX_INST_LOD; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } + + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; + + if (inst->Opcode == RC_OPCODE_TXD) { + use_temporary(code, inst->SrcReg[1].Index); + use_temporary(code, inst->SrcReg[2].Index); + + /* DX and DY parameters are specified in a separate register. */ + code->inst[ip].inst3 = + R500_DX_ADDR(inst->SrcReg[1].Index) | + (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | + R500_DY_ADDR(inst->SrcReg[2].Index) | + (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); + } + + return 1; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + unsigned int newip; + + if (s->Code->inst_end >= s->C->max_alu_insts-1) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + newip = ++s->Code->inst_end; + + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct r500_loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_ENDLOOP: + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + + branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ELSE: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + break; + + case RC_OPCODE_ENDIF: + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + + s->CurrentBranchDepth--; + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); + } +} + +void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) +{ + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + + if (compiler->Base.Error) + return; + + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= compiler->Base.max_alu_insts-1) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c new file mode 100644 index 00000000000..6842fb873bc --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_code.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "radeon_program.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for(comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = RC_SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} + +void rc_constants_print(struct rc_constant_list * c) +{ + unsigned int i; + for(i = 0; i < c->Count; i++) { + if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { + float * values = c->Constants[i].u.Immediate; + fprintf(stderr, "CONST[%u] = " + "{ %10.4f %10.4f %10.4f %10.4f }\n", + i, values[0],values[1], values[2], values[3]); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h new file mode 100644 index 00000000000..67e6acf8b10 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_code.h @@ -0,0 +1,306 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_TEXSCALE_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +void rc_constants_print(struct rc_constant_list * c); + +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * This field contains swizzle for some lowering passes + * (shadow comparison, unorm->snorm conversion) + */ + unsigned texture_swizzle:12; + + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; + + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** + * If the sampler will receive non-normalized coords, + * this field is set. The scaling factor is given by + * RC_STATE_R300_TEXRECT_FACTOR. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 3; + + /** + * The coords are scaled after applying the wrap mode emulation + * and right before texture fetch. The scaling factor is given by + * RC_STATE_R300_TEXSCALE_FACTOR. */ + unsigned clamp_and_scale_before_fetch : 1; + + /** + * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM + * in the shader. + */ + unsigned convert_unorm_to_snorm:1; + } unit[16]; + + unsigned frag_clamp:1; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; + } tex; + + struct { + unsigned int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; + + uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; + unsigned *constants_remap_table; +}; + + +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + unsigned *constants_remap_table; + + uint32_t InputsRead; + uint32_t OutputsWritten; + + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; + +#endif /* RADEON_CODE_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c new file mode 100644 index 00000000000..b7936725d85 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -0,0 +1,489 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "radeon_dataflow.h" +#include "radeon_program.h" +#include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!(c->Debug & RC_DBG_LOG)) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = 1; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; + + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. + */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + int *max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + (int)index > *max_reg ? *max_reg = index : 0; +} + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) +{ + int max_reg = -1; + struct rc_instruction * tmp; + memset(s, 0, sizeof(*s)); + + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + s->num_presub_ops++; + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + s->num_presub_ops++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + s->num_alpha_insts++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + s->num_rgb_insts++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + s->num_fc_insts++; + if (info->HasTexture) + s->num_tex_insts++; + s->num_insts++; + } + s->num_temp_regs = max_reg + 1; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_program_stats s; + + if (c->initial_num_insts <= 5) + return; + + rc_get_stats(c, &s); + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); + } +} + +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + for (unsigned i = 0; list[i].name; i++) { + if (list[i].predicate) { + list[i].run(c, list[i].user); + + if (c->Error) + return; + + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); + rc_print_program(&c->Program); + } + } + } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + + if (c->Debug & RC_DBG_STATS) + print_stats(c); +} + +void rc_validate_final_shader(struct radeon_compiler *c, void *user) +{ + /* Check the number of constants. */ + if (c->Program.Constants.Count > c->max_constants) { + rc_error(c, "Too many constants. Max: %i, Got: %i\n", + c->max_constants, c->Program.Constants.Count); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h new file mode 100644 index 00000000000..74594af23c2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -0,0 +1,171 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/compiler.h" + +#include "memory_pool.h" +#include "radeon_code.h" +#include "radeon_program.h" +#include "radeon_emulate_loops.h" + +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + +struct rc_swizzle_caps; + +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + enum rc_program_type type; + unsigned Debug:2; + unsigned Error:1; + char * ErrorMsg; + + /* Hardware specification. */ + unsigned is_r400:1; + unsigned is_r500:1; + unsigned has_half_swizzles:1; + unsigned has_presub:1; + unsigned disable_optimizations:1; + unsigned max_temp_regs; + unsigned max_constants; + int max_alu_insts; + unsigned max_tex_insts; + + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants:1; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ + + struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); + +struct radeon_compiler_pass { + const char *name; /* Name of the pass. */ + int dump; /* Dump the program if Debug == 1? */ + int predicate; /* Run this pass? */ + void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ + void *user; /* Optional parameter which is passed to the run function. */ +}; + +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_validate_final_shader(struct radeon_compiler *c, void *user); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c new file mode 100644 index 00000000000..2742721f800 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c @@ -0,0 +1,701 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_compiler_util.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +/** + */ +unsigned int rc_swizzle_to_writemask(unsigned int swz) +{ + unsigned int mask = 0; + unsigned int i; + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +/** + * @return A swizzle the results from converting old_swizzle using + * conversion_swizzle + */ +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). + */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + rc_adjust_channels(sub->Arg[i].Swizzle, + conversion_swizzle); + } +} + +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * new_mask = (unsigned int *)userdata; + src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. + */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + unsigned int new_mask; + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + new_mask = sub->DstReg.WriteMask; + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). + * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask) +{ + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); +} + +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct src_select { + rc_register_file File; + int Index; + unsigned int SrcType; +}; + +struct can_use_presub_data { + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register * ReplaceReg; + unsigned int ReplaceRemoved; +}; + +static void can_use_presub_data_add_select( + struct can_use_presub_data * data, + rc_register_file file, + unsigned int index, + unsigned int src_type) +{ + struct src_select * select; + + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = src_type; +} + +/** + * This callback function counts the number of sources in inst that are + * different from the sources in can_use_presub_data->RemoveSrcs. + */ +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct can_use_presub_data * d = userdata; + + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; + return; + } + + if (src->File == RC_FILE_NONE) + return; + + can_use_presub_data_add_select(d, src->File, src->Index, + rc_source_type_swz(src->Swizzle)); +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.ReplaceReg = replace_reg; + + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, + presub_src0->File, + presub_src0->Index, + src_type0); + + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, + presub_src1->File, + presub_src1->Index, + src_type1); + + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. + */ + if (presub_src0->File == presub_src1->File + && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } + + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. */ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + if (d.Selects[i].File == d.Selects[j].File + && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } + + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + + if (rgb_count > 3 || alpha_count > 3) { + return 0; + } + + return 1; +} + +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. + */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + struct rc_instruction * inst; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This function attempts to remove a source from a pair instructions. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. + * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} + +/** + * @return RC_OPCODE_NOOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +/** + * @return The BGNLOOP instruction that starts the loop ended by endloop. + */ +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) +{ + unsigned int endloop_count = 0; + struct rc_instruction * inst; + for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + endloop_count++; + } else if (op == RC_OPCODE_BGNLOOP) { + if (endloop_count == 0) { + return inst; + } else { + endloop_count--; + } + } + } + return NULL; +} + +/** + * @return The ENDLOOP instruction that ends the loop started by bgnloop. + */ +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) +{ + unsigned int bgnloop_count = 0; + struct rc_instruction * inst; + for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + bgnloop_count++; + } else if (op == RC_OPCODE_ENDLOOP) { + if (bgnloop_count == 0) { + return inst; + } else { + bgnloop_count--; + } + } + } + return NULL; +} + +/** + * @return A conversion swizzle for converting from old_mask->new_mask + */ +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask) +{ + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + unsigned int old_idx; + unsigned int new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_mask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + return conversion_swizzle; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h new file mode 100644 index 00000000000..3730aa888c0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h @@ -0,0 +1,89 @@ +#include "radeon_program_constants.h" + +#ifndef RADEON_PROGRAM_UTIL_H +#define RADEON_PROGRAM_UTIL_H + +#include "radeon_opcodes.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; +struct rc_src_register; + +unsigned int rc_swizzle_to_writemask(unsigned int swz); + +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +unsigned int rc_adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle); + +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int new_mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask); + +unsigned int rc_source_type_swz(unsigned int swizzle); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1); + +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask); + +rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); + +struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); +struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); + +unsigned int rc_make_conversion_swizzle( + unsigned int old_mask, + unsigned int new_mask); + +#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c new file mode 100644 index 00000000000..a8decacedaf --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +struct read_write_mask_data { + void * UserData; + rc_read_write_mask_fn Cb; +}; + +static void reads_normal_callback( + void * userdata, + struct rc_instruction * fullinst, + struct rc_src_register * src) +{ + struct read_write_mask_data * cb_data = userdata; + unsigned int refmask = 0; + unsigned int chan; + for(chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src->Swizzle, chan); + } + refmask &= RC_MASK_XYZW; + + if (refmask) { + cb_data->Cb(cb_data->UserData, fullinst, src->File, + src->Index, refmask); + } + + if (refmask && src->RelAddr) { + cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, + RC_MASK_X); + } +} + +static void pair_get_src_refmasks(unsigned int * refmasks, + struct rc_pair_instruction * inst, + unsigned int swz, unsigned int src) +{ + if (swz >= 4) + return; + + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if(src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = + rc_presubtract_src_reg_count( + inst->RGB.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } + + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count( + inst->Alpha.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + unsigned int arg; + + for(arg = 0; arg < 3; ++arg) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = + GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = + GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, + inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, + inst->Alpha.Arg[arg].Source); + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); + + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } +} + +static void pair_sub_for_all_args( + struct rc_instruction * fullinst, + struct rc_pair_sub_instruction * sub, + rc_pair_read_arg_fn cb, + void * userdata) +{ + int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type; + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } + } +} + +/* This function calls the callback function (cb) for each source used by + * the instruction. + * */ +void rc_for_all_reads_src( + struct rc_instruction * inst, + rc_read_src_fn cb, + void * userdata) +{ + const struct rc_opcode_info * opcode = + rc_get_opcode_info(inst->U.I.Opcode); + + /* This function only works with normal instructions. */ + if (inst->Type != RC_INSTRUCTION_NORMAL) { + assert(0); + return; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + + if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) + continue; + + if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->U.I.PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); + } + } else { + cb(userdata, inst, &inst->U.I.SrcReg[src]); + } + } +} + +/** + * This function calls the callback function (cb) for each arg of the RGB and + * alpha components. + */ +void rc_pair_for_all_reads_arg( + struct rc_instruction * inst, + rc_pair_read_arg_fn cb, + void * userdata) +{ + /* This function only works with pair instructions. */ + if (inst->Type != RC_INSTRUCTION_PAIR) { + assert(0); + return; + } + + pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); + pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); +} + +/** + * Calls a callback function for all register reads. + * + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct read_write_mask_data cb_data; + cb_data.UserData = userdata; + cb_data.Cb = cb; + + rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +/** + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} + + +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + + for(i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } + remapped_presub = 1; + } + else { + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. + */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} + +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + +struct get_readers_callback_data { + struct radeon_compiler * C; + struct rc_reader_data * ReaderData; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; + rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; + unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; +}; + +static struct rc_reader * add_reader( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask) +{ + struct rc_reader * new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, + data->ReaderCount, data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + return new; +} + +static void add_reader_normal( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.I.Src = src; +} + + +static void add_reader_pair( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + struct rc_reader * new = add_reader(pool, data, inst, mask); + new->U.P.Src = src; + new->U.P.Arg = arg; +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + if (cb_data->ReaderData->LoopDepth > 0) { + cb_data->ReaderData->AbortOnWrite |= + (read_mask & cb_data->AliveWriteMask); + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine whether inst + * is a reader of userdata->ReaderData->Writer + */ +static void get_readers_normal_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct get_readers_callback_data * d = userdata; + unsigned int shared_mask; + + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine when + * userdata->ReaderData->Writer is dead (i. e. All compontents of its + * destination register have been overwritten by other instructions). + */ +static void get_readers_write_callback( + void *userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_readers_callback_data * d = userdata; + + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + if (d->ReaderData->AbortOnWrite & shared_mask) { + d->ReaderData->Abort = 1; + } + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void push_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + (*branch_depth)++; + if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[*branch_depth].IfWriteMask = + d->AliveWriteMask; +} + +static void pop_branch_mask( + struct get_readers_callback_data * d, + unsigned int * branch_depth) +{ + struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; + + if (masks->HasElse) { + /* Abort on read for components that were written in the IF + * block. */ + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~masks->ElseWriteMask; + /* Abort on read for components that were written in the ELSE + * block. */ + d->ReaderData->AbortOnRead |= + masks->ElseWriteMask & ~d->AliveWriteMask; + + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) + & (masks->IfWriteMask ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, sizeof(struct branch_write_mask)); + (*branch_depth)--; +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct rc_instruction * endloop = NULL; + unsigned int abort_on_read_at_endloop = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->AbortOnWrite = 0; + d->ReaderData->LoopDepth = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = rc_get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + d->ReaderData->LoopDepth++; + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ENDLOOP: + if (d->ReaderData->LoopDepth > 0) { + d->ReaderData->LoopDepth--; + if (d->ReaderData->LoopDepth == 0) { + d->ReaderData->AbortOnWrite = 0; + } + pop_branch_mask(d, &branch_depth); + } else { + /* Here we have reached an ENDLOOP without + * seeing its BGNLOOP. These means that + * the writer was written inside of a loop, + * so it could have readers that are above it + * (i.e. they have a lower IP). To find these + * readers we jump to the BGNLOOP instruction + * and check each instruction until we get + * back to the writer. + */ + endloop = tmp; + tmp = rc_match_endloop(tmp); + if (!tmp) { + rc_error(d->C, "Failed to match endloop.\n"); + d->ReaderData->Abort = 1; + return; + } + abort_on_read_at_endloop = d->ReaderData->AbortOnRead; + d->ReaderData->AbortOnRead |= d->AliveWriteMask; + continue; + } + break; + case RC_OPCODE_IF: + push_branch_mask(d, &branch_depth); + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + pop_branch_mask(d, &branch_depth); + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); + } + + /* This can happen when we jump from an ENDLOOP to BGNLOOP */ + if (tmp == writer) { + tmp = endloop; + endloop = NULL; + d->ReaderData->AbortOnRead = abort_on_read_at_endloop; + continue; + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } +} + +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + +/** + * This function will create a list of readers via the rc_reader_data struct. + * This function will abort (set the flag data->Abort) and return if it + * encounters an instruction that reads from @param writer and also a different + * instruction. Here are some examples: + * + * writer = instruction 0; + * 0 MOV TEMP[0].xy, TEMP[1].xy + * 1 MOV TEMP[0].zw, TEMP[2].xy + * 2 MOV TEMP[3], TEMP[0] + * The Abort flag will be set on instruction 2, because it reads values written + * by instructions 0 and 1. + * + * writer = instruction 1; + * 0 IF TEMP[0].x + * 1 MOV TEMP[1], TEMP[2] + * 2 ELSE + * 3 MOV TEMP[1], TEMP[2] + * 4 ENDIF + * 5 MOV TEMP[3], TEMP[1] + * The Abort flag will be set on instruction 5, because it could read from the + * value written by either instruction 1 or 3, depending on the jump decision + * made at instruction 0. + * + * writer = instruction 0; + * 0 MOV TEMP[0], TEMP[1] + * 2 BGNLOOP + * 3 ADD TEMP[0], TEMP[0], none.1 + * 4 ENDLOOP + * The Abort flag will be set on instruction 3, because in the first iteration + * of the loop it reads the value written by instruction 0 and in all other + * iterations it reads the value written by instruction 3. + * + * @param read_cb This function will be called for for every instruction that + * has been determined to be a reader of writer. + * @param write_cb This function will be called for every instruction after + * writer. + */ +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); +} + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h new file mode 100644 index 00000000000..d8a627258ea --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; +struct rc_src_register; +struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; +struct rc_compiler; + + +/** + * Help analyze and modify the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); + +typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src); +void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, + void * userdata); + +typedef void (*rc_pair_read_arg_fn)(void * userdata, + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); +void rc_pair_for_all_reads_arg(struct rc_instruction * inst, + rc_pair_read_arg_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +/*@}*/ + +struct rc_reader { + struct rc_instruction * Inst; + unsigned int WriteMask; + union { + struct { + struct rc_src_register * Src; + } I; + struct { + struct rc_pair_instruction_arg * Arg; + struct rc_pair_instruction_source * Src; + } P; + } U; +}; + +struct rc_reader_data { + unsigned int Abort; + unsigned int AbortOnRead; + unsigned int AbortOnWrite; + unsigned int LoopDepth; + unsigned int InElse; + struct rc_instruction * Writer; + + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader * Readers; + + /* If this flag is enabled, rc_get_readers will exit as soon possbile + * after the Abort flag is set.*/ + unsigned int ExitOnAbort; + void * CbData; +}; + +void rc_get_readers( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); +/*@}*/ + +void rc_optimize(struct radeon_compiler * c, void *user); + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 00000000000..678e1475883 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + +static void push_branch(struct deadcode_state * s) +{ + struct branchinfo * branch; + + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); + + branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + unsigned int srcmasks[3]; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) +{ + struct deadcode_state s; + unsigned int nr_instructions; + rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(c, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + push_loop(&s); + break; + } + case RC_OPCODE_BRK: + push_break(&s); + break; + case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONT: + break; + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } + } + + update_instruction(&s, inst); + } + + ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(inst, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 00000000000..133a9f72ec7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..7bede344f30 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,342 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct rc_instruction * inst_mov; + + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. */ + inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes_mask(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + struct branch_info * branch; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + branch = &s->Branches[s->BranchCount - 1]; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. + * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode; + + if (!s->BranchCount) + return; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + struct rc_instruction * inst_mov; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. + */ +void rc_emulate_branches(struct radeon_compiler *c, void *user) +{ + struct emulate_branch_state s; + struct rc_instruction * ptr; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..818ab84d0cd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler *c, void *user); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..205eecd1129 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,522 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct const_value { + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop) +{ + unsigned int total_i = rc_recompute_ips(c); + unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; + /* +1 because the program already has one iteration of the loop. */ + return 1 + ((c->max_alu_insts - total_i) / loop_i); +} + +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(c); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } +} + +/** + * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. + */ +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) +{ + int end_loops; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; + inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = c; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + end_loops = 1; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + case RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loops. */ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(c, limit, 0); + DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: + iterations = (int) ceilf((limit_value - counter_value.Value) / + count_inst.Amount); + break; + + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: + iterations = (int) floorf((limit_value - counter_value.Value) / + count_inst.Amount) + 1; + break; + default: + return 0; + } + + if (c->max_alu_insts > 0 + && iterations > loop_max_possible_iterations(c, loop)) { + return 0; + } + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + unroll_loop(c, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * @param c + * @param loop + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. + */ +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, + struct rc_instruction * inst) +{ + struct rc_instruction * ptr; + + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; + } + + memset(loop, 0, sizeof(struct loop_info)); + + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; + } + break; + } + case RC_OPCODE_BRK: + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; + } + loop->Brk = ptr; + loop->If = ptr->Prev; + loop->EndIf = ptr->Next; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + return 0; + } + loop->Cond = loop->If->Prev; + break; + + case RC_OPCODE_ENDLOOP: + loop->EndLoop = ptr; + break; + } + } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return 1 for success, 0 for failure + */ +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) { + rc_error(s->C, "Failed to build loop info\n"); + return 0; + } + + if(try_unroll_loop(s->C, loop)){ + return 1; + } + + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return 0; + } + + /* Prepare the loop to be emulated */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return 1; +} + +void rc_transform_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if (!transform_loop(s, ptr)) + return; + } + } +} + +void rc_unroll_loops(struct radeon_compiler *c, void *user) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop); + } + } + } +} + +void rc_emulate_loops(struct radeon_compiler *c, void *user) +{ + struct emulate_loop_state * s = &c->loop_state; + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. + */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + unsigned int iterations; + + if(!s->Loops[i].EndLoop){ + continue; + } + iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); + unroll_loop(s->C, &s->Loops[i], iterations); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..cd800c059d9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,32 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_loops(struct radeon_compiler *c, void *user); + +void rc_unroll_loops(struct radeon_compiler * c, void *user); + +void rc_emulate_loops(struct radeon_compiler * c, void *user); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c new file mode 100644 index 00000000000..811c908a81a --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.c @@ -0,0 +1,90 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_list.h" + +#include <stdlib.h> +#include <stdio.h> + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; + + return new; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c new file mode 100644 index 00000000000..afd78ad79dd --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" +#include "radeon_program.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CND, + .Name = "CND", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; + case RC_OPCODE_DP3: + case RC_OPCODE_XPD: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + case RC_OPCODE_TXL: + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_TXD: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_Y; + /* Fall through. */ + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + srcmasks[1] |= RC_MASK_X; + srcmasks[2] |= RC_MASK_X; + break; + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_Z; + /* Fall through. */ + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + srcmasks[2] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + srcmasks[2] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h new file mode 100644 index 00000000000..b5868820611 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ + RC_OPCODE_CND, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ + RC_OPCODE_SSG, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, + + RC_OPCODE_CONT, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +struct rc_instruction; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..39dcb21d4f4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_swizzle.h" + +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; +}; + +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, + struct rc_instruction *, + unsigned int); + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, + struct rc_src_register * src) +{ + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } + + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + reader_data->Abort = 1; + return; + } +} + +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); +} + +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct rc_reader_data reader_data; + unsigned int i; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) + return; + + /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; + + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate= 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + + /* Convert immediates to swizzles. */ + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + + if (swz >= 4) + continue; + + imm = constant->u.Immediate[swz]; + baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5 && c->has_half_swizzles) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } +} + +/** + * If src and dst use the same register, this function returns a writemask that + * indicates wich components are read by src. Otherwise zero is returned. + */ +static unsigned int src_reads_dst_mask(struct rc_src_register src, + struct rc_dst_register dst) +{ + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + return rc_swizzle_to_writemask(src.Swizzle); +} + +/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) + * in any of its channels. Return 0 otherwise. */ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } +} + +static int presub_helper( + struct radeon_compiler * c, + struct rc_instruction * inst_add, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; +} + +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + rc_presubtract_op presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; + + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; +} + +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { + return 0; + } + + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constatnt 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +/** + * @return + * 0 if inst is still part of the program. + * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + if(peephole_add_presub_add(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + +void rc_optimize(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if(peephole(c, cur)) + continue; + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c new file mode 100644 index 00000000000..1e9a2c09d44 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c @@ -0,0 +1,62 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_opcodes.h" +#include "radeon_program_pair.h" + +static void mark_used_presub(struct rc_pair_sub_instruction * sub) +{ + if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int presub_reg_count = rc_presubtract_src_reg_count( + sub->Src[RC_PAIR_PRESUB_SRC].Index); + unsigned int i; + for (i = 0; i < presub_reg_count; i++) { + sub->Src[i].Used = 1; + } + } +} + +static void mark_used( + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub) +{ + unsigned int i; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); + if (src_type & RC_SOURCE_RGB) { + inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; + } + + if (src_type & RC_SOURCE_ALPHA) { + inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; + } + } +} + +/** + * This pass finds sources that are not used by their instruction and marks + * them as unused. + */ +void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + if (inst->Type == RC_INSTRUCTION_NORMAL) + continue; + + /* Mark all sources as unused */ + for (i = 0; i < 4; i++) { + inst->U.P.RGB.Src[i].Used = 0; + inst->U.P.Alpha.Src[i].Used = 0; + } + mark_used(inst, &inst->U.P.RGB); + mark_used(inst, &inst->U.P.Alpha); + + mark_used_presub(&inst->U.P.RGB); + mark_used_presub(&inst->U.P.Alpha); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 00000000000..49983d6ce75 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,706 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + +#include "r300_fragprog_swizzle.h" +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_variable.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + + +struct register_info { + struct live_intervals Live[4]; + + unsigned int Used:1; + unsigned int Allocated:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int Writemask; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + int LoopEnd; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. */ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src || !src->Used) { + DBG("(null)"); + return; + } + + DBG("(%i,%i)", src->Start, src->End); +} + +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(a); + DBG(" to "); + print_live_intervals(b); + DBG("\n"); + } + + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; + } + + if (a->Start > b->Start) { + if (a->Start < b->End) { + DBG(" overlap\n"); + return 1; + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { + DBG(" overlap\n"); + return 1; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static void scan_read_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct regalloc_state * s = data; + struct register_info * reg; + unsigned int i; + + if (file != RC_FILE_INPUT) + return; + + s->Input[index].Used = 1; + reg = &s->Input[index]; + + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; + } + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = + s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; + } +} + +static void remap_register(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct regalloc_state * s = data; + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY && s->Simple) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *index = reg->Index; + } +} + +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} + +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static int find_class( + struct rc_class * classes, + unsigned int writemask, + unsigned int max_writemask_count) +{ + unsigned int i; + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (classes[i].WritemaskCount > max_writemask_count) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return i; + } + } + } + return -1; +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + int class_index; + + if (!variable->C->is_r500) { + struct rc_class c; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + + /* Check if it is possible to do swizzle packing for r300/r400 + * without creating non-native swizzles. */ + class_index = find_class(classes, writemask, 3); + if (class_index < 0) { + goto error; + } + c = classes[class_index]; + for (i = 0; i < c.WritemaskCount; i++) { + int j; + unsigned int conversion_swizzle = + rc_make_conversion_swizzle( + writemask, c.Writemasks[i]); + for (j = 0; j < variable->ReaderCount; j++) { + unsigned int old_swizzle; + unsigned int new_swizzle; + struct rc_reader r = variable->Readers[j]; + if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { + old_swizzle = r.U.P.Arg->Swizzle; + } else { + old_swizzle = r.U.I.Src->Swizzle; + } + new_swizzle = rc_adjust_channels( + old_swizzle, conversion_swizzle); + if (!r300_swizzle_is_native_basic(new_swizzle)) { + can_change_writemask = 0; + break; + } + } + if (!can_change_writemask) { + break; + } + } + } + + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } + } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + + class_index = find_class(classes, writemask, + can_change_writemask ? 3 : 1); + if (class_index > -1) { + return classes[class_index].Class; + } else { +error: + rc_error(variable->C, + "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; + } +} + +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } + } + return 0; +} + +static unsigned int reg_get_index(int reg) +{ + return reg / RC_MASK_XYZW; +} + +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} + +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} + +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif + +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } +} + +static void do_advanced_regalloc(struct regalloc_state * s) +{ + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. */ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } + + node_classes[node_index] = rc_class_list[class_index].Id; + } + + + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } + + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > s->LoopEnd) { + s->LoopEnd = endloop->IP; + } + } + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; + + ra_set_node_class(graph, node_index, node_classes[node_index]); + + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } + } + + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); + } + } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int * do_full_regalloc = (int*)user; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (*do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 00000000000..25cd52c9cd4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader cound reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void add_inst_to_list_end(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + if(!*list){ + *list = inst; + }else{ + struct schedule_instruction * temp = *list; + while(temp->NextReady){ + temp = temp->NextReady; + } + temp->NextReady = inst; + } +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. */ + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list_end(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +/** + * This function decreases the dependencies of the next instruction that + * wants to write to each of sinst's read values. + */ +static void commit_update_reads(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_update_writes(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + struct rc_instruction * inst_begin; + + assert(s->ReadyTEX); + + /* Node marker for R300 */ + inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + readytex = s->ReadyTEX; + while(readytex) { + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + while(readytex){ + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); + readytex = readytex->NextReady; + } +} + +/* This is a helper function for destructive_merge_instructions(). It helps + * merge presubtract sources from two instructions and makes sure the + * presubtract sources end up in the correct spot. This function assumes that + * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) + * but no scalar instruction (alpha). + * @return 0 if merging the presubtract sources fails. + * @retrun 1 if merging the presubtract sources succeeds. + */ +static int merge_presub_sources( + struct rc_pair_instruction * dst_full, + struct rc_pair_sub_instruction src, + unsigned int type) +{ + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; + + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + + switch(type) { + case RC_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } + + info = rc_get_opcode_info(dst_full->RGB.Opcode); + + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + src.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; + + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, + srcp.File, srcp.Index); + + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, + is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } + + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; + + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { + continue; + } + + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(dst_full->RGB.Arg[arg].Source == free_source + && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; +} + + +/* This function assumes that rgb.Alpha and alpha.RGB are unused */ +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + const struct rc_opcode_info * opcode; + + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ + + /* Merge the rgb presubtract registers. */ + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { + return 0; + } + } + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ + return 0; + } + } + + /* Copy alpha args into rgb */ + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + int source; + + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) + || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + +static void presub_nop(struct rc_instruction * emitted) { + int prev_rgb_index, prev_alpha_index, i, num_src; + + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = 1; + + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count( + emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index + || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } + + /* Check the previous alpha instruction. */ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; + + num_src = rc_presubtract_src_reg_count( + emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } +} + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = RC_MASK_W; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + struct schedule_instruction *prev; +pair: + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_alu_instruction(s, psirgb); + commit_alu_instruction(s, psialpha); + goto success; + } + } + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_alu_instruction(s, sinst); + success: ; + } + /* If the instruction we just emitted uses a presubtract value, and + * the presubtract sources were written by the previous intstruction, + * the previous instruction needs a nop. */ + presub_nop(before->Prev); +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** v = get_reg_valuep(s, file, index, chan); + struct reg_value_reader * reader; + + if (!v) + return; + + if (*v && (*v)->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = *v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + struct reg_value * newv; + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + unsigned int ip; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + s.Current->GlobalReaders.ExitOnAbort = 1; + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct radeon_compiler *cc, void *user) +{ + struct schedule_state s; + + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + while(inst != &c->Base.Program.Instructions) { + struct rc_instruction * first; + + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c new file mode 100644 index 00000000000..2dae56a2428 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_CND: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + +static void src_uses(struct rc_src_register src, unsigned int * rgb, + unsigned int * alpha) +{ + int j; + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(src.Swizzle, j); + if (swz < 3) + *rgb = 1; + else if (swz < 4) + *alpha = 1; + } +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + int needrgb, needalpha, istranscendent; + const struct rc_opcode_info * opcode; + int i; + + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + opcode = rc_get_opcode_info(inst->Opcode); + + /* Presubtract handling: + * We need to make sure that the values used by the presubtract + * operation end up in src0 or src1. */ + if(inst->PreSub.Opcode != RC_PRESUB_NONE) { + /* rc_pair_alloc_source() will fill in data for + * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ + int j; + for(j = 0; j < 3; j++) { + int src_regs; + if(inst->SrcReg[j].File != RC_FILE_PRESUB) + continue; + + src_regs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for(i = 0; i < src_regs; i++) { + unsigned int rgb = 0; + unsigned int alpha = 0; + src_uses(inst->SrcReg[j], &rgb, &alpha); + if(rgb) { + pair->RGB.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->RGB.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->RGB.Src[i].Used = 1; + } + if(alpha) { + pair->Alpha.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->Alpha.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->Alpha.Src[i].Used = 1; + } + } + } + } + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int srcmask = 0; + int j; + /* We don't care about the alpha channel here. We only + * want the part of the swizzle that writes to rgb, + * since we are creating an rgb instruction. */ + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + + if (swz < RC_SWIZZLE_W) + srcrgb = 1; + else if (swz == RC_SWIZZLE_W) + srcalpha = 1; + + if (swz < RC_SWIZZLE_UNUSED) + srcmask |= 1 << j; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "rgb instruction.\n"); + return; + } + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = + rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (source < 0) { + rc_error(&c->Base, "Failed to translate " + "alpha instruction.\n"); + return; + } + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + + if (needalpha) { + pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +static void check_opcode_support(struct r300_fragment_program_compiler *c, + struct rc_sub_instruction *inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. + */ +void rc_pair_translate(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; + + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode; + struct rc_sub_instruction copy; + + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + copy = inst->U.I; + + check_opcode_support(c, ©); + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c new file mode 100644 index 00000000000..fe5756ebc45 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void rc_local_transform( + struct radeon_compiler * c, + void *user) +{ + struct radeon_program_transformation *transformations = + (struct radeon_program_transformation*)user; + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; transformations[i].function; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_used_temporaries_data * d = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= d->UsedLength) + return; + + d->Used[index] |= mask; +} + +/** + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. + */ +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); + } +} + +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) +{ + int i; + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; + return i; + } + } + return -1; +} + +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; + + memset(used, 0, sizeof(used)); + + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); + + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return free; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + memset(inst, 0, sizeof(struct rc_instruction)); + + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + + return inst; +} + +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) +{ + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + +/** + * Return the number of instructions in the program. + */ +unsigned int rc_recompute_ips(struct radeon_compiler * c) +{ + unsigned int ip = 0; + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } + + c->Program.Instructions.IP = 0xcafedead; + + return ip; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h new file mode 100644 index 00000000000..b899eccbf53 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" + +struct radeon_compiler; + +struct rc_src_register { + unsigned int File:4; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; +}; + +struct rc_presub_instruction { + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + unsigned int SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + + /**R500 Only. How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; + /*@}*/ + + /** This holds information about the presubtract operation used by + * this instruction. */ + struct rc_presub_instruction PreSub; +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +/** + * A transformation that can be passed to \ref rc_local_transform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return true, or return false if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + int (*function)( + struct radeon_compiler*, + struct rc_instruction*, + void*); + void *userData; +}; + +void rc_local_transform( + struct radeon_compiler *c, + void *user); + +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + +unsigned int rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); +void rc_remove_instruction(struct rc_instruction * inst); + +unsigned int rc_recompute_ips(struct radeon_compiler * c); + +void rc_print_program(const struct rc_program *prog); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c new file mode 100644 index 00000000000..e273bc40c26 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -0,0 +1,1154 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" + + +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; + return fpi; +} + +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + return fpi; +} + +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; + return fpi; +} + +static struct rc_dst_register dstregtmpmask(int index, int mask) +{ + struct rc_dst_register dst = {0, 0, 0}; + dst.File = RC_FILE_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + return dst; +} + +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_0000 +}; +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_1111 +}; +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_XYZW +}; + +static struct rc_src_register srcreg(int file, int index) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct rc_src_register srcregswz(int file, int index, int swz) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct rc_src_register absolute(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = RC_MASK_NONE; + return newreg; +} + +static struct rc_src_register negate(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; + return newreg; +} + +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) +{ + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); + return swizzled; +} + +static struct rc_src_register swizzle_smear(struct rc_src_register reg, + rc_swizzle x) +{ + return swizzle(reg, x, x, x, x); +} + +static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_X); +} + +static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Y); +} + +static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Z); +} + +static struct rc_src_register swizzle_wwww(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_W); +} + +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src = inst->U.I.SrcReg[0]; + src.Abs = 1; + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); + rc_remove_instruction(inst); +} + +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +static void transform_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); + rc_remove_instruction(inst); +} + +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + rc_remove_instruction(inst); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); + + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle_wwww(srctemp), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); + + /* tmp.w = Pow(tmp.y, tmp.w) */ + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_yyyy(srctemp)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp), + swizzle_zzzz(srctemp)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp)); + + /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle_xxxx(srctemp)), + swizzle_wwww(srctemp), + builtin_zero); + + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dst, + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; + + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0; + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dst0, + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes zero to two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst, + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dst, + inst->U.I.SrcReg[0]); + + /* MAX dst.y, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); +} + +static void transform_r300_vertex_SEQ(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x = y <==> x >= y && y >= x */ + int tmp = rc_find_free_temporary(c); + + /* x <= y */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y <= x */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x > y <==> -x < -y */ + inst->U.I.Opcode = RC_OPCODE_SLT; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x <= y <==> -x >= -y */ + inst->U.I.Opcode = RC_OPCODE_SGE; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; + + /* 0 < x */ + dst0 = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dst0, + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst0.Index), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + +/** + * For use with rc_local_transform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) +{ + static const float SinCosConsts[2][4] = { + { + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ + }, + { + 0.75, + 0.5, + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(src), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(src)), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), + negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +int r300_transform_trig_simple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + unsigned int constants[2]; + unsigned int tempreg; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + /* MAD tmp.x, src, 1/(2*PI), 0.75 */ + /* FRC tmp.x, tmp.x */ + /* MAD tmp.z, tmp.x, 2*PI, -PI */ + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else { + struct rc_dst_register dst; + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; + sin_approx(c, inst, dst, + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; + sin_approx(c, inst, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } + + rc_remove_instruction(inst); + + return 1; +} + +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +int radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. + */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +int radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; + + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + return 1; +} + +/** + * IF Temp[0].x -\ + * KILP - > KIL -abs(Temp[0].x) + * ENDIF -/ + * + * This needs to be done in its own pass, because it modifies the instructions + * before and after KILP. + */ +void rc_transform_KILP(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode != RC_OPCODE_KILP) + continue; + + inst->U.I.Opcode = RC_OPCODE_KIL; + + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h new file mode 100644 index 00000000000..b5f361e624f --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_simple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + +int radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +void rc_transform_KILP(struct radeon_compiler * c, + void *user); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h new file mode 100644 index 00000000000..24577333450 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL, + + /** + * Indicates this register should use the result of the presubtract + * operation. + */ + RC_FILE_PRESUB +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +typedef enum { + RC_PRESUB_NONE = 0, + + /** 1 - 2 * src0 */ + RC_PRESUB_BIAS, + + /** src1 - src0 */ + RC_PRESUB_SUB, + + /** src1 + src0 */ + RC_PRESUB_ADD, + + /** 1 - src0 */ + RC_PRESUB_INV +} rc_presubtract_op; + +static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ + switch(op){ + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } +} + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c new file mode 100644 index 00000000000..52315957520 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2008-2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler_util.h" + +#include <stdlib.h> + +/** + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. + */ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) +{ + int candidate = -1; + int candidate_quality = -1; + unsigned int alpha_used = 0; + unsigned int rgb_used = 0; + int i; + + if ((!rgb && !alpha) || file == RC_FILE_NONE) + return 0; + + /* Make sure only one presubtract operation is used per instruction. */ + if (file == RC_FILE_PRESUB) { + if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + + if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used + && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].File != file || + pair->RGB.Src[i].Index != index) { + rgb_used++; + continue; + } + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].File != file || + pair->Alpha.Src[i].Index != index) { + alpha_used++; + continue; + } + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (file == RC_FILE_PRESUB) { + candidate = RC_PAIR_PRESUB_SRC; + } else if (candidate < 0 || (rgb && rgb_used > 2) + || (alpha && alpha_used > 2)) { + return -1; + } + + /* candidate >= 0 */ + + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i = 0; i < src_regs; i++) { + pair->RGB.Src[i].Used = 1; + } + } + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i=0; i < src_regs; i++) { + pair->Alpha.Src[i].Used = 1; + } + } + } + + return candidate; +} + +static void pair_foreach_source_callback( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb, + unsigned int swz, + unsigned int src) +{ + /* swz > 3 means that the swizzle is either not used, or a constant + * swizzle (e.g. 0, 1, 0.5). */ + if(swz > 3) + return; + + if(swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->Alpha.Src[i]); + } + } else { + cb(data, &pair->Alpha.Src[src]); + } + } else { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->RGB.Src[i]); + } + } + else { + cb(data, &pair->RGB.Src[src]); + } + } +} + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->Alpha.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + pair_foreach_source_callback(pair, data, cb, + GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), + pair->Alpha.Arg[i].Source); + } +} + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->RGB.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. */ + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); + if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) + continue; + } + pair_foreach_source_callback(pair, data, cb, + swz, + pair->RGB.Arg[i].Source); + } +} + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) +{ + unsigned int type; + + type = rc_source_type_swz(arg->Swizzle); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } +} + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src) +{ + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src + || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h new file mode 100644 index 00000000000..a957ea9f7a0 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" + +struct radeon_compiler; + + +/** + * \file + * Represents a paired ALU instruction, as found in R300 and R500 + * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. + * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate + */ + +/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then + * the presubtract value will be used, and + * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB. + */ +#define RC_PAIR_PRESUB_SRC 3 + +struct rc_pair_instruction_source { + unsigned int Used:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct rc_pair_instruction_arg { + unsigned int Source:2; + unsigned int Swizzle:12; + unsigned int Abs:1; + unsigned int Negate:1; +}; + +struct rc_pair_sub_instruction { + unsigned int Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:4; + unsigned int Target:2; + unsigned int OutputWriteMask:3; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; + + struct rc_pair_instruction_source Src[4]; + struct rc_pair_instruction_arg Arg[3]; +}; + +struct rc_pair_instruction { + struct rc_pair_sub_instruction RGB; + struct rc_pair_sub_instruction Alpha; + + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + unsigned int Nop:1; +}; + +typedef void (*rc_pair_foreach_src_fn) + (void *, struct rc_pair_instruction_source *); + +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src); +/*@}*/ + + +/** + * Compiler passes that operate with the paired format. + */ +/*@{*/ +struct radeon_pair_handler; + +void rc_pair_translate(struct radeon_compiler *cc, void *user); +void rc_pair_schedule(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc(struct radeon_compiler *cc, void *user); +void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user); +void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user); +/*@}*/ + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c new file mode 100644 index 00000000000..390d1319460 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -0,0 +1,418 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static const char * presubtract_op_to_string(rc_presubtract_op op) +{ + switch(op) { + case RC_PRESUB_NONE: + return "NONE"; + case RC_PRESUB_BIAS: + return "(1 - 2 * src0)"; + case RC_PRESUB_SUB: + return "(src1 - src0)"; + case RC_PRESUB_ADD: + return "(src1 + src0)"; + case RC_PRESUB_INV: + return "(1 - src0)"; + default: + return "BAD_PRESUBTRACT_OP"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, 0); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + fprintf(stderr, "bad swz: %u\n", swz); + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_presub_instruction(FILE * f, + struct rc_presub_instruction inst) +{ + fprintf(f,"("); + switch(inst.Opcode){ + case RC_PRESUB_BIAS: + fprintf(f, "1 - 2 * "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_SUB: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_ADD: + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + fprintf(f, " + "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_INV: + fprintf(f, "1 - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + default: + break; + } + fprintf(f, ")"); +} + +static void rc_print_src_register(FILE * f, struct rc_instruction * inst, + struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + if(src.File == RC_FILE_PRESUB) + rc_print_presub_instruction(f, inst->U.I.PreSub); + else + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth) +{ + switch (opcode) { + case RC_OPCODE_IF: + case RC_OPCODE_BGNLOOP: + return (*branch_depth)++ * 2; + + case RC_OPCODE_ENDIF: + case RC_OPCODE_ENDLOOP: + assert(*branch_depth > 0); + return --(*branch_depth) * 2; + + case RC_OPCODE_ELSE: + assert(*branch_depth > 0); + return (*branch_depth - 1) * 2; + + default: + return *branch_depth * 2; + } +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? + inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.xyz = %s", + presubtract_op_to_string( + inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.w = %s", + presubtract_op_to_string( + inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->RGB.Arg[arg].Source); + fprintf(f,".%c%c%c%s", + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color[%i].w", inst->Alpha.Target); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->Alpha.Arg[arg].Source); + fprintf(f,".%c%s", + rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + for (unsigned i = 0; i < spaces; i++) + fprintf(f, " "); + + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + unsigned branch_depth = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst, &branch_depth); + else + rc_print_normal_instruction(stderr, inst, &branch_depth); + + linenum++; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c new file mode 100644 index 00000000000..9d69ebd18fb --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +#include "radeon_compiler_util.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + + reg.Swizzle = combine_swizzles(reg.Swizzle, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; + + reg.File = RC_FILE_NONE; + reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, + compiler->state.unit[tmu].texture_swizzle); + return reg; +} + +static void scale_texcoords(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst, + unsigned state_constant) +{ + struct rc_instruction *inst_mov; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MUL; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mov->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + state_constant, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +static void projective_divide(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_mul, *inst_rcp; + + unsigned temp = rc_find_free_temporary(&compiler->Base); + + inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = temp; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + /* Because the input can be arbitrarily swizzled, + * read the component mapped to W. */ + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + + inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = temp; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_TXD && + inst->U.I.Opcode != RC_OPCODE_TXL && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + } else { + inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; + unsigned tmp_texsample; + unsigned tmp_sum; + int pass, fail; + + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + unsigned saturate_mode = inst->U.I.SaturateMode; + + /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + tmp_sum = rc_find_free_temporary(c); + + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_sum; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); + } + + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = + RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Add the depth texture value. */ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; + else + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + /* This negates the whole expresion: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp = rc_insert_new_instruction(c, inst_add); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.SaturateMode = saturate_mode; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); + inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); + inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum); + } + } + + /* R300 cannot sample from rectangles and the wrap mode fallback needs + * normalized coordinates anyway. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { + scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } + + /* Divide by W if needed. */ + if (inst->U.I.Opcode == RC_OPCODE_TXP && + (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { + projective_divide(compiler, inst); + } + + /* Texture wrap modes don't work on NPOT textures. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. */ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + + /* NPOT -> POT conversion for 3D textures. */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* Saturate XYZ. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + /* Copy W. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); + } + + /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. + * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { + unsigned two, two_swizzle; + struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); + + inst_mul = rc_insert_new_instruction(c, inst); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ + inst_mul->U.I.SrcReg[1].Index = two; + inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; + + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; + + inst_cnd = rc_insert_new_instruction(c, inst_mad); + inst_cnd->U.I.Opcode = RC_OPCODE_CND; + inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_cnd->U.I.DstReg = inst->U.I.DstReg; + inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; + inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; + inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.SaturateMode = 0; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c new file mode 100644 index 00000000000..7d76585a593 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_remove_constants.h" +#include "radeon_dataflow.h" + +struct mark_used_data { + unsigned char * const_used; + unsigned * has_rel_addr; +}; + +static void remap_regs(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + unsigned *inv_remap_table = userdata; + + if (*pfile == RC_FILE_CONSTANT) { + *pindex = inv_remap_table[*pindex]; + } +} + +static void mark_used(void * userdata, struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct mark_used_data * d = userdata; + + if (src->File == RC_FILE_CONSTANT) { + if (src->RelAddr) { + *d->has_rel_addr = 1; + } else { + d->const_used[src->Index] = 1; + } + } +} + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user) +{ + unsigned **out_remap_table = (unsigned**)user; + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + struct mark_used_data d; + unsigned new_count; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + d.const_used = const_used; + d.has_rel_addr = &has_rel_addr; + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_for_all_reads_src(inst, mark_used, &d); + } + + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. + * This pass removes unused constants simply by overwriting them by other constants. */ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + rc_remap_registers(inst, remap_regs, inv_remap_table); + } + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); + + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants); +} diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h new file mode 100644 index 00000000000..f29113b922b --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2010 Marek Olšák <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, void *user); + +#endif diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c new file mode 100644 index 00000000000..cafa0579734 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_rename_regs.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" +#include "radeon_program.h" + +/** + * This function renames registers in an attempt to get the code close to + * SSA form. After this function has completed, most of the register are only + * written to one time, with a few exceptions. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL. + */ +void rc_rename_regs(struct radeon_compiler *c, void *user) +{ + unsigned int i, used_length; + int new_index; + struct rc_instruction * inst; + struct rc_reader_data reader_data; + unsigned char * used; + + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + + rc_get_used_temporaries(c, used, used_length); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + continue; + + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.I.Src->Index = new_index; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h new file mode 100644 index 00000000000..3baf29f6120 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_RENAME_REGS_H +#define RADEON_RENAME_REGS_H + +struct radeon_compiler; + +void rc_rename_regs(struct radeon_compiler *c, void *user); + +#endif /* RADEON_RENAME_REGS_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h new file mode 100644 index 00000000000..c81d5f7a5e9 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..938fb8421f2 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -0,0 +1,517 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. This function also takes + * care of rewriting the swizzles for the sources of var. + */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + unsigned int conversion_swizzle = + rc_make_conversion_swizzle(old_mask, new_writemask); + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source the ca be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this becuase it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int chan_start = start; + unsigned int chan_end = var->Readers[i].Inst->IP; + unsigned int mask = var->Readers[i].WriteMask; + struct rc_instruction * inst; + + /* Extend the live interval of T0 to the start of the + * loop for sequences like: + * BGNLOOP + * read T0 + * ... + * write T0 + * ENDLOOP + */ + if (var->Readers[i].Inst->IP < start) { + struct rc_instruction * bgnloop = + rc_match_endloop(var->Readers[i].Inst); + chan_start = bgnloop->IP; + } + + /* Extend the live interval of T0 to the start of the + * loop in case there is a BRK instruction in the loop + * (we don't actually check for a BRK instruction we + * assume there is one somewhere in the loop, which + * there usually is) for sequences like: + * BGNLOOP + * ... + * conditional BRK + * ... + * write T0 + * ENDLOOP + * read T0 + *************************************************** + * Extend the live interval of T0 to the end of the + * loop for sequences like: + * write T0 + * BGNLOOP + * ... + * read T0 + * ENDLOOP + */ + for (inst = var->Inst; inst != var->Readers[i].Inst; + inst = inst->Next) { + rc_opcode op = rc_get_flow_control_inst(inst); + if (op == RC_OPCODE_ENDLOOP) { + struct rc_instruction * bgnloop = + rc_match_endloop(inst); + if (bgnloop->IP < chan_start) { + chan_start = bgnloop->IP; + } + } else if (op == RC_OPCODE_BGNLOOP) { + struct rc_instruction * endloop = + rc_match_bgnloop(inst); + if (endloop->IP > chan_end) { + chan_end = endloop->IP; + } + } + } + + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + if (!var->Live[chan].Used + || chan_start < var->Live[chan].Start) { + var->Live[chan].Start = + chan_start; + } + if (!var->Live[chan].Used + || chan_end > var->Live[chan].End) { + var->Live[chan].End = chan_end; + } + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Src == reader_b.U.P.Src) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + assert(var->Dst.Index == friend->Dst.Index); + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** variable_list, + struct rc_variable * variable) +{ + struct rc_list * list_ptr; + for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { + if (readers_intersect(variable, list_ptr->Item)) { + rc_variable_add_friend(list_ptr->Item, variable); + return; + } + } + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); +} + +static void get_variable_pair_helper( + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(variable_list, new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * variable_list = NULL; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&variable_list, new_var); + } else { + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.RGB); + get_variable_pair_helper(&variable_list, c, inst, + &inst->U.P.Alpha); + } + } + + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. + */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. + */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have indentifed the variable and its + * friends that write this source, we can stop + * stop searching, because we know know of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..9427bee18a7 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_variable.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. + */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore new file mode 100644 index 00000000000..85672fed777 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/.gitignore @@ -0,0 +1 @@ +radeon_compiler_util_tests diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile new file mode 100644 index 00000000000..6eda34a2c00 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/Makefile @@ -0,0 +1,53 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +CFLAGS += -Wall -Werror + +### Basic defines ### +TESTS = radeon_compiler_util_tests + +TEST_SOURCES := $(TESTS:=.c) + +SHARED_SOURCES = \ + rc_test_helpers.c \ + unit_test.c + +C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) + +INCLUDES = \ + -I. \ + -I.. + +COMPILER_LIB = ../../libr300.a + +##### TARGETS ##### + +default: depend run_tests + +depend: $(C_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(TESTS) depend depend.bak + +$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) + $(APP_CC) -o $@ $^ + +run_tests: $(TESTS) + @echo "RUNNING TESTS:" + @echo "" + $(foreach test, $^, @./$(test)) + +.PHONY: $(COMPILER_LIB) +$(COMPILER_LIB): + $(MAKE) -C ../.. + +##### RULES ##### +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c new file mode 100644 index 00000000000..a2e3f2ab2e5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c @@ -0,0 +1,76 @@ +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +#include "rc_test_helpers.h" +#include "unit_test.h" + +static void test_rc_inst_can_use_presub( + struct test_result * result, + int expected, + const char * add_str, + const char * replace_str) +{ + struct rc_instruction add_inst, replace_inst; + int ret; + + test_begin(result); + init_rc_normal_instruction(&add_inst, add_str); + init_rc_normal_instruction(&replace_inst, replace_str); + + ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + &replace_inst.U.I.SrcReg[0], + &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); + + test_check(result, ret == expected); +} + +static void test_runner_rc_inst_can_use_presub(struct test_result * result) +{ + + /* This tests the case where the source being replace has the same + * register file and register index as another source register in the + * CMP instruction. A previous version of this function was ignoring + * all registers that shared the same file and index as the replacement + * register when counting the number of source selects. + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[0].z, temp[6].__x_, const[1].__x_;", + "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); + + + /* Testing a random case that should fail + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3], temp[1], temp[2];", + "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); + + /* This tests the case where the arguments of the ADD + * instruction share the same register file and index. Normally, we + * would need only one source select for these two arguments, but since + * they will be part of a presubtract operation we need to use the two + * source selects that the presubtract instruction expects + * (src0 and src1). + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3].x, temp[0].x___, temp[0].x___;", + "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); +} + +int main(int argc, char ** argv) +{ + struct test tests[] = { + {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, + {NULL, NULL} + }; + run_tests(tests); +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c new file mode 100644 index 00000000000..ca4738af54d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c @@ -0,0 +1,380 @@ +#include <errno.h> +#include <regex.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "../radeon_compiler_util.h" +#include "../radeon_opcodes.h" +#include "../radeon_program.h" + +#include "rc_test_helpers.h" + +/* This file contains some helper functions for filling out the rc_instruction + * data structures. These functions take a string as input based on the format + * output by rc_program_print(). + */ + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +#define REGEX_ERR_BUF_SIZE 50 + +struct match_info { + const char * String; + int Length; +}; + +static int match_length(regmatch_t * matches, int index) +{ + return matches[index].rm_eo - matches[index].rm_so; +} + +static int regex_helper( + const char * regex_str, + const char * search_str, + regmatch_t * matches, + int num_matches) +{ + char err_buf[REGEX_ERR_BUF_SIZE]; + regex_t regex; + int err_code; + unsigned int i; + + err_code = regcomp(®ex, regex_str, REG_EXTENDED); + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to compile regex: %s\n", err_buf); + return 0; + } + + err_code = regexec(®ex, search_str, num_matches, matches, 0); + DBG("Search string: '%s'\n", search_str); + for (i = 0; i < num_matches; i++) { + DBG("Match %u start = %d end = %d\n", i, + matches[i].rm_so, matches[i].rm_eo); + } + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to match regex: %s\n", err_buf); + return 0; + } + return 1; +} + +#define REGEX_SRC_MATCHES 6 + +struct src_tokens { + struct match_info Negate; + struct match_info Abs; + struct match_info File; + struct match_info Index; + struct match_info Swizzle; +}; + +/** + * Initialize the source register at index src_index for the instruction based + * on src_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param src_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str) +{ + const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; + regmatch_t matches[REGEX_SRC_MATCHES]; + struct src_tokens tokens; + struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { + fprintf(stderr, "Failed to execute regex for src register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.Negate.String = src_str + matches[1].rm_so; + tokens.Negate.Length = match_length(matches, 1); + tokens.Abs.String = src_str + matches[2].rm_so; + tokens.Abs.Length = match_length(matches, 2); + tokens.File.String = src_str + matches[3].rm_so; + tokens.File.Length = match_length(matches, 3); + tokens.Index.String = src_str + matches[4].rm_so; + tokens.Index.Length = match_length(matches, 4); + tokens.Swizzle.String = src_str + matches[5].rm_so; + tokens.Swizzle.Length = match_length(matches, 5); + + /* Negate */ + if (tokens.Negate.Length > 0) { + src_reg->Negate = RC_MASK_XYZW; + } + + /* Abs */ + if (tokens.Abs.Length > 0) { + src_reg->Abs = 1; + } + + /* File */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + src_reg->File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { + src_reg->File = RC_FILE_INPUT; + } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { + src_reg->File = RC_FILE_CONSTANT; + } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { + src_reg->File = RC_FILE_NONE; + } + + /* Index */ + errno = 0; + src_reg->Index = strtol(tokens.Index.String, NULL, 10); + if (errno > 0) { + fprintf(stderr, "Could not convert src register index.\n"); + return 0; + } + + /* Swizzle */ + if (tokens.Swizzle.Length == 0) { + src_reg->Swizzle = RC_SWIZZLE_XYZW; + } else { + int str_index = 1; + src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); + if (tokens.Swizzle.String[0] != '.') { + fprintf(stderr, "First char of swizzle is not valid.\n"); + return 0; + } + for (i = 0; i < 4; i++, str_index++) { + if (tokens.Swizzle.String[str_index] == '-') { + src_reg->Negate |= (1 << i); + str_index++; + } + switch(tokens.Swizzle.String[str_index]) { + case 'x': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); + break; + case 'y': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); + break; + case 'z': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); + break; + case 'w': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); + break; + case '1': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); + break; + case '0': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); + break; + case 'H': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); + break; + case '_': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); + break; + default: + fprintf(stderr, "Unknown src register swizzle.\n"); + return 0; + } + } + } + DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", + src_reg->File, src_reg->Index, src_reg->Swizzle, + src_reg->Negate, src_reg->Abs); + return 1; +} + +#define REGEX_DST_MATCHES 4 + +struct dst_tokens { + struct match_info File; + struct match_info Index; + struct match_info WriteMask; +}; + +/** + * Initialize the destination for the instruction based on dst_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param dst_str A string that represents the destination register. The format + * for this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str) +{ + const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; + regmatch_t matches[REGEX_DST_MATCHES]; + struct dst_tokens tokens; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { + fprintf(stderr, "Failed to execute regex for dst register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.File.String = dst_str + matches[1].rm_so; + tokens.File.Length = match_length(matches, 1); + tokens.Index.String = dst_str + matches[2].rm_so; + tokens.Index.Length = match_length(matches, 2); + tokens.WriteMask.String = dst_str + matches[3].rm_so; + tokens.WriteMask.Length = match_length(matches, 3); + + /* File Type */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + } else { + fprintf(stderr, "Unknown dst register file type.\n"); + return 0; + } + + /* File Index */ + errno = 0; + inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); + + if (errno > 0) { + fprintf(stderr, "Could not convert dst register index\n"); + return 0; + } + + /* WriteMask */ + if (tokens.WriteMask.Length == 0) { + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } else { + /* The first character should be '.' */ + if (tokens.WriteMask.String[0] != '.') { + fprintf(stderr, "1st char of writemask is not valid.\n"); + return 0; + } + for (i = 1; i < tokens.WriteMask.Length; i++) { + switch(tokens.WriteMask.String[i]) { + case 'x': + inst->U.I.DstReg.WriteMask |= RC_MASK_X; + break; + case 'y': + inst->U.I.DstReg.WriteMask |= RC_MASK_Y; + break; + case 'z': + inst->U.I.DstReg.WriteMask |= RC_MASK_Z; + break; + case 'w': + inst->U.I.DstReg.WriteMask |= RC_MASK_W; + break; + default: + fprintf(stderr, "Unknown swizzle in writemask.\n"); + return 0; + } + } + } + DBG("Dst Reg File=%u Index=%d Writemask=%d\n", + inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask); + return 1; +} + +#define REGEX_INST_MATCHES 7 + +struct inst_tokens { + struct match_info Opcode; + struct match_info Sat; + struct match_info Dst; + struct match_info Srcs[3]; +}; + +/** + * Initialize a normal instruction based on inst_str. + * + * WARNING: This function might not be able to handle every kind of format that + * rc_program_print() can output. If you are having problems with a + * particular string, you may need to add support for it to this functions. + * + * @param inst_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str) +{ + const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; + int i; + regmatch_t matches[REGEX_INST_MATCHES]; + struct inst_tokens tokens; + + /* Initialize inst */ + memset(inst, 0, sizeof(struct rc_instruction)); + inst->Type = RC_INSTRUCTION_NORMAL; + + /* Execute the regex */ + if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { + return 0; + } + memset(&tokens, 0, sizeof(tokens)); + + /* Create Tokens */ + tokens.Opcode.String = inst_str + matches[1].rm_so; + tokens.Opcode.Length = match_length(matches, 1); + if (matches[2].rm_so > -1) { + tokens.Sat.String = inst_str + matches[2].rm_so; + tokens.Sat.Length = match_length(matches, 2); + } + + + /* Fill out the rest of the instruction. */ + for (i = 0; i < MAX_RC_OPCODE; i++) { + const struct rc_opcode_info * info = rc_get_opcode_info(i); + unsigned int first_src = 3; + unsigned int j; + if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { + continue; + } + inst->U.I.Opcode = info->Opcode; + if (info->HasDstReg) { + char * dst_str; + tokens.Dst.String = inst_str + matches[3].rm_so; + tokens.Dst.Length = match_length(matches, 3); + first_src++; + + dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); + strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); + dst_str[tokens.Dst.Length] = '\0'; + init_rc_normal_dst(inst, dst_str); + free(dst_str); + } + for (j = 0; j < info->NumSrcRegs; j++) { + char * src_str; + tokens.Srcs[j].String = + inst_str + matches[first_src + j].rm_so; + tokens.Srcs[j].Length = + match_length(matches, first_src + j); + + src_str = malloc(sizeof(char) * + (tokens.Srcs[j].Length + 1)); + strncpy(src_str, tokens.Srcs[j].String, + tokens.Srcs[j].Length); + src_str[tokens.Srcs[j].Length] = '\0'; + init_rc_normal_src(inst, j, src_str); + } + break; + } + return 1; +} diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h new file mode 100644 index 00000000000..1a6bf9699ba --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h @@ -0,0 +1,13 @@ + +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str); + +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str); + +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str); diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c new file mode 100644 index 00000000000..266f3365c58 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "unit_test.h" + +void run_tests(struct test tests[]) +{ + int i; + for (i = 0; tests[i].name; i++) { + printf("Test %s\n", tests[i].name); + memset(&tests[i].result, 0, sizeof(tests[i].result)); + tests[i].test_func(&tests[i].result); + printf("Test %s (%d/%d) pass\n", tests[i].name, + tests[i].result.pass, tests[i].result.test_count); + } +} + +void test_begin(struct test_result * result) +{ + result->test_count++; +} + +void test_check(struct test_result * result, int cond) +{ + printf("Subtest %u -> ", result->test_count); + if (cond) { + result->pass++; + printf("Pass"); + } else { + result->fail++; + printf("Fail"); + } + printf("\n"); +} diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h new file mode 100644 index 00000000000..441e8b655a5 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h @@ -0,0 +1,17 @@ + +struct test_result { + unsigned int test_count; + unsigned int pass; + unsigned int fail; +}; + +struct test { + const char * name; + void (*test_func)(struct test_result * result); + struct test_result result; +}; + +void run_tests(struct test tests[]); + +void test_begin(struct test_result * result); +void test_check(struct test_result * result, int cond); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 388ebcdbf32..ddf5448a34b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -247,7 +247,7 @@ static void r300_clear(struct pipe_context* pipe, if (!r300->hyperz_enabled) { r300->hyperz_enabled = r300->rws->cs_request_feature(r300->cs, - RADEON_FID_HYPERZ_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, TRUE); if (r300->hyperz_enabled) { /* Need to emit HyperZ buffer regs for the first time. */ @@ -409,10 +409,11 @@ void r300_decompress_zmask(struct r300_context *r300) void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct pipe_framebuffer_state fb = {0}; + struct pipe_framebuffer_state fb; + + memset(&fb, 0, sizeof(fb)); fb.width = r300->locked_zbuffer->width; fb.height = r300->locked_zbuffer->height; - fb.nr_cbufs = 0; fb.zsbuf = r300->locked_zbuffer; r300->context.set_framebuffer_state(&r300->context, &fb); @@ -421,8 +422,9 @@ void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) void r300_decompress_zmask_locked(struct r300_context *r300) { - struct pipe_framebuffer_state saved_fb = {0}; + struct pipe_framebuffer_state saved_fb; + memset(&saved_fb, 0, sizeof(saved_fb)); util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); r300_decompress_zmask_locked_unsafe(r300); r300->context.set_framebuffer_state(&r300->context, &saved_fb); @@ -443,8 +445,8 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(r300->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r300_blitter_end(r300); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 571986c3011..80148b80afb 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -31,9 +31,9 @@ * Radeons. */ /* Parse a PCI ID and fill an r300_capabilities struct with information. */ -void r300_parse_chipset(struct r300_capabilities* caps) +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps) { - switch (caps->pci_id) { + switch (pci_id) { #define CHIPSET(pci_id, name, chipfamily) \ case pci_id: \ caps->family = CHIP_FAMILY_##chipfamily; \ @@ -43,7 +43,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) default: fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", - caps->pci_id); + pci_id); abort(); } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 4df6b5b6292..f96cdaf2580 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -43,16 +43,10 @@ enum r300_zmask_compression { /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { - /* PCI ID */ - uint32_t pci_id; /* Chipset family */ int family; /* The number of vertex floating-point units */ unsigned num_vert_fpus; - /* The number of fragment pipes */ - unsigned num_frag_pipes; - /* The number of z pipes */ - unsigned num_z_pipes; /* The number of texture units. */ unsigned num_tex_units; /* Whether or not TCL is physically present */ @@ -121,6 +115,6 @@ enum { CHIP_FAMILY_RV570 }; -void r300_parse_chipset(struct r300_capabilities* caps); +void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps); #endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d94ac74f0e5..b304999d424 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -99,7 +99,7 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); if (r300->cs && r300->hyperz_enabled) { - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE); + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } if (r300->blitter) @@ -173,7 +173,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); + boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; /* Create the actual atom list. * @@ -380,7 +380,7 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500 || (r300->screen->caps.is_rv350 && - r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) { + r300->screen->info.drm_minor >= 6)) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; @@ -457,7 +457,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_render_functions(r300); r300_init_states(&r300->context); - rws->cs_set_flush(r300->cs, r300_flush_callback, r300); + rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300); /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this @@ -520,15 +520,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", - rws->get_value(rws, RADEON_VID_DRM_MAJOR), - rws->get_value(rws, RADEON_VID_DRM_MINOR), - rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), + r300->screen->info.drm_major, + r300->screen->info.drm_minor, + r300->screen->info.drm_patchlevel, screen->get_name(screen), - rws->get_value(rws, RADEON_VID_PCI_ID), - rws->get_value(rws, RADEON_VID_R300_GB_PIPES), - rws->get_value(rws, RADEON_VID_R300_Z_PIPES), - rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, - rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, + r300->screen->info.pci_id, + r300->screen->info.r300_num_gb_pipes, + r300->screen->info.r300_num_z_pipes, + r300->screen->info.gart_size >> 20, + r300->screen->info.vram_size >> 20, "YES", /* XXX really? */ r300->screen->caps.zmask_ram ? "YES" : "NO", r300->screen->caps.hiz_ram ? "YES" : "NO"); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d214af4cd5b..b953bd10f43 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -574,11 +574,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; CS_LOCALS(r300); - assert(caps->num_frag_pipes); + assert(gb_pipes); - BEGIN_CS(6 * caps->num_frag_pipes + 2); + BEGIN_CS(6 * gb_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... * @@ -587,7 +588,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, * 4-byte offset for each pipe. RV380 and older are special; they have * only two pipes, and the second pipe's enable is on bit 3, not bit 1, * so there's a chipset cap for that. */ - switch (caps->num_frag_pipes) { + switch (gb_pipes) { case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); @@ -613,7 +614,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" - " pixel pipes!\n", caps->num_frag_pipes); + " pixel pipes!\n", gb_pipes); abort(); } @@ -663,7 +664,7 @@ void r300_emit_query_end(struct r300_context* r300) return; if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) + if (r300->screen->info.r300_num_z_pipes == 2) rv530_emit_query_end_double_z(r300, query); else rv530_emit_query_end_single_z(r300, query); @@ -1237,13 +1238,12 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, r300_resource(index_buffer)->domain, 0); - /* Now do the validation. */ + /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { /* Ooops, an infinite loop, give up. */ if (flushed) return FALSE; - r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6c1c9d2fb13..234e043b071 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -24,7 +24,6 @@ #define R300_EMIT_H #include "r300_context.h" -#include "radeon_code.h" struct rX00_fragment_program_code; struct r300_vertex_program_code; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 34f5419a864..dc596c4122a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -76,7 +76,6 @@ void r300_flush(struct pipe_context *pipe, /* Create a fence, which is a dummy BO. */ *rfence = r300->rws->buffer_create(r300->rws, 1, 1, PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, RADEON_DOMAIN_GTT); /* Add the fence as a dummy relocation. */ r300->rws->cs_add_reloc(r300->cs, @@ -121,7 +120,7 @@ void r300_flush(struct pipe_context *pipe, } /* Release HyperZ. */ - r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } r300->num_z_clears = 0; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e3a1bc4a0f4..6f21125f70a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -38,8 +38,7 @@ #include "r300_texture.h" #include "r300_tgsi_to_rc.h" -#include "radeon_code.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about FS input semantics to r300_shader_semantics. */ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, @@ -181,9 +180,10 @@ static void get_external_state( v->base.format == PIPE_FORMAT_LATC1_SNORM) { unsigned char swizzle[4]; - util_format_combine_swizzles(swizzle, + util_format_compose_swizzles( util_format_description(v->base.format)->swizzle, - v->swizzle); + v->swizzle, + swizzle); state->unit[i].texture_swizzle = RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index c86a90b85ae..45c9e8801c3 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -27,7 +27,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_shader_semantics.h" struct r300_fragment_shader_code { diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 782f041e926..c0357f9d035 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -49,16 +49,15 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - q->num_pipes = r300screen->caps.num_z_pipes; + q->num_pipes = r300screen->info.r300_num_z_pipes; else - q->num_pipes = r300screen->caps.num_frag_pipes; + q->num_pipes = r300screen->info.r300_num_gb_pipes; insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, - PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, - q->domain); + PIPE_BIND_CUSTOM, q->domain); q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index bb30b1ab0be..5edbb22a743 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTC_D2A (3 << 23) # define R300_ALU_OUTC_MIN (4 << 23) # define R300_ALU_OUTC_MAX (5 << 23) -# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CND (7 << 23) # define R300_ALU_OUTC_CMP (8 << 23) # define R300_ALU_OUTC_FRC (9 << 23) # define R300_ALU_OUTC_REPL_ALPHA (10 << 23) @@ -2944,6 +2944,23 @@ enum { /*\}*/ +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /* BEGIN: Packet 3 commands */ /* A primitive emission dword. */ @@ -3249,6 +3266,8 @@ enum { # define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) # define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b31141a518e..d69b4cf4275 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1267,33 +1267,31 @@ done: r300->sprite_coord_enable = last_sprite_coord_enable; } -static void r300_resource_resolve(struct pipe_context* pipe, - struct pipe_resource* dest, - unsigned dst_layer, - struct pipe_resource* src, - unsigned src_layer) +static void r300_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) { - struct r300_context* r300 = r300_context(pipe); - struct pipe_surface* srcsurf, surf_tmpl; + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *srcsurf, *dstsurf, surf_tmpl; struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; float color[] = {0, 0, 0, 0}; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = src->format; - surf_tmpl.usage = 0; /* not really a surface hence no bind flags */ - surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */ - surf_tmpl.u.tex.first_layer = src_layer; - surf_tmpl.u.tex.last_layer = src_layer; - srcsurf = pipe->create_surface(pipe, src, &surf_tmpl); - surf_tmpl.format = dest->format; - surf_tmpl.u.tex.first_layer = dst_layer; - surf_tmpl.u.tex.last_layer = dst_layer; + surf_tmpl.format = info->src.res->format; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->src.layer; + srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl); + /* XXX Offset both surfaces by x0,y1. */ + + surf_tmpl.format = info->dst.res->format; + surf_tmpl.u.tex.level = info->dst.level; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = info->dst.layer; + dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl); DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); /* Enable AA resolve. */ - aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl)); - + aa->dest = r300_surface(dstsurf); aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; @@ -1301,16 +1299,19 @@ static void r300_resource_resolve(struct pipe_context* pipe, r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ + /* XXX: y1 < 0 ==> Y flip */ r300->context.clear_render_target(pipe, - srcsurf, color, 0, 0, src->width0, src->height0); + srcsurf, color, 0, 0, + info->dst.x1 - info->dst.x0, + info->dst.y1 - info->dst.y0); /* Disable AA resolve. */ aa->aaresolve_ctl = 0; r300->aa_state.size = 4; r300_mark_atom_dirty(r300, &r300->aa_state); - pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); - pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); + pipe_surface_reference(&srcsurf, NULL); + pipe_surface_reference(&dstsurf, NULL); } void r300_init_render_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c8df45fb3e7..8c0500c7dfd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -142,6 +142,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_SCALED_RESOLVE: return 0; /* SWTCL-only features. */ @@ -211,13 +212,12 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 0; case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: return 0; } break; @@ -248,18 +248,15 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 1; /* XXX guessed */ case PIPE_SHADER_CAP_MAX_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - return 0; - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: return 0; - default: - break; } break; default: @@ -316,6 +313,8 @@ static int r300_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } @@ -327,9 +326,8 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned sample_count, unsigned usage) { - struct radeon_winsys *rws = r300_screen(screen)->rws; uint32_t retval = 0; - boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0); + boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -458,7 +456,7 @@ static boolean r300_fence_signalled(struct pipe_screen *screen, struct radeon_winsys *rws = r300_screen(screen)->rws; struct pb_buffer *rfence = (struct pb_buffer*)fence; - return !rws->buffer_is_busy(rfence); + return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); } static boolean r300_fence_finish(struct pipe_screen *screen, @@ -475,7 +473,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, timeout /= 1000; /* Wait in a loop. */ - while (rws->buffer_is_busy(rfence)) { + while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { if (os_time_get() - start_time >= timeout) { return FALSE; } @@ -484,7 +482,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen, return TRUE; } - rws->buffer_wait(rfence); + rws->buffer_wait(rfence, RADEON_USAGE_READWRITE); return TRUE; } @@ -497,19 +495,17 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) return NULL; } - r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID); - r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES); - r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES); + rws->query_info(rws, &r300screen->info); r300_init_debug(r300screen); - r300_parse_chipset(&r300screen->caps); + r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps); if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) r300screen->caps.zmask_ram = 0; if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0)) + if (r300screen->info.drm_minor < 8) r300screen->caps.has_us_format = FALSE; pipe_mutex_init(r300screen->num_contexts_mutex); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index e5c53bf3500..82b2068e7a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -24,23 +24,20 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_screen.h" - #include "r300_chipset.h" - +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "pipe/p_screen.h" #include "util/u_slab.h" - #include <stdio.h> -struct radeon_winsys; - struct r300_screen { /* Parent class */ struct pipe_screen screen; struct radeon_winsys *rws; - /* Chipset capabilities */ + /* Chipset info and capabilities. */ + struct radeon_info info; struct r300_capabilities caps; /* Memory pools. */ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4154c81512e..c751a943b96 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -201,8 +201,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.b.width0, alignment, - rbuf->b.b.b.bind, rbuf->b.b.b.usage, - rbuf->domain); + rbuf->b.b.b.bind, rbuf->domain); if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f63114e7eb7..45c11fce1fe 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -605,7 +605,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, { const struct util_format_description *desc; float border_swizzled[4] = {0}; - unsigned i; union util_color uc = {0}; desc = util_format_description(format); @@ -629,22 +628,7 @@ static uint32_t r300_get_border_color(enum pipe_format format, } /* Apply inverse swizzle of the format. */ - for (i = 0; i < 4; i++) { - switch (desc->swizzle[i]) { - case UTIL_FORMAT_SWIZZLE_X: - border_swizzled[0] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Y: - border_swizzled[1] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_Z: - border_swizzled[2] = border[i]; - break; - case UTIL_FORMAT_SWIZZLE_W: - border_swizzled[3] = border[i]; - break; - } - } + util_format_unswizzle_4f(border_swizzled, border, desc->swizzle); /* Compressed formats. */ if (util_format_is_compressed(format)) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 62c2f1fff6c..fc84004fb97 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -38,18 +38,6 @@ #include "pipe/p_screen.h" -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ? - swz1[swz2[i]] : swz2[i]; - } -} - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle) @@ -72,7 +60,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, if (swizzle_view) { /* Combine two sets of swizzles. */ - util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view); + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } @@ -926,7 +914,7 @@ r300_texture_create_object(struct r300_screen *rscreen, if (!buffer) { tex->buf_size = tex->tex.size_in_bytes; tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - base->bind, base->usage, tex->domain); + base->bind, tex->domain); if (!tex->buf) { FREE(tex); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 4586bb2e4dc..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,10 +35,6 @@ struct r300_texture_desc; struct r300_resource; struct r300_screen; -void util_format_combine_swizzles(unsigned char *dst, - const unsigned char *swz1, - const unsigned char *swz2); - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view, boolean dxtc_swizzle); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index da5778be65e..fe4f8dd5679 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -360,9 +360,9 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, unsigned i, pipes; if (screen->caps.family == CHIP_FAMILY_RV530) { - pipes = screen->caps.num_z_pipes; + pipes = screen->info.r300_num_z_pipes; } else { - pipes = screen->caps.num_frag_pipes; + pipes = screen->info.r300_num_gb_pipes; } for (i = 0; i <= tex->b.b.b.last_level; i++) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0561ab9bfa4..07a3f3caee7 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -22,8 +22,7 @@ #include "r300_tgsi_to_rc.h" -#include "radeon_compiler.h" -#include "radeon_program.h" +#include "compiler/radeon_compiler.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index e2ea4cbf6c5..65964020adc 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -97,7 +97,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, referenced_hw = TRUE; } else { referenced_hw = - r300->rws->buffer_is_busy(tex->buf); + r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE); } blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b319890157f..a5e8fd680ff 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" -#include "radeon_compiler.h" +#include "compiler/radeon_compiler.h" /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 170de6c79db..a482ddce9c9 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -26,7 +26,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -#include "radeon_code.h" +#include "compiler/radeon_code.h" #include "r300_context.h" #include "r300_shader_semantics.h" diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk new file mode 100644 index 00000000000..994ae07789c --- /dev/null +++ b/src/gallium/drivers/r600/Android.mk @@ -0,0 +1,42 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_CFLAGS := -std=c99 + +LOCAL_C_INCLUDES := \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_pipe_r600 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 7e21e3e32b1..0e68fe99345 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -6,20 +6,7 @@ LIBNAME = r600 LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) -C_SOURCES = \ - r600_asm.c \ - r600_blit.c \ - r600_buffer.c \ - r600_pipe.c \ - r600_query.c \ - r600_resource.c \ - r600_shader.c \ - r600_state.c \ - r600_texture.c \ - r700_asm.c \ - evergreen_state.c \ - eg_asm.c \ - r600_translate.c \ - r600_state_common.c +# get C_SOURCES +include Makefile.sources include ../../Makefile.template diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources new file mode 100644 index 00000000000..0366394e5a2 --- /dev/null +++ b/src/gallium/drivers/r600/Makefile.sources @@ -0,0 +1,15 @@ +C_SOURCES := \ + r600_asm.c \ + r600_blit.c \ + r600_buffer.c \ + r600_pipe.c \ + r600_query.c \ + r600_resource.c \ + r600_shader.c \ + r600_state.c \ + r600_texture.c \ + r700_asm.c \ + evergreen_state.c \ + eg_asm.c \ + r600_translate.c \ + r600_state_common.c diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 19f07b2bef8..be12255e4d0 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -11,22 +11,8 @@ env.Append(CPPPATH = [ r600 = env.ConvenienceLibrary( target = 'r600', - source = [ - 'r600_asm.c', - 'r600_buffer.c', - 'r600_blit.c', - 'r600_pipe.c', - 'r600_query.c', - 'r600_resource.c', - 'r600_shader.c', - 'r600_state.c', - 'r600_state_common.c', - 'r600_texture.c', - 'r600_translate.c', - 'r700_asm.c', - 'evergreen_state.c', - 'eg_asm.c', - ]) + source = env.ParseSourceList('Makefile.sources', 'C_SOURCES') + ) env.Alias('r600', r600) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index c95872b0809..ca25b341ffd 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -29,7 +29,7 @@ #include "r600_opcodes.h" #include "evergreend.h" -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4605c833dea..2135b8ac580 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -262,22 +262,16 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028040_Z_16; case PIPE_FORMAT_Z24X8_UNORM: - return V_028040_Z_24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028040_Z_24; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028040_Z_32_FLOAT; default: return ~0U; } } -static uint32_t r600_translate_stencilformat(enum pipe_format format) -{ - if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - return 1; - else - return 0; -} - static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -360,6 +354,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -369,6 +364,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -453,7 +449,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return V_028C70_COLOR_24_8; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return V_028C70_COLOR_X24_8_32_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: return V_028C70_COLOR_32_FLOAT; case PIPE_FORMAT_R16G16_FLOAT: @@ -541,6 +541,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32_FLOAT: case V_028C70_COLOR_32_32: + case V_028C70_COLOR_X24_8_32_FLOAT: return ENDIAN_8IN32; /* 96-bit buffers. */ @@ -638,10 +639,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; @@ -686,13 +687,13 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); + color_control, 0xFFFFFFFD, NULL, 0); if (rctx->chip_class != CAYMAN) - r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); else { - r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); } for (int i = 0; i < 8; i++) { @@ -723,7 +724,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } for (int i = 0; i < 8; i++) { - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL, 0); } return rstate; @@ -791,27 +792,27 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); + 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * evergreen_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -856,7 +857,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -870,44 +871,44 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { - r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } - r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -933,22 +934,22 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | S_03C008_TYPE(1), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -957,43 +958,39 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture; struct r600_resource *rbuffer; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; - if (resource == NULL) + if (view == NULL) return NULL; - rstate = &resource->state; + rstate = &view->state; /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; + view->base = *state; + view->base.texture = NULL; pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; + view->base.texture = texture; + view->base.reference.count = 1; + view->base.context = ctx; swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(state->format); - if (desc == NULL) { - R600_ERR("unknow format %d\n", state->format); - } - tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; @@ -1016,6 +1013,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | S_030000_NON_DISP_TILING_ORDER(tile_type) | @@ -1023,8 +1022,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | S_030004_TEX_DEPTH(texture->depth0 - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[0] >> 8; + rstate->val[3] = tmp->offset[1] >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -1036,7 +1035,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[7] = (S_03001C_DATA_FORMAT(format) | S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)); - return &resource->base; + return &view->base; } static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, @@ -1131,21 +1130,21 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_0285BC_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C0_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C4_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0285C8_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1176,28 +1175,28 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1219,11 +1218,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1241,15 +1240,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1354,86 +1353,82 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C70_CB_COLOR0_INFO + cb * 0x3C, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, S_028C74_NON_DISP_TILING_ORDER(tile_type), - 0xFFFFFFFF, bo[0]); + 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); } static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, - const struct pipe_framebuffer_state *state) + const struct pipe_framebuffer_state *state) { struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level; - unsigned pitch, slice, format, stencil_format; + unsigned level, first_layer; + unsigned pitch, slice, format; unsigned offset; if (state->zsbuf == NULL) return; - level = state->zsbuf->u.tex.level; - surf = (struct r600_surface *)state->zsbuf; - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - - rbuffer = &rtex->resource; + rtex = (struct r600_resource_texture*)surf->base.texture; - /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, - level, state->zsbuf->u.tex.first_layer); + level = surf->base.u.tex.level; + first_layer = surf->base.u.tex.first_layer; + offset = r600_texture_get_offset(rtex, level, first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); + format = r600_translate_dbformat(rtex->real_format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); + r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); - if (stencil_format) { - uint32_t stencil_offset; + if (rtex->stencil) { + uint32_t stencil_offset = + r600_texture_get_offset(rtex->stencil, level, first_layer); - stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + 1, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE); + } else { + r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, + 0, 0xFFFFFFFF, NULL, RADEON_USAGE_READWRITE); } - r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, - S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } static void evergreen_set_framebuffer_state(struct pipe_context *ctx, @@ -1492,49 +1487,49 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class == CAYMAN) { r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } else { r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); @@ -1609,78 +1604,78 @@ static void cayman_init_config(struct r600_pipe_context *rctx) tmp = 0x00000000; tmp |= S_008C00_EXPORT_SRC_C(1); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* always set the temp clauses */ - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0); - r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0); - - r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL); - - r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, NULL, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -1964,39 +1959,39 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* enable dynamic GPR resource management */ if (r600_get_minor_version(rctx->radeon) >= 7) { /* always set temp clauses */ r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, - S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, S_028838_PS_GPRS(0x1e) | S_028838_VS_GPRS(0x1e) | S_028838_GS_GPRS(0x1e) | S_028838_ES_GPRS(0x1e) | S_028838_HS_GPRS(0x1e) | - S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL, 0); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ } else { tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); } tmp = 0; @@ -2004,109 +1999,109 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads); tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); - r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0); tmp = 0; tmp |= S_008E2C_NUM_PS_LDS(0x1000); tmp |= S_008E2C_NUM_LS_LDS(0x1000); - r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL, 0); #if 0 - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL, 0); #endif - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2128,6 +2123,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_units *= 2.0f; break; case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: depth = -23; offset_units *= 1.0f; offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); @@ -2143,19 +2139,19 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -2252,32 +2248,32 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_0286E0_LINEAR_CENTROID_ENA(have_centroid); r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, - spi_ps_in_control_0, 0xFFFFFFFF, NULL); + spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - spi_ps_in_control_1, 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, - 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028848_SQ_PGM_RESOURCES_2_PS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */ /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -2286,10 +2282,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2298,7 +2294,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -2314,28 +2310,36 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_02861C_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); + S_0286C4_VS_EXPORT_COUNT(nparams - 1), + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | S_028860_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028864_SQ_PGM_RESOURCES_2_VS, - 0x0, 0xFFFFFFFF, NULL); + 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void evergreen_fetch_shader(struct pipe_context *ctx, @@ -2346,10 +2350,10 @@ void evergreen_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + 0, + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2363,7 +2367,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, S_028000_DEPTH_COPY_ENABLE(1) | @@ -2371,7 +2375,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), S_028000_DEPTH_COPY_ENABLE(1) | S_028000_STENCIL_COPY_ENABLE(1) | - S_028000_COPY_CENTROID(1), NULL); + S_028000_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2397,9 +2401,11 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[0] = offset; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 96dbd4da91b..9a8c353e4ee 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1697,6 +1697,10 @@ #define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x00028A3C #define R_028A48_PA_SC_MODE_CNTL_0 0x00028A48 #define R_028A4C_PA_SC_MODE_CNTL_1 0x00028A4C +#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN 0x00028A94 +#define S_028A94_RESET_EN(x) (((x) & 0x1) << 0) +#define G_028A94_RESET_EN(x) (((x) >> 0) & 0x1) +#define C_028A94_RESET_EN 0xFFFFFFFE #define R_028AB4_VGT_REUSE_OFF 0x00028AB4 #define R_028AB8_VGT_VTX_CNT_EN 0x00028AB8 #define R_028ABC_DB_HTILE_SURFACE 0x00028ABC diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 61adc7ed988..f24146edcf1 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -26,14 +26,8 @@ #ifndef R600_H #define R600_H -#include <assert.h> -#include <stdint.h> -#include <stdio.h> -#include <util/u_double_list.h> -#include <util/u_inlines.h> -#include <pipe/p_compiler.h> - -#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "util/u_double_list.h" #define R600_ERR(fmt, args...) \ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) @@ -94,31 +88,32 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); unsigned r600_get_minor_version(struct radeon *radeon); unsigned r600_get_num_backends(struct radeon *radeon); +unsigned r600_get_num_tile_pipes(struct radeon *radeon); +unsigned r600_get_backend_map(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; +struct radeon_winsys_cs; + struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage); -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode); +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, - unsigned stride, struct winsys_handle *whandle); -static INLINE unsigned r600_bo_offset(struct r600_bo *bo) -{ - return 0; -} -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); + unsigned stride, struct winsys_handle *whandle); + +void r600_bo_destroy(struct r600_bo *bo); /* this relies on the pipe_reference being the first member of r600_bo */ -static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) +static INLINE void r600_bo_reference(struct r600_bo **dst, struct r600_bo *src) { struct r600_bo *old = *dst; if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) { - r600_bo_destroy(radeon, old); + r600_bo_destroy(old); } *dst = src; } @@ -145,6 +140,7 @@ struct r600_pipe_reg { u32 mask; struct r600_block *block; struct r600_bo *bo; + enum radeon_bo_usage bo_usage; u32 id; }; @@ -157,7 +153,8 @@ struct r600_pipe_state { struct r600_pipe_resource_state { unsigned id; u32 val[8]; - struct r600_bo *bo[2]; + struct r600_bo *bo[2]; + enum radeon_bo_usage bo_usage[2]; /* XXX set these */ }; #define R600_BLOCK_STATUS_ENABLED (1 << 0) @@ -168,6 +165,7 @@ struct r600_pipe_resource_state { struct r600_block_reloc { struct r600_bo *bo; + enum radeon_bo_usage bo_usage; unsigned flush_flags; unsigned flush_mask; unsigned bo_pm4_index; @@ -195,18 +193,6 @@ struct r600_range { }; /* - * relocation - */ -#pragma pack(1) -struct r600_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -/* * query */ struct r600_query { @@ -243,6 +229,8 @@ struct r600_query { struct r600_context { struct radeon *radeon; + struct radeon_winsys_cs *cs; + struct r600_range *range; unsigned nblocks; struct r600_block **blocks; @@ -250,18 +238,19 @@ struct r600_context { struct list_head resource_dirty; struct list_head enable_list; unsigned pm4_ndwords; - unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; unsigned init_dwords; - unsigned nreloc; + unsigned creloc; - struct r600_reloc *reloc; - struct radeon_bo **bo; + struct r600_bo **bo; + u32 *pm4; + unsigned pm4_cdwords; + struct list_head query_list; unsigned num_query_running; - struct list_head fenced_bo; + unsigned backend_mask; unsigned max_db; /* for OQ */ unsigned num_dest_buffers; unsigned flags; @@ -282,6 +271,7 @@ struct r600_draw { struct r600_bo *indices; }; +void r600_get_backend_mask(struct r600_context *ctx); int r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); @@ -290,8 +280,7 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6 void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -void r600_context_flush(struct r600_context *ctx); -void r600_context_dump_bof(struct r600_context *ctx, const char *file); +void r600_context_flush(struct r600_context *ctx, unsigned flags); void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw); struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); @@ -319,18 +308,21 @@ void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, stru void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); -struct radeon *radeon_decref(struct radeon *radeon); +struct radeon *radeon_destroy(struct radeon *radeon); void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo); + struct r600_bo *bo, + enum radeon_bo_usage usage); void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo); -#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo) + struct r600_bo *bo, + enum radeon_bo_usage usage); + +#define r600_pipe_state_add_reg(state, offset, value, mask, bo, usage) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage) static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, u32 value) @@ -340,10 +332,12 @@ static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, } static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state, - u32 value, struct r600_bo *bo) + u32 value, struct r600_bo *bo, + enum radeon_bo_usage usage) { state->regs[state->nregs].value = value; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 5fae2b00c8b..27febdf9d03 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -36,7 +36,7 @@ #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 -static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) +static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { if(alu->is_op3) return 3; @@ -88,6 +88,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); @@ -140,6 +141,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: return 1; default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); @@ -150,11 +152,11 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r return 3; } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); -static struct r600_bc_cf *r600_bc_cf(void) +static struct r600_bytecode_cf *r600_bytecode_cf(void) { - struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); if (cf == NULL) return NULL; @@ -165,9 +167,9 @@ static struct r600_bc_cf *r600_bc_cf(void) return cf; } -static struct r600_bc_alu *r600_bc_alu(void) +static struct r600_bytecode_alu *r600_bytecode_alu(void) { - struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); if (alu == NULL) return NULL; @@ -175,9 +177,9 @@ static struct r600_bc_alu *r600_bc_alu(void) return alu; } -static struct r600_bc_vtx *r600_bc_vtx(void) +static struct r600_bytecode_vtx *r600_bytecode_vtx(void) { - struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); if (vtx == NULL) return NULL; @@ -185,9 +187,9 @@ static struct r600_bc_vtx *r600_bc_vtx(void) return vtx; } -static struct r600_bc_tex *r600_bc_tex(void) +static struct r600_bytecode_tex *r600_bytecode_tex(void) { - struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); if (tex == NULL) return NULL; @@ -195,15 +197,15 @@ static struct r600_bc_tex *r600_bc_tex(void) return tex; } -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class) +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class) { LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; } -static int r600_bc_add_cf(struct r600_bc *bc) +static int r600_bytecode_add_cf(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = r600_bc_cf(); + struct r600_bytecode_cf *cf = r600_bytecode_cf(); if (cf == NULL) return -ENOMEM; @@ -217,7 +219,7 @@ static int r600_bc_add_cf(struct r600_bc *bc) return 0; } -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output) { int r; @@ -252,16 +254,16 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) } } - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; bc->cf_last->inst = output->inst; - memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); return 0; } /* alu instructions that can ony exits once per group */ -static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -342,7 +344,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -363,7 +365,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -378,7 +380,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } -static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -396,7 +398,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the vector unit */ -static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return is_alu_reduction_inst(bc, alu) || is_alu_mova_inst(bc, alu) || @@ -405,7 +407,7 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can only execute on the trans unit */ -static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { switch (bc->chip_class) { case R600: @@ -476,23 +478,23 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } /* alu instructions that can execute on any unit */ -static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return !is_alu_vec_unit_inst(bc, alu) && !is_alu_trans_unit_inst(bc, alu); } -static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, - struct r600_bc_alu *assignment[5]) +static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, + struct r600_bytecode_alu *assignment[5]) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; unsigned i, chan, trans; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) assignment[i] = NULL; - for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { chan = alu->dst.chan; if (max_slots == 4) trans = 0; @@ -571,7 +573,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, return 0; } -static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) +static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { int res, num_res = 4; if (bc->chip_class >= R700) { @@ -613,12 +615,12 @@ static int is_const(int sel) sel <= V_SQ_ALU_SRC_LITERAL); } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; src++) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -643,12 +645,12 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, +static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { int r, src, num_src, const_count, sel, elem, cycle; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (const_count = 0, src = 0; src < num_src; ++src) { sel = alu->src[src].sel; elem = alu->src[src].chan; @@ -689,20 +691,24 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, - struct r600_bc_alu *slots[5]) +static int check_and_set_bank_swizzle(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5]) { struct alu_bank_swizzle bs; int bank_swizzle[5]; - int i, r = 0, forced = 0; + int i, r = 0, forced = 1; boolean scalar_only = bc->chip_class == CAYMAN ? false : true; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; for (i = 0; i < max_slots; i++) { - if (slots[i] && slots[i]->bank_swizzle_force) { - slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; - forced = 1; + if (slots[i]) { + if (slots[i]->bank_swizzle_force) { + slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; + } else { + forced = 0; + } } + if (i < 4 && slots[i]) scalar_only = false; } @@ -712,7 +718,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, /* Just check every possible combination of bank swizzle. * Not very efficent, but works on the first try in most of the cases. */ for (i = 0; i < 4; i++) - bank_swizzle[i] = SQ_ALU_VEC_012; + if (!slots[i] || !slots[i]->bank_swizzle_force) + bank_swizzle[i] = SQ_ALU_VEC_012; + else + bank_swizzle[i] = slots[i]->bank_swizzle; + bank_swizzle[4] = SQ_ALU_SCL_210; while(bank_swizzle[4] <= SQ_ALU_SCL_221) { @@ -749,11 +759,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, bank_swizzle[4]++; } else { for (i = 0; i < max_slots; i++) { - bank_swizzle[i]++; - if (bank_swizzle[i] <= SQ_ALU_VEC_210) - break; - else - bank_swizzle[i] = SQ_ALU_VEC_012; + if (!slots[i] || !slots[i]->bank_swizzle_force) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } } } @@ -762,10 +774,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, return -1; } -static int replace_gpr_with_pv_ps(struct r600_bc *bc, - struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) +static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, + struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; + struct r600_bytecode_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; @@ -775,7 +787,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, return r; for (i = 0; i < max_slots; ++i) { - if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { + if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; /* cube writes more than PV.X */ if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) @@ -787,11 +799,11 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, } for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu = slots[i]; + struct r600_bytecode_alu *alu = slots[i]; if(!alu) continue; - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; @@ -819,7 +831,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, return 0; } -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg) { switch(value) { case 0: @@ -852,10 +864,10 @@ void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) } /* compute how many literal are needed */ -static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, +static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned *nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -878,11 +890,11 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, return 0; } -static void r600_bc_alu_adjust_literals(struct r600_bc *bc, - struct r600_bc_alu *alu, +static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc, + struct r600_bytecode_alu *alu, uint32_t literal[4], unsigned nliteral) { - unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); unsigned i, j; for (i = 0; i < num_src; ++i) { @@ -898,11 +910,11 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc, } } -static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], - struct r600_bc_alu *alu_prev) +static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], + struct r600_bytecode_alu *alu_prev) { - struct r600_bc_alu *prev[5]; - struct r600_bc_alu *result[5] = { NULL }; + struct r600_bytecode_alu *prev[5]; + struct r600_bytecode_alu *result[5] = { NULL }; uint32_t literal[4], prev_literal[4]; unsigned nliteral = 0, prev_nliteral = 0; @@ -917,13 +929,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], return r; for (i = 0; i < max_slots; ++i) { - struct r600_bc_alu *alu; + struct r600_bytecode_alu *alu; /* check number of literals */ if (prev[i]) { - if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral)) return 0; - if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) + if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) return 0; if (is_alu_mova_inst(bc, prev[i])) { if (have_rel) @@ -932,7 +944,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } num_once_inst += is_alu_once_inst(bc, prev[i]); } - if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral)) return 0; /* Let's check used slots. */ @@ -968,7 +980,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* Let's check source gprs */ - num_src = r600_bc_get_num_operands(bc, alu); + num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { if (have_mova) @@ -1018,7 +1030,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } /* determine new last instruction */ - LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; /* determine new first instruction */ for (i = 0; i < max_slots; ++i) { @@ -1038,9 +1050,9 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], * probably do slightly better by recognizing that we actually have two * consecutive lines of 16 constants, but the resulting code would also be * somewhat more complicated. */ -static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) +static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type) { - struct r600_bc_kcache *kcache = bc->cf_last->kcache; + struct r600_bytecode_kcache *kcache = bc->cf_last->kcache; unsigned int required_lines; unsigned int free_lines = 0; unsigned int cache_line[3]; @@ -1093,7 +1105,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Start a new ALU clause if needed. */ if (required_lines > free_lines) { - if ((r = r600_bc_add_cf(bc))) { + if ((r = r600_bytecode_add_cf(bc))) { return r; } bc->cf_last->inst = (type << 3); @@ -1148,15 +1160,15 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al return 0; } -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type) { - struct r600_bc_alu *nalu = r600_bc_alu(); - struct r600_bc_alu *lalu; + struct r600_bytecode_alu *nalu = r600_bytecode_alu(); + struct r600_bytecode_alu *lalu; int i, r; if (nalu == NULL) return -ENOMEM; - memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { /* check if we could add it anyway */ @@ -1174,7 +1186,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nalu); return r; @@ -1184,7 +1196,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* Setup the kcache for this ALU instruction. This will start a new * ALU clause if needed. */ - if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { free(nalu); return r; } @@ -1198,7 +1210,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->ngpr = nalu->src[i].sel + 1; } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) - r600_bc_special_constants(nalu->src[i].value, + r600_bytecode_special_constants(nalu->src[i].value, &nalu->src[i].sel, &nalu->src[i].neg); } if (nalu->dst.sel >= bc->ngpr) { @@ -1213,7 +1225,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu->last) { uint32_t literal[4]; unsigned nliteral; - struct r600_bc_alu *slots[5]; + struct r600_bytecode_alu *slots[5]; int max_slots = bc->chip_class == CAYMAN ? 4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) @@ -1237,7 +1249,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int for (i = 0, nliteral = 0; i < max_slots; i++) { if (slots[i]) { - r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral); if (r) return r; } @@ -1257,12 +1269,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int return 0; } -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) { - return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) +static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) { switch (bc->chip_class) { case R600: @@ -1281,7 +1293,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) } } -static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) +static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc) { if (bc->chip_class == CAYMAN) { if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) @@ -1294,20 +1306,20 @@ static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) return FALSE; } -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) { - struct r600_bc_vtx *nvtx = r600_bc_vtx(); + struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); int r; if (nvtx == NULL) return -ENOMEM; - memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || last_inst_was_vtx_fetch(bc) || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(nvtx); return r; @@ -1321,24 +1333,24 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) { - struct r600_bc_tex *ntex = r600_bc_tex(); + struct r600_bytecode_tex *ntex = r600_bytecode_tex(); int r; if (ntex == NULL) return -ENOMEM; - memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); /* we can't fetch data und use it as texture lookup address in the same TEX clause */ if (bc->cf_last != NULL && bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - struct r600_bc_tex *ttex; + struct r600_bytecode_tex *ttex; LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { if (ttex->dst_gpr == ntex->src_gpr) { bc->force_add_cf = 1; @@ -1354,7 +1366,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || bc->force_add_cf) { - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) { free(ntex); return r; @@ -1371,15 +1383,15 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } -int r600_bc_add_cfinst(struct r600_bc *bc, int inst) +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst) { int r; - r = r600_bc_add_cf(bc); + r = r600_bytecode_add_cf(bc); if (r) return r; @@ -1388,13 +1400,13 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } -int cm_bc_add_cf_end(struct r600_bc *bc) +int cm_bytecode_add_cf_end(struct r600_bytecode *bc) { - return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); + return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); } /* common to all 3 families */ -static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) { bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | @@ -1423,7 +1435,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign } /* common to all 3 families */ -static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) +static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | @@ -1453,7 +1465,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign } /* r600 only, r700/eg bits in r700_asm.c */ -static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | @@ -1494,7 +1506,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } -static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | @@ -1503,7 +1515,7 @@ static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf } /* common for r600/r700 - eg in eg_asm.c */ -static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) { unsigned id = cf->id; @@ -1529,9 +1541,9 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: if (bc->chip_class == R700) - r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); else - r600_bc_cf_vtx_build(&bc->bytecode[id], cf); + r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: @@ -1571,12 +1583,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -int r600_bc_build(struct r600_bc *bc) +int r600_bytecode_build(struct r600_bytecode *bc) { - struct r600_bc_cf *cf; - struct r600_bc_alu *alu; - struct r600_bc_vtx *vtx; - struct r600_bc_tex *tex; + struct r600_bytecode_cf *cf; + struct r600_bytecode_alu *alu; + struct r600_bytecode_vtx *vtx; + struct r600_bytecode_tex *tex; uint32_t literal[4]; unsigned nliteral; unsigned addr; @@ -1636,9 +1648,9 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; if (bc->chip_class >= EVERGREEN) - r = eg_bc_cf_build(bc, cf); + r = eg_bytecode_cf_build(bc, cf); else - r = r600_bc_cf_build(bc, cf); + r = r600_bytecode_cf_build(bc, cf); if (r) return r; switch (cf->inst) { @@ -1649,18 +1661,18 @@ int r600_bc_build(struct r600_bc *bc) nliteral = 0; memset(literal, 0, sizeof(literal)); LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); if (r) return r; - r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); + r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chip_class) { case R600: - r = r600_bc_alu_build(bc, alu, addr); + r = r600_bytecode_alu_build(bc, alu, addr); break; case R700: case EVERGREEN: /* eg alu is same encoding as r700 */ case CAYMAN: /* eg alu is same encoding as r700 */ - r = r700_bc_alu_build(bc, alu, addr); + r = r700_bytecode_alu_build(bc, alu, addr); break; default: R600_ERR("unknown chip class %d.\n", bc->chip_class); @@ -1681,7 +1693,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; @@ -1690,14 +1702,14 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_TEX: if (bc->chip_class == CAYMAN) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bc_vtx_build(bc, vtx, addr); + r = r600_bytecode_vtx_build(bc, vtx, addr); if (r) return r; addr += 4; } } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - r = r600_bc_tex_build(bc, tex, addr); + r = r600_bytecode_tex_build(bc, tex, addr); if (r) return r; addr += 4; @@ -1726,17 +1738,17 @@ int r600_bc_build(struct r600_bc *bc) return 0; } -void r600_bc_clear(struct r600_bc *bc) +void r600_bytecode_clear(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL, *next_cf; + struct r600_bytecode_cf *cf = NULL, *next_cf; free(bc->bytecode); bc->bytecode = NULL; LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { - struct r600_bc_alu *alu = NULL, *next_alu; - struct r600_bc_tex *tex = NULL, *next_tex; - struct r600_bc_tex *vtx = NULL, *next_vtx; + struct r600_bytecode_alu *alu = NULL, *next_alu; + struct r600_bytecode_tex *tex = NULL, *next_tex; + struct r600_bytecode_tex *vtx = NULL, *next_vtx; LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { free(alu); @@ -1762,12 +1774,12 @@ void r600_bc_clear(struct r600_bc *bc) LIST_INITHEAD(&cf->list); } -void r600_bc_dump(struct r600_bc *bc) +void r600_bytecode_dump(struct r600_bytecode *bc) { - struct r600_bc_cf *cf = NULL; - struct r600_bc_alu *alu = NULL; - struct r600_bc_vtx *vtx = NULL; - struct r600_bc_tex *tex = NULL; + struct r600_bytecode_cf *cf = NULL; + struct r600_bytecode_alu *alu = NULL; + struct r600_bytecode_vtx *vtx = NULL; + struct r600_bytecode_tex *tex = NULL; unsigned i, id; uint32_t literal[4]; @@ -1866,7 +1878,7 @@ void r600_bc_dump(struct r600_bc *bc) id = cf->addr; nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); @@ -2120,8 +2132,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru { static int dump_shaders = -1; - struct r600_bc bc; - struct r600_bc_vtx vtx; + struct r600_bytecode bc; + struct r600_bytecode_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; @@ -2142,11 +2154,11 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } memset(&bc, 0, sizeof(bc)); - r600_bc_init(&bc, rctx->chip_class); + r600_bytecode_init(&bc, rctx->chip_class); for (i = 0; i < ve->count; i++) { if (elements[i].instance_divisor > 1) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); @@ -2161,8 +2173,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_alu(&bc, &alu))) { + r600_bytecode_clear(&bc); return r; } } @@ -2173,7 +2185,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); R600_ERR("unknown format %d\n", ve->elements[i].src_format); return -EINVAL; } @@ -2198,16 +2210,16 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.offset = elements[i].src_offset; vtx.endian = endian; - if ((r = r600_bc_add_vtx(&bc, &vtx))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { + r600_bytecode_clear(&bc); return r; } } - r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); - if ((r = r600_bc_build(&bc))) { - r600_bc_clear(&bc); + if ((r = r600_bytecode_build(&bc))) { + r600_bytecode_clear(&bc); return r; } @@ -2216,7 +2228,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - r600_bc_dump(&bc); + r600_bytecode_dump(&bc); fprintf(stderr, "______________________________________________________________\n"); } @@ -2225,14 +2237,14 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (ve->fetch_shader == NULL) { - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); return -ENOMEM; } - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (bytecode == NULL) { - r600_bc_clear(&bc); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + r600_bytecode_clear(&bc); + r600_bo_reference(&ve->fetch_shader, NULL); return -ENOMEM; } @@ -2245,7 +2257,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } r600_bo_unmap(rctx->radeon, ve->fetch_shader); - r600_bc_clear(&bc); + r600_bytecode_clear(&bc); if (rctx->chip_class >= EVERGREEN) evergreen_fetch_shader(&rctx->context, ve); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index cbdaacf7178..61caa4b915e 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -23,12 +23,10 @@ #ifndef R600_ASM_H #define R600_ASM_H -#include "util/u_double_list.h" - struct r600_vertex_element; struct r600_pipe_context; -struct r600_bc_alu_src { +struct r600_bytecode_alu_src { unsigned sel; unsigned chan; unsigned neg; @@ -37,7 +35,7 @@ struct r600_bc_alu_src { uint32_t value; }; -struct r600_bc_alu_dst { +struct r600_bytecode_alu_dst { unsigned sel; unsigned chan; unsigned clamp; @@ -45,10 +43,10 @@ struct r600_bc_alu_dst { unsigned rel; }; -struct r600_bc_alu { +struct r600_bytecode_alu { struct list_head list; - struct r600_bc_alu_src src[3]; - struct r600_bc_alu_dst dst; + struct r600_bytecode_alu_src src[3]; + struct r600_bytecode_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; @@ -58,7 +56,7 @@ struct r600_bc_alu { unsigned omod; }; -struct r600_bc_tex { +struct r600_bytecode_tex { struct list_head list; unsigned inst; unsigned resource_id; @@ -85,7 +83,7 @@ struct r600_bc_tex { unsigned src_sel_w; }; -struct r600_bc_vtx { +struct r600_bytecode_vtx { struct list_head list; unsigned inst; unsigned fetch_type; @@ -107,7 +105,7 @@ struct r600_bc_vtx { unsigned endian; }; -struct r600_bc_output { +struct r600_bytecode_output { unsigned array_base; unsigned type; unsigned end_of_program; @@ -122,13 +120,13 @@ struct r600_bc_output { unsigned barrier; }; -struct r600_bc_kcache { +struct r600_bytecode_kcache { unsigned bank; unsigned mode; unsigned addr; }; -struct r600_bc_cf { +struct r600_bytecode_cf { struct list_head list; unsigned inst; unsigned addr; @@ -137,15 +135,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - struct r600_bc_kcache kcache[2]; + struct r600_bytecode_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; - struct r600_bc_output output; - struct r600_bc_alu *curr_bs_head; - struct r600_bc_alu *prev_bs_head; - struct r600_bc_alu *prev2_bs_head; + struct r600_bytecode_output output; + struct r600_bytecode_alu *curr_bs_head; + struct r600_bytecode_alu *prev_bs_head; + struct r600_bytecode_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -157,8 +155,8 @@ struct r600_bc_cf { struct r600_cf_stack_entry { int type; - struct r600_bc_cf *start; - struct r600_bc_cf **mid; /* used to store the else point */ + struct r600_bytecode_cf *start; + struct r600_bytecode_cf **mid; /* used to store the else point */ int num_mid; }; @@ -170,11 +168,11 @@ struct r600_cf_callstack { int max; }; -struct r600_bc { +struct r600_bytecode { enum chip_class chip_class; int type; struct list_head cf; - struct r600_bc_cf *cf_last; + struct r600_bytecode_cf *cf_last; unsigned ndw; unsigned ncf; unsigned ngpr; @@ -189,27 +187,27 @@ struct r600_bc { }; /* eg_asm.c */ -int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); +int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class); -void r600_bc_clear(struct r600_bc *bc); -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); -int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); -int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); -int r600_bc_build(struct r600_bc *bc); -int r600_bc_add_cfinst(struct r600_bc *bc, int inst); -int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); -void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); -void r600_bc_dump(struct r600_bc *bc); - -int cm_bc_add_cf_end(struct r600_bc *bc); +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class); +void r600_bytecode_clear(struct r600_bytecode *bc); +int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); +int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); +int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex); +int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output); +int r600_bytecode_build(struct r600_bytecode *bc); +int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst); +int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type); +void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg); +void r600_bytecode_dump(struct r600_bytecode *bc); + +int cm_bytecode_add_cf_end(struct r600_bytecode *bc); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ -void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf); -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 35e68b6e222..2f7e871448a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -111,7 +111,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t if (!texture->dirty_db) return; - surf_tmpl.format = texture->resource.b.b.b.format; + surf_tmpl.format = texture->real_format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; @@ -119,7 +119,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); - surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; + surf_tmpl.format = texture->flushed_depth_texture->real_format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; cbsurf = ctx->create_surface(ctx, (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl); @@ -233,8 +233,8 @@ static void r600_hw_copy_region(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box, TRUE); + util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r600_blitter_end(ctx); } @@ -249,7 +249,7 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex, struct texture_orig_info *orig) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; - unsigned pixsize = util_format_get_blocksize(tex->format); + unsigned pixsize = util_format_get_blocksize(rtex->real_format); int new_format; int new_height, new_width; @@ -269,7 +269,6 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex, tex->width0 = new_width; tex->height0 = new_height; tex->format = new_format; - } static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 72f352df3c3..ca2415adb28 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -33,11 +33,6 @@ #include <util/u_memory.h> #include "util/u_upload_mgr.h" -#include "state_tracker/drm_driver.h" - -#include <xf86drm.h> -#include "radeon_drm.h" - #include "r600.h" #include "r600_pipe.h" @@ -48,7 +43,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { - r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); + r600_bo_reference(&rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; util_slab_free(&rscreen->pool_buffers, rbuffer); @@ -81,12 +76,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; uint8_t *data; if (rbuffer->r.b.user_ptr) return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; - data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); + data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, rctx->ctx.cs, transfer->usage); if (!data) return NULL; @@ -97,12 +93,13 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; if (rbuffer->r.b.user_ptr) return; if (rbuffer->r.bo) - r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); + r600_bo_unmap(rctx->screen->radeon, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, @@ -127,20 +124,20 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned stride, unsigned layer_stride) { - struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe; + struct radeon *radeon = rctx->screen->radeon; struct r600_resource_buffer *rbuffer = r600_buffer(resource); uint8_t *map = NULL; assert(rbuffer->r.b.user_ptr == NULL); - map = r600_bo_map(ws, rbuffer->r.bo, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, - pipe); + map = r600_bo_map(radeon, rbuffer->r.bo, rctx->ctx.cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); memcpy(map + box->x, data, box->width); if (rbuffer->r.bo) - r600_bo_unmap(ws, rbuffer->r.bo); + r600_bo_unmap(radeon, rbuffer->r.bo); } static const struct u_resource_vtbl r600_buffer_vtbl = @@ -175,7 +172,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.size = rbuffer->r.b.b.b.width0; rbuffer->r.bo_size = rbuffer->r.size; - bo = r600_bo((struct radeon*)screen->winsys, + bo = r600_bo(rscreen->radeon, rbuffer->r.b.b.b.width0, alignment, rbuffer->r.b.b.b.bind, rbuffer->r.b.b.b.usage); @@ -219,18 +216,18 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_resource *rbuffer; struct r600_bo *bo = NULL; - bo = r600_bo_handle(rw, whandle->handle, NULL); + bo = r600_bo_handle(rw, whandle, NULL, NULL); if (bo == NULL) { return NULL; } rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { - r600_bo_reference(rw, &bo, NULL); + r600_bo_reference(&bo, NULL); return NULL; } diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index 1c1089d89d2..b822cba9293 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -99,7 +99,6 @@ static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) /* No fixed, no double. */ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || - desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || (desc->channel[i].size == 64 && desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) return false; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 615f0688eb3..ceaebbb4431 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -47,12 +47,14 @@ #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" +#include "../../winsys/r600/drm/r600_drm_public.h" /* * pipe_context */ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence *fence = NULL; if (!ctx->fences.bo) { @@ -62,7 +64,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) R600_ERR("r600: failed to create bo for fence objects\n"); return NULL; } - ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, rctx->ctx.cs, + PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_WRITE); } if (!LIST_IS_EMPTY(&ctx->fences.pool)) { @@ -113,29 +116,28 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) return fence; } -static void r600_flush(struct pipe_context *ctx, - struct pipe_fence_handle **fence) + +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_fence **rfence = (struct r600_fence**)fence; -#if 0 - static int dc = 0; - char dname[256]; -#endif - if (rfence) *rfence = r600_create_fence(rctx); -#if 0 - sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 20) { - r600_context_dump_bof(&rctx->ctx, dname); - R600_ERR("dumped %s\n", dname); - } - dc++; -#endif - r600_context_flush(&rctx->ctx); + r600_context_flush(&rctx->ctx, flags); +} + +static void r600_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence) +{ + r600_flush(ctx, fence, 0); +} + +static void r600_flush_from_winsys(void *ctx, unsigned flags) +{ + r600_flush((struct pipe_context*)ctx, NULL, flags); } static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) @@ -184,7 +186,7 @@ static void r600_destroy_context(struct pipe_context *context) } r600_bo_unmap(rctx->radeon, rctx->fences.bo); - r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL); + r600_bo_reference(&rctx->fences.bo, NULL); } r600_update_num_contexts(rctx->screen, -1); @@ -206,7 +208,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.screen = screen; rctx->context.priv = priv; rctx->context.destroy = r600_destroy_context; - rctx->context.flush = r600_flush; + rctx->context.flush = r600_flush_from_st; /* Easy accessing of screen/winsys. */ rctx->screen = rscreen; @@ -256,6 +258,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->screen->ws->cs_set_flush_callback(rctx->ctx.cs, r600_flush_from_winsys, rctx); + util_slab_create(&rctx->pool_transfers, sizeof(struct pipe_transfer), 64, UTIL_SLAB_SINGLETHREADED); @@ -269,6 +273,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_destroy_context(&rctx->context); return NULL; } + rctx->vbuf_mgr->caps.format_fixed32 = 0; rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { @@ -355,6 +360,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SM3: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + case PIPE_CAP_PRIMITIVE_RESTART: return 1; /* Supported except the original R600. */ @@ -369,7 +375,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Unsupported features. */ case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -481,6 +486,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: return 0; } @@ -498,6 +505,8 @@ static int r600_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } @@ -510,7 +519,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) if (rscreen == NULL) return; - radeon_decref(rscreen->radeon); + radeon_destroy(rscreen->radeon); + rscreen->ws->destroy(rscreen->ws); util_slab_destroy(&rscreen->pool_buffers); pipe_mutex_destroy(rscreen->mutex_num_contexts); @@ -574,17 +584,19 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen, return TRUE; } -struct pipe_screen *r600_screen_create(struct radeon *radeon) +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) { struct r600_screen *rscreen; + struct radeon *radeon = radeon_create(ws); rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } + rscreen->ws = ws; rscreen->radeon = radeon; - rscreen->screen.winsys = (struct pipe_winsys*)radeon; + rscreen->screen.winsys = (struct pipe_winsys*)ws; rscreen->screen.destroy = r600_destroy_screen; rscreen->screen.get_name = r600_get_name; rscreen->screen.get_vendor = r600_get_vendor; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6f399ed43b0..2747f54079c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -26,6 +26,8 @@ #ifndef R600_PIPE_H #define R600_PIPE_H +#include "../../winsys/radeon/drm/radeon_winsys.h" + #include <pipe/p_state.h> #include <pipe/p_screen.h> #include <pipe/p_context.h> @@ -72,6 +74,7 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; + struct radeon_winsys *ws; struct radeon *radeon; struct r600_tiling_info *tiling_info; struct util_slab_mempool pool_buffers; @@ -183,7 +186,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; - struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; + struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; @@ -247,7 +250,8 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, @@ -270,6 +274,11 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); + +/* r600_pipe.c */ +void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags); + /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -294,7 +303,8 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride); + unsigned offset, unsigned stride, + enum radeon_bo_usage usage); void r600_adjust_gprs(struct r600_pipe_context *rctx); boolean r600_is_format_supported(struct pipe_screen *screen, enum pipe_format format, diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h index f1970201e89..e4fe23a87b7 100644 --- a/src/gallium/drivers/r600/r600_public.h +++ b/src/gallium/drivers/r600/r600_public.h @@ -23,6 +23,8 @@ #ifndef R600_PUBLIC_H #define R600_PUBLIC_H -struct pipe_screen *r600_screen_create(struct radeon *radeon); +struct radeon_winsys; + +struct pipe_screen *r600_screen_create(struct radeon_winsys *ws); #endif diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 836e7491f1f..d9d29db7968 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -52,6 +52,12 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; + + /* If this resource is a depth-stencil buffer on evergreen, this contains + * the depth part of the format. There is a separate stencil resource + * for the stencil buffer below. */ + enum pipe_format real_format; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ @@ -62,6 +68,7 @@ struct r600_resource_texture { unsigned tile_type; unsigned depth; unsigned dirty_db; + struct r600_resource_texture *stencil; /* Stencil is in a separate buffer on Evergreen. */ struct r600_resource_texture *flushed_depth_texture; boolean is_flushing_texture; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3e21ad1fdc6..c37bb729ce3 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -86,7 +86,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE); if (R600_BIG_ENDIAN) { for (i = 0; i < rshader->bc.ndw; ++i) { ptr[i] = bswap_32(rshader->bc.bytecode[i]); @@ -140,13 +140,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("translation from TGSI failed !\n"); return r; } - r = r600_bc_build(&shader->shader.bc); + r = r600_bytecode_build(&shader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); return r; } if (dump_shaders) { - r600_bc_dump(&shader->shader.bc); + r600_bytecode_dump(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } return r600_pipe_shader(ctx, shader); @@ -154,10 +154,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - r600_bo_reference(rctx->radeon, &shader->bo, NULL); - r600_bc_clear(&shader->shader.bc); + r600_bo_reference(&shader->bo, NULL); + r600_bytecode_clear(&shader->shader.bc); memset(&shader->shader,0,sizeof(struct r600_shader)); } @@ -185,7 +183,7 @@ struct r600_shader_ctx { unsigned temp_reg; unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; + struct r600_bytecode *bc; struct r600_shader *shader; struct r600_shader_src src[4]; u32 *literals; @@ -246,7 +244,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int gpr = 0, base_chan = 0; int ij_index = 0; @@ -272,7 +270,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (i < 4) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; @@ -294,7 +292,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -332,6 +330,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* these don't count as vertex param exports */ + if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || + (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) + ctx->shader->npos++; + } break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -341,8 +345,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SYSTEM_VALUE: if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); alu.src[0].sel = 0; @@ -353,7 +357,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; } @@ -436,7 +440,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } @@ -460,12 +464,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx, static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) { - struct r600_bc_vtx vtx; + struct r600_bytecode_vtx vtx; unsigned int ar_reg; int r; if (offset) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(alu)); @@ -479,7 +483,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; ar_reg = dst_reg; @@ -502,7 +506,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ vtx.endian = r600_endian_swap(32); - if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) return r; return 0; @@ -511,7 +515,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nconst, r; for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -536,7 +540,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -546,7 +550,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -562,7 +566,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, k, nliteral, r; for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -574,7 +578,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; @@ -584,7 +588,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (k == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -602,14 +606,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; - struct r600_bc_output output[32]; + struct r600_bytecode_output output[32]; unsigned output_done, noutput; unsigned opcode; int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; - r600_bc_init(ctx.bc, rctx->chip_class); + r600_bytecode_init(ctx.bc, rctx->chip_class); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -649,18 +653,18 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; if (ctx.bc->chip_class >= EVERGREEN) { - r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { - r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + - ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.info.file_max[TGSI_FILE_INPUT] + 1; ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + - ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; /* Outside the GPR range. This will be translated to one of the * kcache banks later. */ @@ -668,7 +672,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + - ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; @@ -742,8 +746,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi int j; for (j = 0; j < 4; j++) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* MOV_SAT R, R */ alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -757,7 +761,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi if (j == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx.bc, &alu); + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -768,7 +772,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi /* export output */ j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -801,7 +805,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { for (j = 1; j < shader->nr_cbufs; j++) { - memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); output[i + j].gpr = shader->output[i].gpr; output[i + j].elem_size = 3; output[i + j].swizzle_x = 0; @@ -850,7 +854,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } } if (!pos0) { - memset(&output[i], 0, sizeof(struct r600_bc_output)); + memset(&output[i], 0, sizeof(struct r600_bytecode_output)); output[i].gpr = 0; output[i].elem_size = 3; output[i].swizzle_x = 0; @@ -867,7 +871,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add fake pixel export */ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { - memset(&output[0], 0, sizeof(struct r600_bc_output)); + memset(&output[0], 0, sizeof(struct r600_bytecode_output)); output[0].gpr = 0; output[0].elem_size = 3; output[0].swizzle_x = 7; @@ -895,13 +899,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi } /* add output to bytecode */ for (i = 0; i < noutput; i++) { - r = r600_bc_add_output(ctx.bc, &output[i]); + r = r600_bytecode_add_output(ctx.bc, &output[i]); if (r) goto out_err; } /* add program end */ if (ctx.bc->chip_class == CAYMAN) - cm_bc_add_cf_end(ctx.bc); + cm_bytecode_add_cf_end(ctx.bc); free(ctx.literals); tgsi_parse_free(&ctx.parse); @@ -924,7 +928,7 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static void r600_bc_src(struct r600_bc_alu_src *bc_src, +static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, const struct r600_shader_src *shader_src, unsigned chan) { @@ -936,13 +940,13 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } -static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) { bc_src->abs = 1; bc_src->neg = 0; } -static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) +static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) { bc_src->neg = !bc_src->neg; } @@ -950,7 +954,7 @@ static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src) static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) + struct r600_bytecode_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -980,7 +984,7 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -988,25 +992,25 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } } else { - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_SUB: - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src_toggle_neg(&alu.src[1]); break; case TGSI_OPCODE_ABS: - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src_set_abs(&alu.src[0]); break; default: break; @@ -1014,7 +1018,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1035,21 +1039,21 @@ static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, j, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; for (i = 0 ; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], 0); + r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1068,9 +1072,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static float neg_pi = -3.1415926535; int r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1078,7 +1082,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; @@ -1086,11 +1090,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); alu.dst.chan = 0; @@ -1100,11 +1104,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1130,7 +1134,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) } alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -1139,7 +1143,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) static int cayman_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; int i, r; @@ -1149,7 +1153,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) for (i = 0; i < last_slot; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1160,7 +1164,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1170,7 +1174,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1178,7 +1182,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -1187,7 +1191,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1196,14 +1200,14 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1213,7 +1217,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* We'll only need the trig stuff if we are going to write to the @@ -1229,7 +1233,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1241,19 +1245,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1263,7 +1267,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 1) @@ -1274,19 +1278,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1294,7 +1298,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.z = 0.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1305,14 +1309,14 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst.w = 1.0; */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1323,7 +1327,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1333,11 +1337,11 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; @@ -1348,12 +1352,12 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1367,13 +1371,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; /* tmp.x = max(src.y, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 1; @@ -1382,7 +1386,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1395,7 +1399,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1407,13 +1411,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* tmp.z = log(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1421,7 +1425,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1430,25 +1434,25 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); alu.src[0].sel = sel; alu.src[0].chan = chan; - r600_bc_src(&alu.src[1], &ctx->src[0], 3); - r600_bc_src(&alu.src[2], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.is_op3 = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1458,56 +1462,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.last = 1; } else alu.dst.write = 0; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -1517,10 +1521,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) static int tgsi_rsq(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); /* FIXME: * For state trackers other than OpenGL, we'll want to use @@ -1529,13 +1533,13 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); - r600_bc_src_set_abs(&alu.src[i]); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src_set_abs(&alu.src[i]); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1545,11 +1549,11 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; @@ -1557,7 +1561,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1567,18 +1571,18 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r600_bc_src(&alu.src[i], &ctx->src[i], 0); + r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* replicate result */ @@ -1589,38 +1593,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, r; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; for (i = 0; i < last_slot; i++) { /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; @@ -1628,7 +1632,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == last_slot - 1) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1637,38 +1641,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; /* LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* b * LOG2(a) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[1], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return tgsi_helper_tempx_replicate(ctx); @@ -1677,32 +1681,32 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; - r600_bc_src(&alu.src[2], &ctx->src[0], i); + r600_bytecode_src(&alu.src[2], &ctx->src[0], i); if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1719,7 +1723,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1728,11 +1732,11 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); alu.dst.chan = i; @@ -1745,7 +1749,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1755,7 +1759,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1763,10 +1767,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1776,7 +1780,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1786,14 +1790,14 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, j, r; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &ctx->src[j], i); + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1826,7 +1830,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (i == 3) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1853,8 +1857,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_tex tex; - struct r600_bc_alu alu; + struct r600_bytecode_tex tex; + struct r600_bytecode_alu alu; unsigned src_gpr; int r, i, j; int opcode; @@ -1872,7 +1876,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 1; i < 3; i++) { /* set gradients h/v */ - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : SQ_TEX_INST_SET_GRADIENTS_V; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -1886,15 +1890,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_w = 3; for (j = 0; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[i], j); + r600_bytecode_src(&alu.src[0], &ctx->src[i], j); alu.dst.sel = tex.src_gpr; alu.dst.chan = j; if (j == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -1915,7 +1919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_z = 1; tex.coord_type_w = 1; } - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; } @@ -1925,9 +1929,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { out_chan = 2; for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1935,40 +1939,40 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.last = 1; if (out_chan == i) alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { out_chan = 3; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = out_chan; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = out_chan; - r600_bc_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1976,7 +1980,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; src_loaded = TRUE; @@ -1989,16 +1993,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2006,7 +2010,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.z = RCP_e(|tmp1.z|) */ if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2017,12 +2021,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; @@ -2031,7 +2035,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 2; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2040,7 +2044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2057,11 +2061,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -2079,7 +2083,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2089,15 +2093,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) alu.last = 1; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2120,7 +2124,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } } - memset(&tex, 0, sizeof(struct r600_bc_tex)); + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); tex.inst = opcode; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); @@ -2167,7 +2171,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = tex.src_sel_z; - r = r600_bc_add_tex(ctx->bc, &tex); + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; @@ -2178,7 +2182,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; @@ -2189,17 +2193,17 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - r600_bc_src(&alu.src[0], &ctx->src[1], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[0], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2211,19 +2215,19 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r600_bc_src(&alu.src[1], &ctx->src[0], i); - r600_bc_src_toggle_neg(&alu.src[1]); + r600_bytecode_src(&alu.src[1], &ctx->src[0], i); + r600_bytecode_src_toggle_neg(&alu.src[1]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2233,18 +2237,18 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { alu.last = 1; } alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2254,11 +2258,11 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - r600_bc_src(&alu.src[0], &ctx->src[0], i); - r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -2267,7 +2271,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (i == lasti) { alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2277,7 +2281,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -2285,18 +2289,18 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - r600_bc_src(&alu.src[0], &ctx->src[0], i); - r600_bc_src(&alu.src[1], &ctx->src[2], i); - r600_bc_src(&alu.src[2], &ctx->src[1], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[2], &ctx->src[1], i); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; if (i == lasti) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2308,7 +2312,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; static const unsigned int src0_swizzle[] = {2, 0, 1}; static const unsigned int src1_swizzle[] = {1, 2, 0}; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; uint32_t use_temp = 0; int i, r; @@ -2316,11 +2320,11 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) use_temp = 1; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2334,18 +2338,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); if (i < 3) { - r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); - r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; @@ -2366,7 +2370,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) alu.is_op3 = 1; if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2378,22 +2382,22 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2409,7 +2413,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2422,7 +2426,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2430,10 +2434,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.y = tmp - floor(tmp); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; #if 0 @@ -2446,7 +2450,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2455,9 +2459,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2466,14 +2470,14 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; } - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2481,7 +2485,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2489,7 +2493,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.w = 1.0;*/ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2499,7 +2503,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.dst.chan = 3; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2509,7 +2513,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) static int tgsi_log(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; int i; @@ -2517,11 +2521,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2529,23 +2533,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2559,7 +2563,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2569,11 +2573,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2582,28 +2586,28 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; @@ -2614,13 +2618,13 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2632,12 +2636,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2647,14 +2651,14 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2666,12 +2670,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -2681,17 +2685,17 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2701,7 +2705,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2710,11 +2714,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; if (i == 2) @@ -2723,23 +2727,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (i == 2) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r600_bc_src_set_abs(&alu.src[0]); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src_set_abs(&alu.src[0]); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.dst.chan = 2; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2747,7 +2751,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.w = 1.0; */ if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -2758,7 +2762,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2769,10 +2773,10 @@ static int tgsi_log(struct r600_shader_ctx *ctx) static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: @@ -2786,11 +2790,11 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; @@ -2800,12 +2804,12 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) * between ARL and AR usage. The easy way to do that is to remove * the MOVA here, and load it for the first AR access after ar_reg * has been modified in each clause. */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -2814,19 +2818,19 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; memset(&alu, 0, sizeof(alu)); @@ -2836,18 +2840,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; case TGSI_OPCODE_ARR: memset(&alu, 0, sizeof(alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) return r; break; default: @@ -2860,7 +2864,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->ar_reg; alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; ctx->bc->cf_last->r6xx_uses_waterfall = 1; @@ -2870,11 +2874,11 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) static int tgsi_opdst(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int i, r = 0; for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2882,17 +2886,17 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); } if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[1], &ctx->src[1], i); + r600_bytecode_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } @@ -2901,10 +2905,10 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct r600_bc_alu alu; + struct r600_bytecode_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = opcode; alu.predicate = 1; @@ -2912,13 +2916,13 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); + r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); if (r) return r; return 0; @@ -2926,25 +2930,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - int alu_pop = 3; - if (ctx->bc->cf_last) { - if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) - alu_pop = 0; - else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) - alu_pop = 1; - } - alu_pop += pops; - if (alu_pop == 1) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else if (alu_pop == 2) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; - ctx->bc->force_add_cf = 1; - } else { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + unsigned force_pop = ctx->bc->force_add_cf; + + if (!force_pop) { + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } + return 0; } @@ -3011,8 +3024,8 @@ static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) { struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; - sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, - sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); + sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, + sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); sp->mid[sp->num_mid] = ctx->bc->cf_last; sp->num_mid++; } @@ -3040,14 +3053,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx) #if 0 static int emit_return(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); return 0; } static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) { - r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); ctx->bc->cf_last->pop_count = pops; /* TODO work out offset */ return 0; @@ -3076,7 +3089,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) { emit_testflag(ctx); - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fc_sp); @@ -3089,7 +3102,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) { emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); fc_pushlevel(ctx, FC_IF); @@ -3099,7 +3112,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx) static int tgsi_else(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, ctx->bc->fc_sp); @@ -3129,7 +3142,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) static int tgsi_bgnloop(struct r600_shader_ctx *ctx) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); fc_pushlevel(ctx, FC_LOOP); @@ -3142,7 +3155,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) { int i; - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { R600_ERR("loop/endloop in shader code are not paired.\n"); @@ -3182,7 +3195,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return -EINVAL; } - r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fscp); @@ -3228,7 +3241,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3353,6 +3366,18 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; @@ -3386,7 +3411,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, @@ -3511,6 +3536,18 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; @@ -3544,7 +3581,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, @@ -3669,5 +3706,17 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 3ba84bd8907..ada369ade68 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -37,9 +37,10 @@ struct r600_shader_io { struct r600_shader { unsigned processor_type; - struct r600_bc bc; + struct r600_bytecode bc; unsigned ninput; unsigned noutput; + unsigned npos; unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 01406f2bad6..fba2af8a6ac 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -662,19 +662,19 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); r600_pipe_state_add_reg(&state, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); + fui(offset_units), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&state, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &state); } } @@ -689,10 +689,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, return; rstate->id = R600_PIPE_STATE_BLEND_COLOR; - r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]); rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); @@ -742,7 +742,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, blend->cb_target_mask = target_mask; /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFD, NULL); + color_control, 0xFFFFFFFD, NULL, 0); for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -773,9 +773,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, /* R600 does not support per-MRT blends */ if (rctx->family > CHIP_R600) - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL, 0); if (i == 0) - r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -842,28 +842,28 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); /* TODO db_render_override depends on query */ - r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, stencil_ref_mask, - 0xFFFFFFFF & C_028430_STENCILREF, NULL); + 0xFFFFFFFF & C_028430_STENCILREF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf, - 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); + 0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0); /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by * r600_pipe_shader_ps().*/ - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); - r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -907,7 +907,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); } } - r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0); polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL); @@ -921,33 +921,33 @@ static void *r600_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | S_028814_POLY_MODE(polygon_dual_mode) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL); + S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL); + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); /* point size 12.4 fixed point */ tmp = (unsigned)(state->point_size * 8.0); - r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0); tmp = (unsigned)state->line_width * 8; - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0); return rstate; } @@ -977,17 +977,17 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL, 0); if (uc.ui) { - r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0); } return rstate; } @@ -996,10 +996,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); + struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; - const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture; struct r600_resource *rbuffer; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; @@ -1007,43 +1006,42 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct r600_bo *bo[2]; unsigned width, height, depth, offset_level, last_level; - if (resource == NULL) + if (view == NULL) return NULL; - rstate = &resource->state; + rstate = &view->state; /* initialize base object */ - resource->base = *state; - resource->base.texture = NULL; + view->base = *state; + view->base.texture = NULL; pipe_reference(NULL, &texture->reference); - resource->base.texture = texture; - resource->base.reference.count = 1; - resource->base.context = ctx; + view->base.texture = texture; + view->base.reference.count = 1; + view->base.context = ctx; swizzle[0] = state->swizzle_r; swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { format = 0; } - desc = util_format_description(state->format); - if (desc == NULL) { - R600_ERR("unknown format %d\n", state->format); - } - tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); if (tmp->force_int_type) { word4 &= C_038010_NUM_FORMAT_ALL; word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -1068,6 +1066,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->bo[0] = bo[0]; rstate->bo[1] = bo[1]; + rstate->bo_usage[0] = RADEON_USAGE_READ; + rstate->bo_usage[1] = RADEON_USAGE_READ; rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) | S_038000_TILE_MODE(array_mode) | @@ -1077,8 +1077,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = tmp->offset[offset_level] >> 8; + rstate->val[3] = tmp->offset[offset_level+1] >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | @@ -1090,7 +1090,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | S_038018_MAX_ANISO(4 /* max 16 samples */)); - return &resource->base; + return &view->base; } static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, @@ -1157,7 +1157,7 @@ static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean en rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), - 1, NULL); + 1, NULL, 0); free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; @@ -1215,21 +1215,21 @@ static void r600_set_clip_state(struct pipe_context *ctx, for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, R_028E20_PA_CL_UCP0_X + i * 16, - fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E24_PA_CL_UCP0_Y + i * 16, - fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); + fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E28_PA_CL_UCP0_Z + i * 16, - fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028E2C_PA_CL_UCP0_W + i * 16, - fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); + fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) | S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) | - S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL); + S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_CLIP]); rctx->states[R600_PIPE_STATE_CLIP] = rstate; @@ -1260,28 +1260,28 @@ static void r600_set_scissor_state(struct pipe_context *ctx, br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1303,11 +1303,11 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, tmp = S_028430_STENCILREF(state->ref_value[0]); r600_pipe_state_add_reg(rstate, R_028430_DB_STENCILREFMASK, tmp, - ~C_028430_STENCILREF, NULL); + ~C_028430_STENCILREF, NULL, 0); tmp = S_028434_STENCILREF_BF(state->ref_value[1]); r600_pipe_state_add_reg(rstate, R_028434_DB_STENCILREFMASK_BF, tmp, - ~C_028434_STENCILREF_BF, NULL); + ~C_028434_STENCILREF_BF, NULL, 0); free(rctx->states[R600_PIPE_STATE_STENCIL_REF]); rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate; @@ -1325,15 +1325,15 @@ static void r600_set_viewport_state(struct pipe_context *ctx, rctx->viewport = *state; rstate->id = R600_PIPE_STATE_VIEWPORT; - r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_VIEWPORT]); rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate; @@ -1441,27 +1441,27 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, - color_info, 0xFFFFFFFF, bo[0]); + color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028060_CB_COLOR0_SIZE + cb * 4, S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028080_CB_COLOR0_VIEW + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0280E0_CB_COLOR0_FRAG + cb * 4, - r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]); + 0, 0xFFFFFFFF, bo[1], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_0280C0_CB_COLOR0_TILE + cb * 4, - r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]); + 0, 0xFFFFFFFF, bo[2], RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028100_CB_COLOR0_MASK + cb * 4, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); } static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate, @@ -1492,16 +1492,16 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), - 0xFFFFFFFF, rbuffer->bo); + 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, - (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL); + (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL, 0); } static void r600_set_framebuffer_state(struct pipe_context *ctx, @@ -1546,59 +1546,59 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028244_PA_SC_GENERIC_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, - shader_control, 0xFFFFFFFF, NULL); + shader_control, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, - 0x00000000, target_mask, NULL); + 0x00000000, target_mask, NULL, 0); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, - shader_mask, 0xFFFFFFFF, NULL); + shader_mask, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL, - 0x01000000, 0xFFFFFFFF, NULL); + 0x01000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST, - 0x000000FF, 0xFFFFFFFF, NULL); + 0x000000FF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK, - 0xFFFFFFFF, 0xFFFFFFFF, NULL); + 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0); free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; @@ -1674,7 +1674,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx) if (rctx->chip_class >= EVERGREEN) return; - if (!rctx->ps_shader && !rctx->vs_shader) + if (!rctx->ps_shader || !rctx->vs_shader) return; if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs) @@ -1695,7 +1695,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx) tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); rstate.nregs = 0; - r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); } @@ -1866,20 +1866,20 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_VS_PRIO(vs_prio); tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_GPR_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_THREAD_RESOURCE_MGMT */ tmp = 0; @@ -1887,78 +1887,78 @@ void r600_init_config(struct r600_pipe_context *rctx) tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_1 */ tmp = 0; tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0); /* SQ_STACK_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0); - r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL, 0); if (rctx->chip_class >= R700) { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL, 0); } else { - r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, S_009508_DISABLE_CUBE_ANISO(1) | S_009508_SYNC_GRADIENT(1) | S_009508_SYNC_WALKER(1) | - S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); - } - r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL); - - r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL); + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL, 0); + } + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + + r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -2022,38 +2022,38 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028850_SQ_PGM_RESOURCES_PS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); + exports_ps, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), S_028808_MULTIWRITE_ENABLE(1), - NULL); + NULL, 0); /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, S_02880C_Z_EXPORT_ENABLE(1) | S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | S_02880C_KILL_ENABLE(1), - NULL); + NULL, 0); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2062,7 +2062,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; - unsigned i, tmp; + unsigned i, tmp, nparams; /* clear previous register */ rstate->nregs = 0; @@ -2081,28 +2081,36 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad for (i = 0; i < 10; i++) { r600_pipe_state_add_reg(rstate, R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); + spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0); } + /* Certain attributes (position, psize, etc.) don't count as params. + * VS is required to export at least one param and r600_shader_from_tgsi() + * takes care of adding a dummy export. + */ + nparams = rshader->noutput - rshader->npos; + if (nparams < 1) + nparams = 1; + r600_pipe_state_add_reg(rstate, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); + S_0286C4_VS_EXPORT_COUNT(nparams - 1), + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028868_SQ_PGM_RESOURCES_VS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); } void r600_fetch_shader(struct pipe_context *ctx, @@ -2115,12 +2123,12 @@ void r600_fetch_shader(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); + 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + 0, + 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) @@ -2149,7 +2157,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 0x0, - S_02880C_DUAL_EXPORT_ENABLE(1), NULL); + S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0); r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, S_028D0C_DEPTH_COPY_ENABLE(1) | @@ -2157,7 +2165,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), S_028D0C_DEPTH_COPY_ENABLE(1) | S_028D0C_STENCIL_COPY_ENABLE(1) | - S_028D0C_COPY_CENTROID(1), NULL); + S_028D0C_COPY_CENTROID(1), NULL, 0); return rstate; } @@ -2178,10 +2186,12 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + unsigned offset, unsigned stride, + enum radeon_bo_usage usage) { rstate->val[0] = offset; rstate->bo[0] = rbuffer->bo; + rstate->bo_usage[0] = usage; rstate->val[1] = rbuffer->bo_size - offset - 1; rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | S_038008_STRIDE(stride); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 408eaed491b..853458f0156 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -150,7 +150,7 @@ void r600_delete_state(struct pipe_context *ctx, void *state) rctx->states[rstate->id] = NULL; } for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + r600_bo_reference(&rstate->regs[i].bo, NULL); } free(rstate); } @@ -181,7 +181,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) if (rctx->vertex_elements == state) rctx->vertex_elements = NULL; - r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + r600_bo_reference(&v->fetch_shader, NULL); u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -336,7 +336,7 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx) rstate.nregs = 0; if (rctx->export_16bpc) alpha_ref &= ~0x1FFF; - r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL, 0); r600_context_pipe_state_set(&rctx->ctx, &rstate); rctx->alpha_ref_dirty = false; @@ -349,7 +349,7 @@ static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe rstate->nregs = 0; rstate->id = R600_PIPE_STATE_SPI; for (i = 0; i < 32; i++) { - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL, 0); } } @@ -418,7 +418,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); - offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -426,10 +425,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; @@ -442,10 +441,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } break; @@ -454,10 +453,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); + 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; @@ -469,10 +468,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ); r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } break; @@ -518,7 +517,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (vertex_buffer == NULL || rbuffer == NULL) continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + offset += vertex_buffer->buffer_offset; if (!rstate->id) { if (rctx->chip_class >= EVERGREEN) { @@ -529,10 +528,10 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { - r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } } @@ -615,16 +614,18 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; rctx->vgt.nregs = 0; - r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, draw.info.restart_index, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, draw.info.primitive_restart, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL, 0, - S_028814_PROVOKING_VTX_LAST(1), NULL); + S_028814_PROVOKING_VTX_LAST(1), NULL, 0); } @@ -634,6 +635,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.restart_index); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.primitive_restart); r600_pipe_state_mod_reg(&rctx->vgt, 0); r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance); if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) { @@ -676,11 +679,14 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, u32 range_id, u32 block_id, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { struct r600_range *range; struct r600_block *block; + if (bo) assert(usage); + range = &ctx->range[range_id]; block = range->blocks[block_id]; state->regs[state->nregs].block = block; @@ -689,6 +695,7 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); @@ -696,13 +703,17 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx, void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, u32 offset, u32 value, u32 mask, - struct r600_bo *bo) + struct r600_bo *bo, + enum radeon_bo_usage usage) { + if (bo) assert(usage); + state->regs[state->nregs].id = offset; state->regs[state->nregs].block = NULL; state->regs[state->nregs].value = value; state->regs[state->nregs].mask = mask; state->regs[state->nregs].bo = bo; + state->regs[state->nregs].bo_usage = usage; state->nregs++; assert(state->nregs < R600_BLOCK_MAX_REG); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e9e8b277243..7c1bd9d8ec6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,7 +31,6 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include "state_tracker/drm_driver.h" #include "pipebuffer/pb_buffer.h" #include "r600_pipe.h" #include "r600_resource.h" @@ -67,7 +66,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, RADEON_FLUSH_ASYNC); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -174,15 +173,15 @@ static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, { struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned nblocksx, block_align, width; - unsigned blocksize = util_format_get_blocksize(ptex->format); + unsigned blocksize = util_format_get_blocksize(rtex->real_format); if (rtex->pitch_override) return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - nblocksx = util_format_get_nblocksx(ptex->format, width); + nblocksx = util_format_get_nblocksx(rtex->real_format, width); - block_align = r600_get_block_alignment(screen, ptex->format, + block_align = r600_get_block_alignment(screen, rtex->real_format, rtex->array_mode[level]); nblocksx = align(nblocksx, block_align); return nblocksx; @@ -196,9 +195,19 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, unsigned height, tile_height; height = mip_minify(ptex->height0, level); - height = util_format_get_nblocksy(ptex->format, height); + height = util_format_get_nblocksy(rtex->real_format, height); tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + + /* XXX Hack around an alignment issue. Less tests fail with this. + * + * The thing is depth-stencil buffers should be tiled, i.e. + * the alignment should be >=8. If I make them tiled, stencil starts + * working because it no longer overlaps with the depth buffer + * in memory, but texturing like drawpix-stencil breaks. */ + if (util_format_is_depth_or_stencil(rtex->real_format) && tile_height < 8) + tile_height = 8; + height = align(height, tile_height); return height; } @@ -221,7 +230,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, rtex->real_format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); @@ -239,14 +248,14 @@ static void r600_setup_miptree(struct pipe_screen *screen, unsigned array_mode) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; - unsigned nblocksx, nblocksy, extra_size = 0; + unsigned nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { - unsigned blocksize = util_format_get_blocksize(ptex->format); - unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + unsigned blocksize = util_format_get_blocksize(rtex->real_format); + unsigned base_align = r600_get_base_alignment(screen, rtex->real_format, array_mode); r600_texture_set_array_mode(screen, rtex, i, array_mode); @@ -265,10 +274,6 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * ptex->array_size; - /* evergreen stores depth and stencil separately */ - if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format)) - extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align); - /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, base_align); @@ -279,7 +284,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset += size; } - rtex->size = offset + extra_size; + rtex->size = offset; } /* Figure out whether u_blitter will fallback to a transfer operation. @@ -329,7 +334,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; return r600_bo_get_winsys_handle(radeon, resource->bo, rtex->pitch_in_bytes[0], whandle); @@ -340,13 +345,12 @@ static void r600_texture_destroy(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; if (rtex->flushed_depth_texture) pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); + r600_bo_reference(&resource->bo, NULL); } FREE(rtex); } @@ -369,11 +373,12 @@ r600_texture_create_object(struct pipe_screen *screen, unsigned array_mode, unsigned pitch_in_bytes_override, unsigned max_buffer_size, - struct r600_bo *bo) + struct r600_bo *bo, + boolean alloc_bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; - struct radeon *radeon = (struct radeon *)screen->winsys; + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; rtex = CALLOC_STRUCT(r600_resource_texture); if (rtex == NULL) @@ -386,59 +391,112 @@ r600_texture_create_object(struct pipe_screen *screen, resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + rtex->real_format = base->format; + + /* We must split depth and stencil into two separate buffers on Evergreen. */ + if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) && + r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN && + util_format_is_depth_and_stencil(base->format)) { + struct pipe_resource stencil; + unsigned stencil_pitch_override = 0; + + switch (base->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + rtex->real_format = PIPE_FORMAT_Z24X8_UNORM; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + rtex->real_format = PIPE_FORMAT_X8Z24_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + rtex->real_format = PIPE_FORMAT_Z32_FLOAT; + break; + default: + assert(0); + FREE(rtex); + return NULL; + } + + /* Divide the pitch in bytes by 4 for stencil, because it has a smaller pixel size. */ + if (pitch_in_bytes_override) { + assert(base->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + base->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + stencil_pitch_override = pitch_in_bytes_override / 4; + } + + /* Allocate the stencil buffer. */ + stencil = *base; + stencil.format = PIPE_FORMAT_S8_USCALED; + rtex->stencil = r600_texture_create_object(screen, &stencil, array_mode, + stencil_pitch_override, + max_buffer_size, NULL, FALSE); + if (!rtex->stencil) { + FREE(rtex); + return NULL; + } + /* Proceed in creating the depth buffer. */ + } + /* only mark depth textures the HW can hit as depth textures */ - if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) + if (util_format_is_depth_or_stencil(rtex->real_format) && permit_hardware_blit(screen, base)) rtex->depth = 1; r600_setup_miptree(screen, rtex, array_mode); + /* If we initialized separate stencil for Evergreen. place it after depth. */ + if (rtex->stencil) { + unsigned stencil_align, stencil_offset; + + stencil_align = r600_get_base_alignment(screen, rtex->stencil->real_format, array_mode); + stencil_offset = align(rtex->size, stencil_align); + + for (unsigned i = 0; i <= rtex->stencil->resource.b.b.b.last_level; i++) + rtex->stencil->offset[i] += stencil_offset; + + rtex->size = stencil_offset + rtex->stencil->size; + } + resource->size = rtex->size; - if (!resource->bo) { + /* Now create the backing buffer. */ + if (!resource->bo && alloc_bo) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); if (!resource->bo) { + pipe_resource_reference((struct pipe_resource**)&rtex->stencil, NULL); FREE(rtex); return NULL; } } + + if (rtex->stencil) + rtex->stencil->resource.bo = rtex->resource.bo; return rtex; } +DEBUG_GET_ONCE_BOOL_OPTION(tiling_enabled, "R600_TILING", FALSE); + struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { + struct radeon *radeon = ((struct r600_screen*)screen)->radeon; unsigned array_mode = 0; - static int force_tiling = -1; - /* Would like some magic "get_bool_option_once" routine. - */ - if (force_tiling == -1) { -#if 0 - /* reenable when 2D tiling is fixed better */ - struct r600_screen *rscreen = (struct r600_screen *)screen; - if (r600_get_minor_version(rscreen->radeon) >= 9) - force_tiling = debug_get_bool_option("R600_TILING", TRUE); -#endif - force_tiling = debug_get_bool_option("R600_TILING", FALSE); - } - - if (force_tiling && permit_hardware_blit(screen, templ)) { - if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - !(templ->bind & PIPE_BIND_SCANOUT)) { + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + !(templ->bind & PIPE_BIND_SCANOUT)) { + if (util_format_is_compressed(templ->format)) { + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + } + else if (debug_get_option_tiling_enabled() && + r600_get_minor_version(radeon) >= 9 && + permit_hardware_blit(screen, templ)) { array_mode = V_038000_ARRAY_2D_TILED_THIN1; } } - if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - util_format_is_compressed(templ->format)) - array_mode = V_038000_ARRAY_1D_TILED_THIN1; - return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, - 0, 0, NULL); - + 0, 0, NULL, TRUE); } static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, @@ -483,8 +541,9 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { - struct radeon *rw = (struct radeon*)screen->winsys; + struct radeon *rw = ((struct r600_screen*)screen)->radeon; struct r600_bo *bo = NULL; + unsigned stride = 0; unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ @@ -492,15 +551,13 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, templ->depth0 != 1 || templ->last_level != 0) return NULL; - bo = r600_bo_handle(rw, whandle->handle, &array_mode); + bo = r600_bo_handle(rw, whandle, &stride, &array_mode); if (bo == NULL) { return NULL; } return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, - whandle->stride, - 0, - bo); + stride, 0, bo, FALSE); } int r600_texture_depth_flush(struct pipe_context *ctx, @@ -590,6 +647,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) use_staging_texture = FALSE; + if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) + return NULL; + trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) return NULL; @@ -648,7 +708,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, NULL); + r600_flush(ctx, NULL, 0); } return &trans->transfer; } @@ -684,10 +744,11 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_bo *bo; enum pipe_format format = transfer->resource->format; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = rctx->screen->radeon; unsigned offset = 0; char *map; @@ -706,7 +767,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) { + if (!(map = r600_bo_map(radeon, bo, rctx->ctx.cs, transfer->usage))) { return NULL; } @@ -717,7 +778,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct radeon *radeon = (struct radeon *)ctx->screen->winsys; + struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon; struct r600_bo *bo; if (rtransfer->staging_texture) { @@ -754,11 +815,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, }; if (swizzle_view) { - /* Combine two sets of swizzles. */ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? - swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } + util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); } else { memcpy(swizzle, swizzle_format, 4); } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f6eec24cc05..de458cf398a 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -28,6 +28,32 @@ #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 +/* evergreen values */ +#define EG_RESOURCE_OFFSET 0x00030000 +#define EG_RESOURCE_END 0x00034000 +#define EG_LOOP_CONST_OFFSET 0x0003A200 +#define EG_LOOP_CONST_END 0x0003A26C +#define EG_BOOL_CONST_OFFSET 0x0003A500 +#define EG_BOOL_CONST_END 0x0003A506 + +#define R600_CONFIG_REG_OFFSET 0X00008000 +#define R600_CONFIG_REG_END 0X0000AC00 +#define R600_CONTEXT_REG_OFFSET 0X00028000 +#define R600_CONTEXT_REG_END 0X00029000 +#define R600_ALU_CONST_OFFSET 0X00030000 +#define R600_ALU_CONST_END 0X00032000 +#define R600_RESOURCE_OFFSET 0X00038000 +#define R600_RESOURCE_END 0X0003C000 +#define R600_SAMPLER_OFFSET 0X0003C000 +#define R600_SAMPLER_END 0X0003CFF0 +#define R600_CTL_CONST_OFFSET 0X0003CFF0 +#define R600_CTL_CONST_END 0X0003E200 +#define R600_LOOP_CONST_OFFSET 0X0003E200 +#define R600_LOOP_CONST_END 0X0003E380 +#define R600_BOOL_CONST_OFFSET 0X0003E380 +#define R600_BOOL_CONST_END 0X00040000 + + #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -66,11 +92,38 @@ #define PKT3_SET_SAMPLER 0x6E #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) +#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) +#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) + +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define PREDICATION_OP_CLEAR 0x0 #define PREDICATION_OP_ZPASS 0x1 #define PREDICATION_OP_PRIMCOUNT 0x2 +#define PRED_OP(x) ((x) << 16) + +#define PREDICATION_CONTINUE (1 << 31) + +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) + +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -83,8 +136,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index b3c7d1494fc..74efe226530 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,7 +26,7 @@ #include "r600_asm.h" #include "r700_sq.h" -void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) { unsigned count = (cf->ndw / 4) - 1; *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); @@ -36,7 +36,7 @@ void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) S_SQ_CF_WORD1_COUNT_3(count >> 3); } -int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) { bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | diff --git a/src/gallium/drivers/softpipe/Android.mk b/src/gallium/drivers/softpipe/Android.mk new file mode 100644 index 00000000000..d198fa5d0f2 --- /dev/null +++ b/src/gallium/drivers/softpipe/Android.mk @@ -0,0 +1,67 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# from Makefile +C_SOURCES = \ + sp_fs_exec.c \ + sp_fs_sse.c \ + sp_clear.c \ + sp_fence.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_vbuf.c \ + sp_quad_pipe.c \ + sp_quad_stipple.c \ + sp_quad_depth_test.c \ + sp_quad_fs.c \ + sp_quad_blend.c \ + sp_screen.c \ + sp_setup.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_sampler.c \ + sp_state_shader.c \ + sp_state_so.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tex_tile_cache.c \ + sp_tile_cache.c \ + sp_surface.c + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_MODULE := libmesa_pipe_softpipe + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index ae3f00f3387..22e8a2e5817 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -60,7 +60,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, return; #if 0 - softpipe_update_derived(softpipe); /* not needed?? */ + softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */ #endif if (buffers & PIPE_CLEAR_COLOR) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2c43602ea1c..c97b0333035 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "util/u_inlines.h" #include "tgsi/tgsi_exec.h" #include "vl/vl_decoder.h" @@ -90,6 +91,14 @@ softpipe_destroy( struct pipe_context *pipe ) struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->pstipple.sampler) + pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler); + + pipe_resource_reference(&softpipe->pstipple.texture, NULL); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL); +#endif + if (softpipe->draw) draw_destroy( softpipe->draw ); @@ -346,6 +355,11 @@ softpipe_create_context( struct pipe_screen *screen, sp_init_surface_functions(softpipe); +#if DO_PSTIPPLE_IN_HELPER_MODULE + /* create the polgon stipple sampler */ + softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe); +#endif + return &softpipe->pipe; fail: diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a572ee8cf00..410b0a65792 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -38,8 +38,11 @@ #include "sp_quad_pipe.h" -/** Do polygon stipple in the driver here, or in the draw module? */ -#define DO_PSTIPPLE_IN_DRAW_MODULE 1 +/** Do polygon stipple in the draw module? */ +#define DO_PSTIPPLE_IN_DRAW_MODULE 0 + +/** Do polygon stipple with the util module? */ +#define DO_PSTIPPLE_IN_HELPER_MODULE 1 struct softpipe_vbuf_render; @@ -64,6 +67,7 @@ struct softpipe_context { struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; + struct sp_fragment_shader_variant *fs_variant; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; struct sp_velems_state *velems; @@ -143,6 +147,13 @@ struct softpipe_context { struct pipe_query *render_cond_query; uint render_cond_mode; + /** Polygon stipple items */ + struct { + struct pipe_resource *texture; + struct pipe_sampler_state *sampler; + struct pipe_sampler_view *sampler_view; + } pstipple; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 01b4ca985d0..69b5b96b4fd 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -64,7 +64,7 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); @@ -122,7 +122,7 @@ softpipe_draw_vbo(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } softpipe_map_transfers(sp); diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h index 4792ace3a33..d46d7d5a657 100644 --- a/src/gallium/drivers/softpipe/sp_fs.h +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -31,17 +31,15 @@ #ifndef SP_FS_H #define SP_FS_H -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ); struct tgsi_interp_coef; struct tgsi_exec_vector; diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 346e1b402ba..779b8c4995c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -42,25 +42,25 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_exec_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; /* No other members for now */ }; /** cast wrapper */ static INLINE struct sp_exec_fragment_shader * -sp_exec_fragment_shader(const struct sp_fragment_shader *base) +sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var) { - return (struct sp_exec_fragment_shader *) base; + return (struct sp_exec_fragment_shader *) var; } static void -exec_prepare( const struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -68,9 +68,9 @@ exec_prepare( const struct sp_fragment_shader *base, * Bind tokens/shader to the interpreter's machine state. * Avoid redundant binding. */ - if (machine->Tokens != base->shader.tokens) { + if (machine->Tokens != var->tokens) { tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, + var->tokens, PIPE_MAX_SAMPLERS, samplers ); } @@ -118,7 +118,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * interface: */ static unsigned -exec_run( const struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader_variant *var, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -136,9 +136,9 @@ exec_run( const struct sp_fragment_shader *base, /* store outputs */ { - const ubyte *sem_name = base->info.output_semantic_name; - const ubyte *sem_index = base->info.output_semantic_index; - const uint n = base->info.num_outputs; + const ubyte *sem_name = var->info.output_semantic_name; + const ubyte *sem_index = var->info.output_semantic_index; + const uint n = var->info.num_outputs; uint i; for (i = 0; i < n; i++) { switch (sem_name[i]) { @@ -180,29 +180,23 @@ exec_run( const struct sp_fragment_shader *base, static void -exec_delete( struct sp_fragment_shader *base ) +exec_delete( struct sp_fragment_shader_variant *var ) { - FREE((void *) base->shader.tokens); - FREE(base); + FREE( (void *) var->tokens ); + FREE(var); } -struct sp_fragment_shader * -softpipe_create_fs_exec(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_exec_fragment_shader *shader; - /* Decide whether we'll be codegenerating this shader and if so do - * that now. - */ - shader = CALLOC_STRUCT(sp_exec_fragment_shader); if (!shader) return NULL; - /* we need to keep a local copy of the tokens */ - shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 5b18cd035e3..c873af125bd 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -48,11 +48,11 @@ /** - * Subclass of sp_fragment_shader + * Subclass of sp_fragment_shader_variant */ struct sp_sse_fragment_shader { - struct sp_fragment_shader base; + struct sp_fragment_shader_variant base; struct x86_function sse2_program; tgsi_sse2_fs_function func; float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; @@ -61,14 +61,14 @@ struct sp_sse_fragment_shader /** cast wrapper */ static INLINE struct sp_sse_fragment_shader * -sp_sse_fragment_shader(const struct sp_fragment_shader *base) +sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base) { return (struct sp_sse_fragment_shader *) base; } static void -fs_sse_prepare( const struct sp_fragment_shader *base, +fs_sse_prepare( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct tgsi_sampler **samplers ) { @@ -119,7 +119,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef, * TODO: process >1 quad at a time */ static unsigned -fs_sse_run( const struct sp_fragment_shader *base, +fs_sse_run( const struct sp_fragment_shader_variant *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -189,7 +189,7 @@ fs_sse_run( const struct sp_fragment_shader *base, static void -fs_sse_delete( struct sp_fragment_shader *base ) +fs_sse_delete( struct sp_fragment_shader_variant *base ) { struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); @@ -198,9 +198,9 @@ fs_sse_delete( struct sp_fragment_shader *base ) } -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { struct sp_sse_fragment_shader *shader; @@ -226,7 +226,6 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, return NULL; } - shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */ shader->base.prepare = fs_sse_prepare; shader->base.run = fs_sse_run; shader->base.delete = fs_sse_delete; @@ -239,9 +238,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, /* Maybe put this variant in the header file. */ -struct sp_fragment_shader * -softpipe_create_fs_sse(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) +struct sp_fragment_shader_variant * +softpipe_create_fs_variant_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) { return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 76cfc0bf51c..c881194768a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -240,6 +240,7 @@ blend_quad(struct quad_stage *qs, static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; float source[4][QUAD_SIZE] = { { 0 } }; + float blend_dest[4][QUAD_SIZE]; /* * Compute src/first term RGB @@ -480,79 +481,85 @@ blend_quad(struct quad_stage *qs, assert(0 && "invalid alpha src factor"); } + /* Save the original dest for use in masking */ + VEC4_COPY(blend_dest[0], dest[0]); + VEC4_COPY(blend_dest[1], dest[1]); + VEC4_COPY(blend_dest[2], dest[2]); + VEC4_COPY(blend_dest[3], dest[3]); + /* - * Compute dest/second term RGB + * Compute blend_dest/second term RGB */ switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */ break; case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: if (has_dst_alpha) { const float *alpha = quadColor[3]; float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); + VEC4_SUB(diff, one, blend_dest[3]); VEC4_MIN(temp, alpha, diff); - VEC4_MUL(dest[0], quadColor[0], temp); /* R */ - VEC4_MUL(dest[1], quadColor[1], temp); /* G */ - VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ break; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: @@ -563,45 +570,45 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ + VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */ + VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ } else { - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ + VEC4_COPY(blend_dest[0], zero); /* R */ + VEC4_COPY(blend_dest[1], zero); /* G */ + VEC4_COPY(blend_dest[2], zero); /* B */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */ + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */ + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */ + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -609,22 +616,22 @@ blend_quad(struct quad_stage *qs, float inv_comp[4]; /* R */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* G */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* B */ VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); + VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); + VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); } break; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -637,29 +644,29 @@ blend_quad(struct quad_stage *qs, } /* - * Compute dest/second term A + * Compute blend_dest/second term A */ switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_SRC_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */ break; case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: if (has_dst_alpha) { - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */ } else { - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ } break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* dest = dest * 1 NO-OP, leave dest as-is */ + /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ @@ -667,11 +674,11 @@ blend_quad(struct quad_stage *qs, { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */ } break; case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* fall-through */ @@ -679,7 +686,7 @@ blend_quad(struct quad_stage *qs, { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: @@ -687,11 +694,11 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_ALPHA: if (has_dst_alpha) { float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ + VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */ } else { - VEC4_COPY(dest[3], zero); /* A */ + VEC4_COPY(blend_dest[3], zero); /* A */ } break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: @@ -700,7 +707,7 @@ blend_quad(struct quad_stage *qs, { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp); } break; default: @@ -712,29 +719,29 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].rgb_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */ break; default: assert(0 && "invalid rgb blend func"); @@ -745,19 +752,19 @@ blend_quad(struct quad_stage *qs, */ switch (softpipe->blend->rt[blend_index].alpha_func) { case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */ break; case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */ break; case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */ break; default: assert(0 && "invalid alpha blend func"); @@ -797,7 +804,7 @@ blend_fallback(struct quad_stage *qs, unsigned cbuf; boolean write_all; - write_all = softpipe->fs->color0_writes_all_cbufs; + write_all = softpipe->fs_variant->info.color0_writes_all_cbufs; for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -810,17 +817,25 @@ blend_fallback(struct quad_stage *qs, quads[0]->input.y0); boolean has_dst_alpha = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); - uint q, i, j, qbuf; - - qbuf = write_all ? 0 : cbuf; + uint q, i, j; for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; float (*quadColor)[4]; + float temp_quad_color[QUAD_SIZE][4]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - quadColor = quad->output.color[qbuf]; + if (write_all) { + for (j = 0; j < QUAD_SIZE; j++) { + for (i = 0; i < 4; i++) { + temp_quad_color[i][j] = quad->output.color[0][i][j]; + } + } + quadColor = temp_quad_color; + } else { + quadColor = quad->output.color[cbuf]; + } /* get/swizzle dest colors */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 15f3a8fd813..a349f0d1f3c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -727,9 +727,9 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned nr) { unsigned i, pass = 0; - const struct sp_fragment_shader *fs = qs->softpipe->fs; - boolean interp_depth = !fs->info.writes_z; - boolean shader_stencil_ref = fs->info.writes_stencil; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + boolean interp_depth = !fsInfo->writes_z; + boolean shader_stencil_ref = fsInfo->writes_stencil; struct depth_data data; data.use_shader_stencil_refs = FALSE; @@ -838,7 +838,9 @@ choose_depth_test(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - boolean interp_depth = !qs->softpipe->fs->info.writes_z; + const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; + + boolean interp_depth = !fsInfo->writes_z; boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 90f4787d599..d74d6d4914e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -74,7 +74,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct tgsi_exec_machine *machine = softpipe->fs_machine; /* run shader */ - return softpipe->fs->run( softpipe->fs, machine, quad ); + return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad ); } @@ -140,10 +140,10 @@ shade_begin(struct quad_stage *qs) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->fs->prepare( softpipe->fs, - softpipe->fs_machine, - (struct tgsi_sampler **) - softpipe->tgsi.frag_samplers_list ); + softpipe->fs_variant->prepare( softpipe->fs_variant, + softpipe->fs_machine, + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); qs->next->begin(qs->next); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 2cfd02a22c6..0c4506ae8f4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -30,9 +30,9 @@ #include "sp_state.h" #include "pipe/p_shader_tokens.h" + static void -sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) +insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad) { quad->next = sp->quad.first; sp->quad.first = quad; @@ -46,24 +46,24 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z && - !sp->fs->info.writes_stencil; + !sp->fs_variant->info.uses_kill && + !sp->fs_variant->info.writes_z && + !sp->fs_variant->info.writes_stencil; sp->quad.first = sp->quad.blend; if (early_depth_test) { - sp_push_quad_first( sp, sp->quad.shade ); - sp_push_quad_first( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); } else { - sp_push_quad_first( sp, sp->quad.depth_test ); - sp_push_quad_first( sp, sp->quad.shade ); + insert_stage_at_head( sp, sp->quad.depth_test ); + insert_stage_at_head( sp, sp->quad.shade ); } -#if !DO_PSTIPPLE_IN_DRAW_MODULE +#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE if (sp->rasterizer->poly_stipple_enable) - sp_push_quad_first( sp, sp->quad.pstipple ); + insert_stage_at_head( sp, sp->quad.pstipple ); #endif } diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 4ae69c1c2bd..88f42572f19 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -157,7 +157,7 @@ softpipe_get_query_result(struct pipe_context *pipe, /*os_get_time is in microseconds*/ td.frequency = 1000000; td.disjoint = FALSE; - memcpy(vresult, &sq->so, + memcpy(vresult, &td, sizeof(struct pipe_query_data_timestamp_disjoint)); } break; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 1e58d27be88..960ab8c96ae 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -185,6 +185,8 @@ softpipe_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: + return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0ce28f4c6ee..656d001809f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -568,17 +568,18 @@ tri_persp_coeff(struct setup_context *setup, static void setup_fragcoord_coeff(struct setup_context *setup, uint slot) { - struct sp_fragment_shader* spfs = setup->softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; + /*X*/ - setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5; + setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5; setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) - + (spfs->pixel_center_integer ? 0.0 : 0.5); + (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + + (fsInfo->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; - setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; + setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; @@ -599,7 +600,7 @@ static void setup_tri_coefficients(struct setup_context *setup) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float v[3]; @@ -618,7 +619,7 @@ setup_tri_coefficients(struct setup_context *setup) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -632,7 +633,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -642,7 +643,7 @@ setup_tri_coefficients(struct setup_context *setup) tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmid[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); tri_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -654,7 +655,7 @@ setup_tri_coefficients(struct setup_context *setup) assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -939,7 +940,7 @@ setup_line_coefficients(struct setup_context *setup, const float (*v1)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; float area; @@ -974,7 +975,7 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -987,7 +988,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_linear_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -996,7 +997,7 @@ setup_line_coefficients(struct setup_context *setup, for (j = 0; j < NUM_CHANNELS; j++) { line_apply_cylindrical_wrap(setup->vmin[vertSlot][j], setup->vmax[vertSlot][j], - spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j), + fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j), v); line_persp_coeff(setup, &setup->coef[fragSlot], j, v); } @@ -1008,7 +1009,7 @@ setup_line_coefficients(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1188,7 +1189,7 @@ sp_setup_point(struct setup_context *setup, const float (*v0)[4]) { struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info; const int sizeAttr = setup->softpipe->psize_slot; const float size = sizeAttr > 0 ? v0[sizeAttr][0] @@ -1232,7 +1233,7 @@ sp_setup_point(struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) { const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; @@ -1255,7 +1256,7 @@ sp_setup_point(struct setup_context *setup, assert(0); } - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { + if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { /* convert 0 to 1.0 and 1 to -1.0 */ setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f; setup->coef[fragSlot].dadx[0] = 0.0; @@ -1396,7 +1397,7 @@ sp_setup_prepare(struct setup_context *setup) struct softpipe_context *sp = setup->softpipe; if (sp->dirty) { - softpipe_update_derived(sp); + softpipe_update_derived(sp, sp->reduced_api_prim); } /* Note: nr_attrs is only used for debugging (vertex printing) */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index bb19f8cff20..ec4c8cf5e4d 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -60,34 +60,45 @@ struct tgsi_exec_machine; struct vertex_info; -/** - * Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; +struct sp_fragment_shader_variant_key +{ + boolean polygon_stipple; +}; + +struct sp_fragment_shader_variant +{ + const struct tgsi_token *tokens; + struct sp_fragment_shader_variant_key key; struct tgsi_shader_info info; + unsigned stipple_sampler_unit; + + /* See comments about this elsewhere */ +#if 0 struct draw_fragment_shader *draw_shader; +#endif - boolean origin_lower_left; /**< fragment shader uses lower left position origin? */ - boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */ - boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */ - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers); + void (*prepare)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers); - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); + unsigned (*run)(const struct sp_fragment_shader_variant *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad); + /* Deletes this instance of the object */ + void (*delete)(struct sp_fragment_shader_variant *shader); - void (*delete)( struct sp_fragment_shader * ); + struct sp_fragment_shader_variant *next; +}; + + +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + struct sp_fragment_shader_variant *variants; + struct draw_fragment_shader *draw_shader; }; @@ -141,7 +152,7 @@ softpipe_set_framebuffer_state(struct pipe_context *, const struct pipe_framebuffer_state *); void -softpipe_update_derived( struct softpipe_context *softpipe ); +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim); void softpipe_draw_vbo(struct pipe_context *pipe, @@ -170,4 +181,16 @@ struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key); + + #endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9590eb0b24..fd688089a3e 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,8 +25,10 @@ * **************************************************************************/ +#include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_pstipple.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" @@ -64,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct sp_fragment_shader *spfs = softpipe->fs; + const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_shader_outputs(softpipe->draw); uint i; @@ -84,11 +86,11 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < spfs->info.num_inputs; i++) { + for (i = 0; i < fsInfo->num_inputs; i++) { int src; enum interp_mode interp; - switch (spfs->info.input_interpolate[i]) { + switch (fsInfo->input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: interp = INTERP_CONSTANT; break; @@ -103,7 +105,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) interp = INTERP_LINEAR; } - switch (spfs->info.input_semantic_name[i]) { + switch (fsInfo->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: interp = INTERP_POS; break; @@ -117,8 +119,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_shader_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + fsInfo->input_semantic_name[i], + fsInfo->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } @@ -241,10 +243,101 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } +static void +update_fragment_shader(struct softpipe_context *softpipe, unsigned prim) +{ + struct sp_fragment_shader_variant_key key; + + memset(&key, 0, sizeof(key)); + + if (prim == PIPE_PRIM_TRIANGLES) + key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable; + + if (softpipe->fs) { + softpipe->fs_variant = softpipe_find_fs_variant(softpipe, + softpipe->fs, &key); + } + else { + softpipe->fs_variant = NULL; + } + + /* This would be the logical place to pass the fragment shader + * to the draw module. However, doing this here, during state + * validation, causes problems with the 'draw' module helpers for + * wide/AA/stippled lines. + * In principle, the draw's fragment shader should be per-variant + * but that doesn't work. So we use a single draw fragment shader + * per fragment shader, not per variant. + */ +#if 0 + if (softpipe->fs_variant) { + draw_bind_fragment_shader(softpipe->draw, + softpipe->fs_variant->draw_shader); + } + else { + draw_bind_fragment_shader(softpipe->draw, NULL); + } +#endif +} + + +/** + * This should be called when the polygon stipple pattern changes. + * We create a new texture from the stipple pattern and create a new + * sampler view. + */ +static void +update_polygon_stipple_pattern(struct softpipe_context *softpipe) +{ + struct pipe_resource *tex; + struct pipe_sampler_view *view; + + tex = util_pstipple_create_stipple_texture(&softpipe->pipe, + softpipe->poly_stipple.stipple); + pipe_resource_reference(&softpipe->pstipple.texture, tex); + + view = util_pstipple_create_sampler_view(&softpipe->pipe, tex); + pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view); +} + + +/** + * Should be called when polygon stipple is enabled/disabled or when + * the fragment shader changes. + * We add/update the fragment sampler and sampler views to sample from + * the polygon stipple texture. The texture unit that we use depends on + * the fragment shader (we need to use a unit not otherwise used by the + * shader). + */ +static void +update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim) +{ + if (prim == PIPE_PRIM_TRIANGLES && + softpipe->fs_variant->key.polygon_stipple) { + const unsigned unit = softpipe->fs_variant->stipple_sampler_unit; + + assert(unit >= softpipe->num_fragment_samplers); + + /* sampler state */ + softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler; + + /* sampler view */ + pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit], + softpipe->pstipple.sampler_view); + + sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit], + softpipe->pstipple.sampler_view); + + softpipe->dirty |= SP_NEW_SAMPLER; + } +} + + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ -void softpipe_update_derived( struct softpipe_context *softpipe ) +void +softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim) { struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); @@ -254,7 +347,24 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) softpipe->tex_timestamp = sp_screen->timestamp; softpipe->dirty |= SP_NEW_TEXTURE; } - + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & SP_NEW_STIPPLE) + /* before updating samplers! */ + update_polygon_stipple_pattern(softpipe); +#endif + + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS)) + update_fragment_shader(softpipe, prim); + +#if DO_PSTIPPLE_IN_HELPER_MODULE + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_STIPPLE | + SP_NEW_FS)) + update_polygon_stipple_enable(softpipe, prim); +#endif + if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE | SP_NEW_FS | diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 60331bc4976..16023c990a7 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -373,8 +373,9 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } } - for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->fragment_samplers[i]) { + assert(softpipe->fragment_sampler_views[i]->texture); softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, sp_sampler(softpipe->fragment_samplers[i]), diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 3dec5de3cc4..da895270aa9 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_pstipple.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" @@ -42,46 +43,105 @@ #include "tgsi/tgsi_parse.h" +/** + * Create a new fragment shader variant. + */ +static struct sp_fragment_shader_variant * +create_fs_variant(struct softpipe_context *softpipe, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader; + unsigned unit = 0; + + if (key->polygon_stipple) { + /* get new shader that implements polygon stippling */ + stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe, + curfs, &unit); + curfs = stipple_fs; + } + + /* codegen, create variant object */ + var = softpipe_create_fs_variant_sse(softpipe, curfs); + if (!var) { + var = softpipe_create_fs_variant_exec(softpipe, curfs); + } + + if (var) { + var->key = *key; + var->tokens = tgsi_dup_tokens(curfs->tokens); + var->stipple_sampler_unit = unit; + + tgsi_scan_shader(var->tokens, &var->info); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + /* draw's fs state */ + var->draw_shader = draw_create_fragment_shader(softpipe->draw, + &fs->shader); + if (!var->draw_shader) { + var->delete(var); + FREE((void *) var->tokens); + return NULL; + } +#endif + + /* insert variant into linked list */ + var->next = fs->variants; + fs->variants = var; + } + + if (stipple_fs) { + free((void *) stipple_fs->tokens); + free(stipple_fs); + } + + return var; +} + + +struct sp_fragment_shader_variant * +softpipe_find_fs_variant(struct softpipe_context *sp, + struct sp_fragment_shader *fs, + const struct sp_fragment_shader_variant_key *key) +{ + struct sp_fragment_shader_variant *var; + + for (var = fs->variants; var; var = var->next) { + if (memcmp(&var->key, key, sizeof(*key)) == 0) { + /* found it */ + return var; + } + } + + return create_fs_variant(sp, fs, key); +} + + static void * softpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_fragment_shader *state; - unsigned i; + struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader); /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); - /* codegen */ - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } - - if (!state) - return NULL; + /* we need to keep a local copy of the tokens */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); /* draw's fs state */ - state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ); + state->draw_shader = draw_create_fragment_shader(softpipe->draw, + &state->shader); if (!state->draw_shader) { - state->delete( state ); + FREE((void *) state->shader.tokens); + FREE(state); return NULL; } - /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &state->info); - - for (i = 0; i < state->info.num_properties; ++i) { - if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) - state->origin_lower_left = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) - state->pixel_center_integer = state->info.properties[i].data[0]; - else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) - state->color0_writes_all_cbufs = state->info.properties[i].data[0]; - } - return state; } @@ -90,6 +150,7 @@ static void softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs; if (softpipe->fs == fs) return; @@ -98,8 +159,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) softpipe->fs = fs; - draw_bind_fragment_shader(softpipe->draw, - (softpipe->fs ? softpipe->fs->draw_shader : NULL)); + if (fs == NULL) + softpipe->fs_variant = NULL; + + if (state) + draw_bind_fragment_shader(softpipe->draw, + state->draw_shader); + else + draw_bind_fragment_shader(softpipe->draw, NULL); softpipe->dirty |= SP_NEW_FS; } @@ -110,8 +177,9 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; + struct sp_fragment_shader_variant *var, *next_var; - assert(fs != softpipe_context(pipe)->fs); + assert(fs != softpipe->fs); if (softpipe->fs_machine->Tokens == state->shader.tokens) { /* unbind the shader from the tgsi executor if we're @@ -120,9 +188,23 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL); } + /* delete variants */ + for (var = state->variants; var; var = next_var) { + next_var = var->next; + + assert(var != softpipe->fs_variant); + + /* See comments elsewhere about draw fragment shaders */ +#if 0 + draw_delete_fragment_shader(softpipe->draw, var->draw_shader); +#endif + + var->delete(var); + } + draw_delete_fragment_shader(softpipe->draw, state->draw_shader); - state->delete( state ); + FREE((void *) state->shader.tokens); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index f7309480bb9..89c6536b1f4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2566,7 +2566,114 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) FREE(samp); } +static void +sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, + int dims[4]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_sampler_view *view = samp->view; + const struct pipe_resource *texture = view->texture; + + /* undefined according to EXT_gpu_program */ + level += view->u.tex.first_level; + if (level > view->u.tex.last_level) + return; + + dims[0] = u_minify(texture->width0, level); + + switch(texture->target) { + case PIPE_TEXTURE_1D_ARRAY: + dims[1] = texture->array_size; + /* fallthrough */ + case PIPE_TEXTURE_1D: + case PIPE_BUFFER: + return; + case PIPE_TEXTURE_2D_ARRAY: + dims[2] = texture->array_size; + /* fallthrough */ + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_RECT: + dims[1] = u_minify(texture->height0, level); + return; + case PIPE_TEXTURE_3D: + dims[1] = u_minify(texture->height0, level); + dims[2] = u_minify(texture->depth0, level); + return; + default: + assert(!"unexpected texture target in sample_get_dims()"); + return; + } +} + +/* this function is only used for unfiltered texel gets + via the TGSI TXF opcode. */ +static void +sample_get_texels(struct tgsi_sampler *tgsi_sampler, + const int v_i[QUAD_SIZE], + const int v_j[QUAD_SIZE], + const int v_k[QUAD_SIZE], + const int lod[QUAD_SIZE], + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + union tex_tile_address addr; + const struct pipe_resource *texture = samp->view->texture; + int j, c; + const float *tx; + + addr.value = 0; + /* TODO write a better test for LOD */ + addr.bits.level = lod[0]; + switch(texture->target) { + case PIPE_TEXTURE_1D: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d(samp, addr, v_i[j], 0); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_1D_ARRAY: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_1d_array(samp, addr, v_i[j], v_j[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d(samp, addr, v_i[j], v_j[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_2D_ARRAY: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_2d_array(samp, addr, v_i[j], v_j[j], v_k[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_3D: + for (j = 0; j < QUAD_SIZE; j++) { + tx = get_texel_3d(samp, addr, v_i[j], v_j[j], v_k[j]); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; + case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */ + default: + assert(!"Unknown or CUBE texture type in TXF processing\n"); + break; + } +} /** * Create a sampler variant for a given set of non-orthogonal state. */ @@ -2692,5 +2799,7 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, samp->base.get_samples = samp->sample_target; } + samp->base.get_dims = sample_get_dims; + samp->base.get_texel = sample_get_texels; return samp; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index b847cf331b3..4a60f635825 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -71,13 +71,22 @@ svga_get_vendor( struct pipe_screen *pscreen ) static const char * svga_get_name( struct pipe_screen *pscreen ) { + const char *build = "", *llvm = "", *mutex = ""; + static char name[100]; #ifdef DEBUG /* Only return internal details in the DEBUG version: */ - return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC; + build = "build: DEBUG;"; + mutex = "mutex: " PIPE_ATOMIC ";"; +#ifdef HAVE_LLVM + llvm = "LLVM;"; +#endif #else - return "SVGA3D; build: RELEASE; "; + build = "build: RELEASE;"; #endif + + util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm); + return name; } @@ -245,6 +254,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 0; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; } break; case PIPE_SHADER_VERTEX: @@ -286,6 +297,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 8c788f4bb0e..05de9ff7cd0 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -67,7 +67,9 @@ extern "C" { #if !defined(__HAIKU__) && !defined(__USE_MISC) +#if !defined(PIPE_OS_ANDROID) typedef unsigned int uint; +#endif typedef unsigned short ushort; #endif typedef unsigned char ubyte; diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index eea3d79e64b..b3a7b337bc6 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -99,9 +99,9 @@ #endif #endif -#if defined(__PPC__) +#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__) #define PIPE_ARCH_PPC -#if defined(__PPC64__) +#if defined(__ppc64__) || defined(__PPC64__) #define PIPE_ARCH_PPC_64 #endif #endif @@ -120,6 +120,15 @@ # define PIPE_ARCH_BIG_ENDIAN #endif +#elif defined(__APPLE__) +#include <machine/endian.h> + +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +# define PIPE_ARCH_LITTLE_ENDIAN +#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN +# define PIPE_ARCH_BIG_ENDIAN +#endif + #else #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -145,6 +154,14 @@ #define PIPE_OS_UNIX #endif +/* + * Android defines __linux__ so PIPE_OS_LINUX and PIPE_OS_UNIX will also be + * defined. + */ +#if defined(ANDROID) +#define PIPE_OS_ANDROID +#endif + #if defined(__FreeBSD__) #define PIPE_OS_FREEBSD #define PIPE_OS_BSD diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3f6d90d1bf4..da3ee87515f 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -49,6 +49,7 @@ struct pipe_index_buffer; struct pipe_query; struct pipe_poly_stipple; struct pipe_rasterizer_state; +struct pipe_resolve_info; struct pipe_resource; struct pipe_sampler_state; struct pipe_sampler_view; @@ -268,13 +269,10 @@ struct pipe_context { /** * Resolve a multisampled resource into a non-multisampled one. - * Source and destination must have the same size and same format. + * Source and destination must be of the same format. */ void (*resource_resolve)(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned dst_layer, - struct pipe_resource *src, - unsigned src_layer); + const struct pipe_resolve_info *info); /*@}*/ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 79b89699566..795de1fbf62 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -99,6 +99,9 @@ enum pipe_error { #define PIPE_MASK_B 0x4 #define PIPE_MASK_A 0x8 #define PIPE_MASK_RGBA 0xf +#define PIPE_MASK_Z 0x10 +#define PIPE_MASK_S 0x20 +#define PIPE_MASK_ZS 0x30 /** @@ -468,6 +471,7 @@ enum pipe_cap { PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46, PIPE_CAP_SEAMLESS_CUBE_MAP = 47, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48, + PIPE_CAP_SCALED_RESOLVE = 49 }; /* Shader caps not specific to any single stage */ @@ -491,6 +495,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ + PIPE_SHADER_CAP_INTEGERS = 17 }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index d442c15c02a..840b3ee0e37 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -483,6 +483,34 @@ struct pipe_draw_info }; +/** + * Information to describe a resource_resolve call. + */ +struct pipe_resolve_info +{ + struct { + struct pipe_resource *res; + unsigned level; + unsigned layer; + int x0; /**< always left */ + int y0; /**< always top */ + int x1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */ + } dst; + + struct { + struct pipe_resource *res; + unsigned layer; + int x0; + int y0; + int x1; /**< may be < x0 only if PIPE_CAP_SCALED_RESOLVE is supported */ + int y1; /**< may be < y1 even if PIPE_CAP_SCALED_RESOLVE not supported */ + } src; + + unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */ +}; + + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h index f063d8f3a1b..2aa4001c179 100644 --- a/src/gallium/include/pipe/p_video_decoder.h +++ b/src/gallium/include/pipe/p_video_decoder.h @@ -59,75 +59,74 @@ struct pipe_video_decoder void (*destroy)(struct pipe_video_decoder *decoder); /** - * Creates a buffer as decoding input + * Creates a decoder buffer */ - struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder); + void *(*create_buffer)(struct pipe_video_decoder *decoder); /** - * flush decoder buffer to video hardware + * Destroys a decoder buffer */ - void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf, - unsigned num_ycbcr_blocks[3], - struct pipe_video_buffer *ref_frames[2], - struct pipe_video_buffer *dst); -}; - -/** - * input buffer for a decoder - */ -struct pipe_video_decode_buffer -{ - struct pipe_video_decoder *decoder; + void (*destroy_buffer)(struct pipe_video_decoder *decoder, void *buffer); /** - * destroy this decode buffer + * set the current decoder buffer */ - void (*destroy)(struct pipe_video_decode_buffer *decbuf); + void (*set_decode_buffer)(struct pipe_video_decoder *decoder, void *buffer); /** - * map the input buffer into memory before starting decoding + * set the picture parameters for the next frame + * only used for bitstream decoding */ - void (*begin_frame)(struct pipe_video_decode_buffer *decbuf); + void (*set_picture_parameters)(struct pipe_video_decoder *decoder, + struct pipe_picture_desc *picture); /** * set the quantification matrixes */ - void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf, - const uint8_t intra_matrix[64], - const uint8_t non_intra_matrix[64]); + void (*set_quant_matrix)(struct pipe_video_decoder *decoder, + const struct pipe_quant_matrix *matrix); /** - * get the pointer where to put the ycbcr blocks of a component + * set target where video data is decoded to */ - struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component); + void (*set_decode_target)(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target); /** - * get the pointer where to put the ycbcr dct block data of a component + * set reference frames for motion compensation */ - short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component); + void (*set_reference_frames)(struct pipe_video_decoder *decoder, + struct pipe_video_buffer **ref_frames, + unsigned num_ref_frames); /** - * get the stride of the mv buffer + * start decoding of a new frame */ - unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf); + void (*begin_frame)(struct pipe_video_decoder *decoder); /** - * get the pointer where to put the motion vectors of a ref frame + * decode a macroblock */ - struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame); + void (*decode_macroblock)(struct pipe_video_decoder *decoder, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks); /** * decode a bitstream */ - void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf, - unsigned num_bytes, const void *data, - struct pipe_picture_desc *picture, - unsigned num_ycbcr_blocks[3]); + void (*decode_bitstream)(struct pipe_video_decoder *decoder, + unsigned num_bytes, const void *data); + + /** + * end decoding of the current frame + */ + void (*end_frame)(struct pipe_video_decoder *decoder); /** - * unmap decoder buffer before flushing + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the state tracker again */ - void (*end_frame)(struct pipe_video_decode_buffer *decbuf); + void (*flush)(struct pipe_video_decoder *decoder); }; /** diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h index 492ab84e33f..ea25a25883d 100644 --- a/src/gallium/include/pipe/p_video_enums.h +++ b/src/gallium/include/pipe/p_video_enums.h @@ -51,6 +51,7 @@ enum pipe_video_cap PIPE_VIDEO_CAP_NPOT_TEXTURES = 1, PIPE_VIDEO_CAP_MAX_WIDTH = 2, PIPE_VIDEO_CAP_MAX_HEIGHT = 3, + PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED = 4 }; enum pipe_video_codec diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 2a64ffb5601..f655ed411f4 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -43,65 +43,82 @@ struct pipe_video_rect unsigned x, y, w, h; }; -enum pipe_mpeg12_picture_type +/* + * see table 6-12 in the spec + */ +enum pipe_mpeg12_picture_coding_type { - PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP, - PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM, - PIPE_MPEG12_PICTURE_TYPE_FRAME + PIPE_MPEG12_PICTURE_CODING_TYPE_I = 0x01, + PIPE_MPEG12_PICTURE_CODING_TYPE_P = 0x02, + PIPE_MPEG12_PICTURE_CODING_TYPE_B = 0x03, + PIPE_MPEG12_PICTURE_CODING_TYPE_D = 0x04 }; -enum pipe_mpeg12_dct_intra +/* + * see table 6-14 in the spec + */ +enum pipe_mpeg12_picture_structure { - PIPE_MPEG12_DCT_DELTA = 0, - PIPE_MPEG12_DCT_INTRA = 1 + PIPE_MPEG12_PICTURE_STRUCTURE_RESERVED = 0x00, + PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP = 0x01, + PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM = 0x02, + PIPE_MPEG12_PICTURE_STRUCTURE_FRAME = 0x03 }; -enum pipe_mpeg12_dct_type +/* + * flags for macroblock_type, see section 6.3.17.1 in the spec + */ +enum pipe_mpeg12_macroblock_type { - PIPE_MPEG12_DCT_TYPE_FRAME = 0, - PIPE_MPEG12_DCT_TYPE_FIELD = 1 + PIPE_MPEG12_MB_TYPE_QUANT = 0x01, + PIPE_MPEG12_MB_TYPE_MOTION_FORWARD = 0x02, + PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD = 0x04, + PIPE_MPEG12_MB_TYPE_PATTERN = 0x08, + PIPE_MPEG12_MB_TYPE_INTRA = 0x10 }; -enum pipe_video_field_select +/* + * flags for motion_type, see table 6-17 and 6-18 in the spec + */ +enum pipe_mpeg12_motion_type { - PIPE_VIDEO_FRAME = 0, - PIPE_VIDEO_TOP_FIELD = 1, - PIPE_VIDEO_BOTTOM_FIELD = 3, - - /* TODO - PIPE_VIDEO_DUALPRIME - PIPE_VIDEO_16x8 - */ + PIPE_MPEG12_MO_TYPE_RESERVED = 0x00, + PIPE_MPEG12_MO_TYPE_FIELD = 0x01, + PIPE_MPEG12_MO_TYPE_FRAME = 0x02, + PIPE_MPEG12_MO_TYPE_16x8 = 0x02, + PIPE_MPEG12_MO_TYPE_DUAL_PRIME = 0x03 }; -enum pipe_video_mv_weight +/* + * see section 6.3.17.1 and table 6-19 in the spec + */ +enum pipe_mpeg12_dct_type { - PIPE_VIDEO_MV_WEIGHT_MIN = 0, - PIPE_VIDEO_MV_WEIGHT_HALF = 128, - PIPE_VIDEO_MV_WEIGHT_MAX = 256 + PIPE_MPEG12_DCT_TYPE_FRAME = 0, + PIPE_MPEG12_DCT_TYPE_FIELD = 1 }; -/* bitfields because this is used as a vertex buffer element */ -struct pipe_motionvector +enum pipe_mpeg12_field_select { - struct { - short x, y; - ushort field_select; /**< enum pipe_video_field_select */ - ushort weight; /**< enum pipe_video_mv_weight */ - } top, bottom; + PIPE_MPEG12_FS_FIRST_FORWARD = 0x01, + PIPE_MPEG12_FS_FIRST_BACKWARD = 0x02, + PIPE_MPEG12_FS_SECOND_FORWARD = 0x04, + PIPE_MPEG12_FS_SECOND_BACKWARD = 0x08 }; -/* bitfields because this is used as a vertex buffer element */ -struct pipe_ycbcr_block +struct pipe_picture_desc { - ubyte x, y; - ubyte intra; /**< enum pipe_mpeg12_dct_intra */ - ubyte coding; /**< enum pipe_mpeg12_dct_type */ + enum pipe_video_profile profile; }; -struct pipe_picture_desc +struct pipe_quant_matrix { - enum pipe_video_profile profile; + enum pipe_video_codec codec; +}; + +struct pipe_macroblock +{ + enum pipe_video_codec codec; }; struct pipe_mpeg12_picture_desc @@ -115,9 +132,58 @@ struct pipe_mpeg12_picture_desc unsigned alternate_scan; unsigned intra_vlc_format; unsigned concealment_motion_vectors; + unsigned intra_dc_precision; unsigned f_code[2][2]; }; +struct pipe_mpeg12_quant_matrix +{ + struct pipe_quant_matrix base; + + const uint8_t *intra_matrix; + const uint8_t *non_intra_matrix; +}; + +struct pipe_mpeg12_macroblock +{ + struct pipe_macroblock base; + + /* see section 6.3.17 in the spec */ + unsigned short x, y; + + /* see section 6.3.17.1 in the spec */ + unsigned char macroblock_type; + + union { + struct { + /* see table 6-17 in the spec */ + unsigned int frame_motion_type:2; + + /* see table 6-18 in the spec */ + unsigned int field_motion_type:2; + + /* see table 6-19 in the spec */ + unsigned int dct_type:1; + } bits; + unsigned int value; + } macroblock_modes; + + /* see section 6.3.17.2 in the spec */ + unsigned char motion_vertical_field_select; + + /* see Table 7-7 in the spec */ + short PMV[2][2][2]; + + /* see figure 6.10-12 in the spec */ + unsigned short coded_block_pattern; + + /* see figure 6.10-12 in the spec */ + short *blocks; + + /* Number of skipped macroblocks after this macroblock */ + unsigned short num_skipped_macroblocks; +}; + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index 12f2aaddc91..0a31cf10a34 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -352,9 +352,9 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> { for(unsigned i = 0; i < count; ++i) { - if(constbufs[i] != constant_buffers[s][i].p) + if(constbufs[i] != constant_buffers[s][start + i].p) { - constant_buffers[s][i] = constbufs[i]; + constant_buffers[s][start + i] = constbufs[i]; if(s < caps.stages && start + i < caps.constant_buffers[s]) pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL); } @@ -391,11 +391,12 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> { samplers[s][start + i] = samps[i]; sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler; + last_different = i; } if(last_different >= 0) { num_samplers[s] = std::max(num_samplers[s], start + last_different + 1); - update_flags |= (UPDATE_SAMPLERS_SHIFT + s); + update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s); } } } @@ -1726,9 +1727,26 @@ changed: SYNCHRONIZED; GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource; GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource; - unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource); - unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource); - pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer); + struct pipe_resolve_info info; + + info.dst.res = dst->resource; + info.src.res = src->resource; + info.dst.level = 0; + info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource); + info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource); + + info.src.x0 = 0; + info.src.x1 = info.src.res->width0; + info.src.y0 = 0; + info.src.y1 = info.src.res->height0; + info.dst.x0 = 0; + info.dst.x1 = info.dst.res->width0; + info.dst.y0 = 0; + info.dst.y1 = info.dst.res->height0; + + info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS; + + pipe->resource_resolve(pipe, &info); } #if API >= 11 diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index e6612b1911d..bc8dacba1b7 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -48,6 +48,16 @@ dri_init_extensions(struct dri_context *ctx) driInitExtensions(st->ctx, NULL, GL_FALSE); } +static void +dri_pp_query(struct dri_context *ctx) +{ + unsigned int i; + + for (i = 0; i < PP_FILTERS; i++) { + ctx->pp_enabled[i] = driQueryOptioni(&ctx->optionCache, pp_filters[i].name); + } +} + GLboolean dri_create_context(gl_api api, const struct gl_config * visual, __DRIcontext * cPriv, void *sharedContextPrivate) @@ -105,6 +115,11 @@ dri_create_context(gl_api api, const struct gl_config * visual, if (api == API_OPENGL) dri_init_extensions(ctx); + // Context successfully created. See if post-processing is requested. + dri_pp_query(ctx); + + ctx->pp = pp_init(screen->base.screen, ctx->pp_enabled); + return GL_TRUE; fail: @@ -134,6 +149,8 @@ dri_destroy_context(__DRIcontext * cPriv) ctx->st->flush(ctx->st, 0, NULL); ctx->st->destroy(ctx->st); + if (ctx->pp) pp_free(ctx->pp); + FREE(ctx); } @@ -187,6 +204,13 @@ dri_make_current(__DRIcontext * cPriv, ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base); + // This is ok to call here. If they are already init, it's a no-op. + if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL] + && ctx->pp) + pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0, + draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0, + draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + return GL_TRUE; } diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index 35105e861f9..cfc8e3345e5 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -34,6 +34,7 @@ #include "pipe/p_compiler.h" #include "dri_wrapper.h" +#include "postprocess/filters.h" struct pipe_context; struct pipe_fence; @@ -61,6 +62,8 @@ struct dri_context /* gallium */ struct st_api *stapi; struct st_context_iface *st; + struct pp_queue_t *pp; + unsigned int pp_enabled[PP_FILTERS]; }; static INLINE struct dri_context * diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 5931df993b0..dcb6fdf8f3c 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -42,15 +42,25 @@ #include "util/u_debug.h" PUBLIC const char __driConfigOptions[] = - DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY -/* DRI_CONF_FORCE_S3TC_ENABLE(false) */ - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END DRI_CONF_END; - -static const uint __driNConfigOptions = 3; + DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY +/* DRI_CONF_FORCE_S3TC_ENABLE(false) */ + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_PP_CELSHADE(0) + DRI_CONF_PP_NORED(0) + DRI_CONF_PP_NOGREEN(0) + DRI_CONF_PP_NOBLUE(0) + DRI_CONF_PP_JIMENEZMLAA(0, 0, 32) + DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32) + DRI_CONF_SECTION_END + DRI_CONF_END; + +static const uint __driNConfigOptions = 9; static const __DRIconfig ** dri_fill_in_modes(struct dri_screen *screen, diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index fe4ddb312be..6cf237577ec 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -44,8 +44,19 @@ * DRI2 flush extension. */ static void -dri2_flush_drawable(__DRIdrawable *draw) +dri2_flush_drawable(__DRIdrawable *dPriv) { + struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); + struct dri_drawable *drawable = dri_drawable(dPriv); + + struct pipe_resource *ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT]; + + if (ctx) { + if (ptex && ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) + pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + + ctx->st->flush(ctx->st, 0, NULL); + } } static void @@ -266,7 +277,6 @@ dri2_allocate_buffer(__DRIscreen *sPriv, struct dri_screen *screen = dri_screen(sPriv); struct dri2_buffer *buffer; struct pipe_resource templ; - enum st_attachment_type statt; enum pipe_format pf; unsigned bind = 0; struct winsys_handle whandle; @@ -274,22 +284,16 @@ dri2_allocate_buffer(__DRIscreen *sPriv, switch (attachment) { case __DRI_BUFFER_FRONT_LEFT: case __DRI_BUFFER_FAKE_FRONT_LEFT: - statt = ST_ATTACHMENT_FRONT_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_BACK_LEFT: - statt = ST_ATTACHMENT_BACK_LEFT; bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case __DRI_BUFFER_DEPTH: case __DRI_BUFFER_DEPTH_STENCIL: case __DRI_BUFFER_STENCIL: - statt = ST_ATTACHMENT_DEPTH_STENCIL; bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ break; - default: - statt = ST_ATTACHMENT_INVALID; - break; } switch (format) { @@ -662,20 +666,6 @@ fail: } static boolean -dri2_create_context(gl_api api, const struct gl_config * visual, - __DRIcontext * cPriv, void *sharedContextPrivate) -{ - struct dri_context *ctx = NULL; - - if (!dri_create_context(api, visual, cPriv, sharedContextPrivate)) - return FALSE; - - ctx = cPriv->driverPrivate; - - return TRUE; -} - -static boolean dri2_create_buffer(__DRIscreen * sPriv, __DRIdrawable * dPriv, const struct gl_config * visual, boolean isPixmap) @@ -702,7 +692,7 @@ const struct __DriverAPIRec driDriverAPI = { .InitScreen = NULL, .InitScreen2 = dri2_init_screen, .DestroyScreen = dri_destroy_screen, - .CreateContext = dri2_create_context, + .CreateContext = dri_create_context, .DestroyContext = dri_destroy_context, .CreateBuffer = dri2_create_buffer, .DestroyBuffer = dri_destroy_buffer, diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index a1879a8f46a..082df55e8ea 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -136,6 +136,9 @@ drisw_swap_buffers(__DRIdrawable *dPriv) ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT]; if (ptex) { + if (ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) + pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]); + ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL); drisw_copy_to_front(dPriv, ptex); diff --git a/src/gallium/state_trackers/egl/Android.mk b/src/gallium/state_trackers/egl/Android.mk new file mode 100644 index 00000000000..e459bd4655e --- /dev/null +++ b/src/gallium/state_trackers/egl/Android.mk @@ -0,0 +1,54 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +common_SOURCES := \ + common/egl_g3d.c \ + common/egl_g3d_api.c \ + common/egl_g3d_image.c \ + common/egl_g3d_st.c \ + common/egl_g3d_sync.c \ + common/native_helper.c + +android_SOURCES := \ + android/native_android.cpp + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(common_SOURCES) \ + $(android_SOURCES) + +LOCAL_CFLAGS := -DHAVE_ANDROID_BACKEND + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/state_trackers/egl \ + $(GALLIUM_TOP)/winsys/sw \ + $(MESA_TOP)/src/egl/main \ + $(DRM_GRALLOC_TOP) + +LOCAL_MODULE := libmesa_st_egl + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp new file mode 100644 index 00000000000..211d6a2aeef --- /dev/null +++ b/src/gallium/state_trackers/egl/android/native_android.cpp @@ -0,0 +1,835 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 Chia-I Wu <[email protected]> + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#define LOG_TAG "EGL-GALLIUM" +#include <cutils/log.h> +#include <cutils/properties.h> +#include <hardware/gralloc.h> +#include <utils/Errors.h> +#include <ui/android_native_buffer.h> + +extern "C" { +#include "egllog.h" +} + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_box.h" +#include "common/native.h" +#include "common/native_helper.h" +#include "android/android_sw_winsys.h" +#include "state_tracker/drm_driver.h" + +struct android_config; + +struct android_display { + struct native_display base; + + boolean use_drm; + const struct native_event_handler *event_handler; + struct android_config *configs; + int num_configs; +}; + +struct android_surface { + struct native_surface base; + + struct android_display *adpy; + android_native_window_t *win; + + /* staging color buffer for when buffer preserving is enabled */ + struct pipe_resource *color_res; + + uint stamp; + android_native_buffer_t *buf; + struct pipe_resource *buf_res; + + /* cache the current back buffers */ + struct { + int width; + int height; + int format; + } cache_key; + void *cache_handles[2]; + struct pipe_resource *cache_resources[2]; +}; + +struct android_config { + struct native_config base; +}; + +static INLINE struct android_display * +android_display(const struct native_display *ndpy) +{ + return (struct android_display *) ndpy; +} + +static INLINE struct android_surface * +android_surface(const struct native_surface *nsurf) +{ + return (struct android_surface *) nsurf; +} + +static INLINE struct android_config * +android_config(const struct native_config *nconf) +{ + return (struct android_config *) nconf; +} + +namespace android { + +static enum pipe_format +get_pipe_format(int native) +{ + enum pipe_format fmt; + + switch (native) { + case HAL_PIXEL_FORMAT_RGBA_8888: + fmt = PIPE_FORMAT_R8G8B8A8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGBX_8888: + fmt = PIPE_FORMAT_R8G8B8X8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGB_888: + fmt = PIPE_FORMAT_R8G8B8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGB_565: + fmt = PIPE_FORMAT_B5G6R5_UNORM; + break; + case HAL_PIXEL_FORMAT_BGRA_8888: + fmt = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + case HAL_PIXEL_FORMAT_RGBA_5551: + /* fmt = PIPE_FORMAT_A1B5G5R5_UNORM; */ + case HAL_PIXEL_FORMAT_RGBA_4444: + /* fmt = PIPE_FORMAT_A4B4G4R4_UNORM; */ + default: + LOGE("unsupported native format 0x%x", native); + fmt = PIPE_FORMAT_NONE; + break; + } + + return fmt; +} + +#include <gralloc_drm_handle.h> +static int +get_handle_name(buffer_handle_t handle) +{ + struct gralloc_drm_handle_t *dh; + + /* check that the buffer is allocated by drm_gralloc and cast */ + dh = gralloc_drm_handle(handle); + + return (dh) ? dh->name : 0; +} + +/** + * Import an android_native_buffer_t allocated by the server. + */ +static struct pipe_resource * +import_buffer(struct android_display *adpy, const struct pipe_resource *templ, + struct android_native_buffer_t *abuf) +{ + struct pipe_screen *screen = adpy->base.screen; + struct pipe_resource *res; + + if (templ->bind & PIPE_BIND_RENDER_TARGET) { + if (!screen->is_format_supported(screen, templ->format, + templ->target, 0, PIPE_BIND_RENDER_TARGET)) + LOGW("importing unsupported buffer as render target"); + } + if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { + if (!screen->is_format_supported(screen, templ->format, + templ->target, 0, PIPE_BIND_SAMPLER_VIEW)) + LOGW("importing unsupported buffer as sampler view"); + } + + if (adpy->use_drm) { + struct winsys_handle handle; + + memset(&handle, 0, sizeof(handle)); + handle.type = DRM_API_HANDLE_TYPE_SHARED; + /* for DRM, we need the GEM name */ + handle.handle = get_handle_name(abuf->handle); + if (!handle.handle) { + LOGE("unable to import invalid buffer %p", abuf); + return NULL; + } + + handle.stride = + abuf->stride * util_format_get_blocksize(templ->format); + + res = screen->resource_from_handle(screen, templ, &handle); + } + else { + struct android_winsys_handle handle; + + memset(&handle, 0, sizeof(handle)); + handle.handle = abuf->handle; + handle.stride = + abuf->stride * util_format_get_blocksize(templ->format); + + res = screen->resource_from_handle(screen, + templ, (struct winsys_handle *) &handle); + } + + if (!res) + LOGE("failed to import buffer %p", abuf); + + return res; +} + +static void +android_surface_clear_cache(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + int i; + + for (i = 0; i < Elements(asurf->cache_handles); i++) { + asurf->cache_handles[i] = NULL; + pipe_resource_reference(&asurf->cache_resources[i], NULL); + } + + memset(&asurf->cache_key, 0, sizeof(asurf->cache_key)); +} + +static struct pipe_resource * +android_surface_add_cache(struct native_surface *nsurf, + struct android_native_buffer_t *abuf) +{ + struct android_surface *asurf = android_surface(nsurf); + void *handle; + int idx; + + /* how about abuf->usage? */ + if (asurf->cache_key.width != abuf->width || + asurf->cache_key.height != abuf->height || + asurf->cache_key.format != abuf->format) + android_surface_clear_cache(&asurf->base); + + if (asurf->adpy->use_drm) + handle = (void *) get_handle_name(abuf->handle); + else + handle = (void *) abuf->handle; + /* NULL is invalid */ + if (!handle) { + LOGE("invalid buffer native buffer %p", abuf); + return NULL; + } + + /* find the slot to use */ + for (idx = 0; idx < Elements(asurf->cache_handles); idx++) { + if (asurf->cache_handles[idx] == handle || !asurf->cache_handles[idx]) + break; + } + if (idx == Elements(asurf->cache_handles)) { + LOGW("cache full: buf %p, width %d, height %d, format %d, usage 0x%x", + abuf, abuf->width, abuf->height, abuf->format, abuf->usage); + android_surface_clear_cache(&asurf->base); + idx = 0; + } + + if (idx == 0) { + asurf->cache_key.width = abuf->width; + asurf->cache_key.height = abuf->height; + asurf->cache_key.format = abuf->format; + } + + if (!asurf->cache_handles[idx]) { + struct pipe_resource templ; + + assert(!asurf->cache_resources[idx]); + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = get_pipe_format(asurf->buf->format); + templ.bind = PIPE_BIND_RENDER_TARGET; + if (!asurf->adpy->use_drm) { + templ.bind |= PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_TRANSFER_READ; + } + + templ.width0 = asurf->buf->width; + templ.height0 = asurf->buf->height; + templ.depth0 = 1; + templ.array_size = 1; + + if (templ.format != PIPE_FORMAT_NONE) { + asurf->cache_resources[idx] = + import_buffer(asurf->adpy, &templ, asurf->buf); + } + else { + asurf->cache_resources[idx] = NULL; + } + + asurf->cache_handles[idx] = handle; + } + + return asurf->cache_resources[idx]; +} + +/** + * Dequeue the next back buffer for rendering. + */ +static boolean +android_surface_dequeue_buffer(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + struct pipe_resource *res; + + if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) { + LOGE("failed to dequeue window %p", asurf->win); + return FALSE; + } + + asurf->buf->common.incRef(&asurf->buf->common); + asurf->win->lockBuffer(asurf->win, asurf->buf); + + res = android_surface_add_cache(&asurf->base, asurf->buf); + if (!res) + return FALSE; + + pipe_resource_reference(&asurf->buf_res, res); + + return TRUE; +} + +/** + * Enqueue the back buffer. This will make it the next front buffer. + */ +static boolean +android_surface_enqueue_buffer(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + + pipe_resource_reference(&asurf->buf_res, NULL); + + asurf->win->queueBuffer(asurf->win, asurf->buf); + + asurf->buf->common.decRef(&asurf->buf->common); + asurf->buf = NULL; + + return TRUE; +} + +static boolean +android_surface_swap_buffers(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + struct android_display *adpy = asurf->adpy; + + if (!asurf->buf) + return TRUE; + + android_surface_enqueue_buffer(&asurf->base); + + asurf->stamp++; + adpy->event_handler->invalid_surface(&adpy->base, + &asurf->base, asurf->stamp); + + return TRUE; +} + +static void +copy_resources(struct native_display *ndpy, + struct pipe_resource *src, + struct pipe_resource *dst) +{ + struct pipe_context *pipe; + struct pipe_box box; + + pipe = ndpy_get_copy_context(ndpy); + if (!pipe) + return; + + u_box_origin_2d(src->width0, src->height0, &box); + pipe->resource_copy_region(pipe, dst, 0, 0, 0, 0, src, 0, &box); + pipe->flush(pipe, NULL); +} + +static boolean +android_surface_present(struct native_surface *nsurf, + enum native_attachment natt, + boolean preserve, + uint swap_interval) +{ + struct android_surface *asurf = android_surface(nsurf); + struct android_display *adpy = asurf->adpy; + boolean ret; + + if (swap_interval || natt != NATIVE_ATTACHMENT_BACK_LEFT) + return FALSE; + + /* we always render to color_res first when it exists */ + if (asurf->color_res) { + copy_resources(&adpy->base, asurf->color_res, asurf->buf_res); + if (!preserve) + pipe_resource_reference(&asurf->color_res, NULL); + } + else if (preserve) { + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + templ.target = asurf->buf_res->target; + templ.format = asurf->buf_res->format; + templ.bind = PIPE_BIND_RENDER_TARGET; + templ.width0 = asurf->buf_res->width0; + templ.height0 = asurf->buf_res->height0; + templ.depth0 = asurf->buf_res->depth0; + templ.array_size = asurf->buf_res->array_size; + + asurf->color_res = + adpy->base.screen->resource_create(adpy->base.screen, &templ); + if (!asurf->color_res) + return FALSE; + + /* preserve the contents */ + copy_resources(&adpy->base, asurf->buf_res, asurf->color_res); + } + + return android_surface_swap_buffers(nsurf); +} + +static boolean +android_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct android_surface *asurf = android_surface(nsurf); + struct winsys_handle handle; + + if (!asurf->buf) { + if (!android_surface_dequeue_buffer(&asurf->base)) + return FALSE; + + /* color_res must be compatible with buf_res */ + if (asurf->color_res && + (asurf->color_res->format != asurf->buf_res->format || + asurf->color_res->width0 != asurf->buf_res->width0 || + asurf->color_res->height0 != asurf->buf_res->height0)) + pipe_resource_reference(&asurf->color_res, NULL); + } + + if (textures) { + /* we have access to only the back buffer */ + const enum native_attachment att = NATIVE_ATTACHMENT_BACK_LEFT; + + if (native_attachment_mask_test(attachment_mask, att)) { + textures[att] = NULL; + pipe_resource_reference(&textures[att], + (asurf->color_res) ? asurf->color_res : asurf->buf_res); + } + } + + if (seq_num) + *seq_num = asurf->stamp; + if (width) + *width = asurf->buf->width; + if (height) + *height = asurf->buf->height; + + return TRUE; +} + +static void +android_surface_wait(struct native_surface *nsurf) +{ +} + +static void +android_surface_destroy(struct native_surface *nsurf) +{ + struct android_surface *asurf = android_surface(nsurf); + int i; + + pipe_resource_reference(&asurf->color_res, NULL); + + if (asurf->buf) + android_surface_enqueue_buffer(&asurf->base); + + android_surface_clear_cache(&asurf->base); + + asurf->win->common.decRef(&asurf->win->common); + + FREE(asurf); +} + +static struct native_surface * +android_display_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct android_display *adpy = android_display(ndpy); + struct android_config *aconf = android_config(nconf); + struct android_surface *asurf; + enum pipe_format format; + int val; + + if (win->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) { + LOGE("invalid native window with magic 0x%x", win->common.magic); + return NULL; + } + if (win->query(win, NATIVE_WINDOW_FORMAT, &val)) { + LOGE("failed to query native window format"); + return NULL; + } + format = get_pipe_format(val); + if (format != nconf->color_format) { + LOGW("native window format 0x%x != config format 0x%x", + format, nconf->color_format); + if (!adpy->base.screen->is_format_supported(adpy->base.screen, + format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) { + LOGE("and the native window cannot be used as a render target"); + return NULL; + } + } + + asurf = CALLOC_STRUCT(android_surface); + if (!asurf) + return NULL; + + asurf->adpy = adpy; + asurf->win = win; + asurf->win->common.incRef(&asurf->win->common); + + /* request buffers that are for CPU access */ + if (!adpy->use_drm) { + native_window_set_usage(asurf->win, + GRALLOC_USAGE_SW_READ_OFTEN | GRALLOC_USAGE_SW_WRITE_OFTEN); + } + + asurf->base.destroy = android_surface_destroy; + asurf->base.present = android_surface_present; + asurf->base.validate = android_surface_validate; + asurf->base.wait = android_surface_wait; + + return &asurf->base; +} + +static boolean +android_display_init_configs(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + const int native_formats[] = { + HAL_PIXEL_FORMAT_RGBA_8888, + HAL_PIXEL_FORMAT_RGBX_8888, + HAL_PIXEL_FORMAT_RGB_888, + HAL_PIXEL_FORMAT_RGB_565, + HAL_PIXEL_FORMAT_BGRA_8888, + }; + int i; + + adpy->configs = (struct android_config *) + CALLOC(Elements(native_formats), sizeof(*adpy->configs)); + if (!adpy->configs) + return FALSE; + + for (i = 0; i < Elements(native_formats); i++) { + enum pipe_format color_format; + struct android_config *aconf; + + color_format = get_pipe_format(native_formats[i]); + if (color_format == PIPE_FORMAT_NONE || + !adpy->base.screen->is_format_supported(adpy->base.screen, + color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) { + LOGI("skip unsupported native format 0x%x", native_formats[i]); + continue; + } + + aconf = &adpy->configs[adpy->num_configs++]; + /* only the back buffer */ + aconf->base.buffer_mask = 1 << NATIVE_ATTACHMENT_BACK_LEFT; + aconf->base.color_format = color_format; + aconf->base.window_bit = TRUE; + + aconf->base.native_visual_id = native_formats[i]; + aconf->base.native_visual_type = native_formats[i]; + } + + return TRUE; +} + +static boolean +android_display_init_drm(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + const hw_module_t *mod; + int fd, err; + + /* get the authorized fd from gralloc */ + err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod); + if (!err) { + const gralloc_module_t *gr = (gralloc_module_t *) mod; + + err = -EINVAL; + if (gr->perform) + err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd); + } + if (!err && fd >= 0) { + adpy->base.screen = + adpy->event_handler->new_drm_screen(&adpy->base, NULL, fd); + } + + if (adpy->base.screen) { + LOGI("using DRM screen"); + return TRUE; + } + else { + LOGW("failed to create DRM screen"); + LOGW("will fall back to other EGL drivers if any"); + return FALSE; + } +} + +static boolean +android_display_init_sw(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + struct sw_winsys *ws; + + ws = android_create_sw_winsys(); + if (ws) { + adpy->base.screen = + adpy->event_handler->new_sw_screen(&adpy->base, ws); + } + + if (adpy->base.screen) { + LOGI("using SW screen"); + return TRUE; + } + else { + LOGE("failed to create SW screen"); + return FALSE; + } +} + +static boolean +android_display_init_screen(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + + if (adpy->use_drm) + android_display_init_drm(&adpy->base); + else + android_display_init_sw(&adpy->base); + + if (!adpy->base.screen) + return FALSE; + + if (!android_display_init_configs(&adpy->base)) { + adpy->base.screen->destroy(adpy->base.screen); + adpy->base.screen = NULL; + return FALSE; + } + + return TRUE; +} + +static void +android_display_destroy(struct native_display *ndpy) +{ + struct android_display *adpy = android_display(ndpy); + + FREE(adpy->configs); + if (adpy->base.screen) + adpy->base.screen->destroy(adpy->base.screen); + FREE(adpy); +} + +static const struct native_config ** +android_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct android_display *adpy = android_display(ndpy); + const struct native_config **configs; + int i; + + configs = (const struct native_config **) + MALLOC(adpy->num_configs * sizeof(*configs)); + if (configs) { + for (i = 0; i < adpy->num_configs; i++) + configs[i] = (const struct native_config *) &adpy->configs[i]; + if (num_configs) + *num_configs = adpy->num_configs; + } + + return configs; +} + +static int +android_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + case NATIVE_PARAM_PRESERVE_BUFFER: + val = 1; + break; + default: + val = 0; + break; + } + + return val; +} + +static struct pipe_resource * +android_display_import_buffer(struct native_display *ndpy, + struct native_buffer *nbuf) +{ + struct android_display *adpy = android_display(ndpy); + struct android_native_buffer_t *abuf; + enum pipe_format format; + struct pipe_resource templ; + + if (nbuf->type != NATIVE_BUFFER_ANDROID) + return NULL; + + abuf = nbuf->u.android; + + if (!abuf || abuf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC || + abuf->common.version != sizeof(*abuf)) { + LOGE("invalid android native buffer"); + return NULL; + } + + format = get_pipe_format(abuf->format); + if (format == PIPE_FORMAT_NONE) + return NULL; + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = format; + /* assume for texturing only */ + templ.bind = PIPE_BIND_SAMPLER_VIEW; + templ.width0 = abuf->width; + templ.height0 = abuf->height; + templ.depth0 = 1; + templ.array_size = 1; + + return import_buffer(adpy, &templ, abuf); +} + +static boolean +android_display_export_buffer(struct native_display *ndpy, + struct pipe_resource *res, + struct native_buffer *nbuf) +{ + return FALSE; +} + +static struct native_display_buffer android_display_buffer = { + android_display_import_buffer, + android_display_export_buffer +}; + +static struct android_display * +android_display_create(const struct native_event_handler *event_handler, + boolean use_sw) +{ + struct android_display *adpy; + char value[PROPERTY_VALUE_MAX]; + boolean force_sw; + + /* check if SW renderer is forced */ + if (property_get("debug.mesa.software", value, NULL)) + force_sw = (atoi(value) != 0); + else + force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); + if (force_sw) + use_sw = TRUE; + + adpy = CALLOC_STRUCT(android_display); + if (!adpy) + return NULL; + + adpy->event_handler = event_handler; + adpy->use_drm = !use_sw; + + adpy->base.init_screen = android_display_init_screen; + adpy->base.destroy = android_display_destroy; + adpy->base.get_param = android_display_get_param; + adpy->base.get_configs = android_display_get_configs; + adpy->base.create_window_surface = android_display_create_window_surface; + + adpy->base.buffer = &android_display_buffer; + + return adpy; +} + +static const struct native_event_handler *android_event_handler; + +static struct native_display * +native_create_display(void *dpy, boolean use_sw) +{ + struct android_display *adpy; + + adpy = android_display_create(android_event_handler, use_sw); + + return (adpy) ? &adpy->base : NULL; +} + +static const struct native_platform android_platform = { + "Android", /* name */ + native_create_display +}; + +}; /* namespace android */ + +using namespace android; + +static void +android_log(EGLint level, const char *msg) +{ + switch (level) { + case _EGL_DEBUG: + LOGD("%s", msg); + break; + case _EGL_INFO: + LOGI("%s", msg); + break; + case _EGL_WARNING: + LOGW("%s", msg); + break; + case _EGL_FATAL: + LOG_FATAL("%s", msg); + break; + default: + break; + } +} + +const struct native_platform * +native_get_android_platform(const struct native_event_handler *event_handler) +{ + android_event_handler = event_handler; + /* use Android logger */ + _eglSetLogProc(android_log); + + return &android_platform; +} diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 6649f02b244..b5e3d99b811 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -132,6 +132,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) nplat = native_get_fbdev_platform(&egl_g3d_native_event_handler); #endif break; + case _EGL_PLATFORM_ANDROID: + plat_name = "Android"; +#ifdef HAVE_ANDROID_BACKEND + nplat = native_get_android_platform(&egl_g3d_native_event_handler); +#endif + break; default: break; } @@ -572,6 +578,11 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy) if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer) dpy->Extensions.MESA_drm_image = EGL_TRUE; +#ifdef EGL_ANDROID_image_native_buffer + if (dpy->Platform == _EGL_PLATFORM_ANDROID && gdpy->native->buffer) + dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE; +#endif + #ifdef EGL_WL_bind_wayland_display if (gdpy->native->wayland_bufmgr) dpy->Extensions.WL_bind_wayland_display = EGL_TRUE; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 7e9a29b0284..4d90c400319 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -202,6 +202,24 @@ egl_g3d_reference_wl_buffer(_EGLDisplay *dpy, struct wl_buffer *buffer, #endif /* EGL_WL_bind_wayland_display */ +#ifdef EGL_ANDROID_image_native_buffer + +static struct pipe_resource * +egl_g3d_reference_android_native_buffer(_EGLDisplay *dpy, + struct android_native_buffer_t *buf) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct native_buffer nbuf; + + memset(&nbuf, 0, sizeof(nbuf)); + nbuf.type = NATIVE_BUFFER_ANDROID; + nbuf.u.android = buf; + + return gdpy->native->buffer->import_buffer(gdpy->native, &nbuf); +} + +#endif /* EGL_ANDROID_image_native_buffer */ + _EGLImage * egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, EGLenum target, EGLClientBuffer buffer, @@ -239,6 +257,12 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, (struct wl_buffer *) buffer, &gimg->base, attribs); break; #endif +#ifdef EGL_ANDROID_image_native_buffer + case EGL_NATIVE_BUFFER_ANDROID: + ptex = egl_g3d_reference_android_native_buffer(dpy, + (struct android_native_buffer_t *) buffer); + break; +#endif default: ptex = NULL; break; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 60c3e332ac9..b839f848d7b 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -126,7 +126,7 @@ pbuffer_reference_openvg_image(struct egl_g3d_surface *gsurf) } static void -pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) +pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf) { struct egl_g3d_display *gdpy = egl_g3d_display(gsurf->base.Resource.Display); @@ -141,7 +141,8 @@ pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf) templ.depth0 = 1; templ.array_size = 1; templ.format = gsurf->stvis.color_format; - templ.bind = PIPE_BIND_RENDER_TARGET; + /* for rendering and binding to texture */ + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; ptex = screen->resource_create(screen, &templ); gsurf->render_texture = ptex; @@ -166,7 +167,7 @@ egl_g3d_st_framebuffer_validate_pbuffer(struct st_framebuffer_iface *stfbi, if (!gsurf->render_texture) { switch (gsurf->client_buffer_type) { case EGL_NONE: - pbuffer_allocate_render_texture(gsurf); + pbuffer_allocate_pbuffer_texture(gsurf); break; case EGL_OPENVG_IMAGE: pbuffer_reference_openvg_image(gsurf); diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index fc50ee485fe..58593a489cd 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -293,6 +293,9 @@ native_get_drm_platform(const struct native_event_handler *event_handler); const struct native_platform * native_get_fbdev_platform(const struct native_event_handler *event_handler); +const struct native_platform * +native_get_android_platform(const struct native_event_handler *event_handler); + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/egl/common/native_buffer.h b/src/gallium/state_trackers/egl/common/native_buffer.h index b8a66d17e12..503ed580b05 100644 --- a/src/gallium/state_trackers/egl/common/native_buffer.h +++ b/src/gallium/state_trackers/egl/common/native_buffer.h @@ -33,9 +33,11 @@ #include "pipe/p_state.h" struct native_display; +struct android_native_buffer_t; enum native_buffer_type { NATIVE_BUFFER_DRM, + NATIVE_BUFFER_ANDROID, NUM_NATIVE_BUFFERS }; @@ -50,6 +52,8 @@ struct native_buffer { unsigned handle; /**< the handle of the GEM object */ unsigned stride; } drm; + + struct android_native_buffer_t *android; /**< opaque native buffer */ } u; }; diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 47910de8d3c..c013769e57d 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -134,8 +134,11 @@ drm_display_destroy(struct native_display *ndpy) if (drmdpy->device_name) FREE(drmdpy->device_name); - if (drmdpy->fd >= 0) - close(drmdpy->fd); + if (drmdpy->own_gbm) { + gbm_device_destroy(&drmdpy->gbmdrm->base.base); + if (drmdpy->fd >= 0) + close(drmdpy->fd); + } FREE(drmdpy); } @@ -258,7 +261,7 @@ drm_display_init_screen(struct native_display *ndpy) } static struct native_display * -drm_create_display(struct gbm_gallium_drm_device *gbmdrm, +drm_create_display(struct gbm_gallium_drm_device *gbmdrm, int own_gbm, const struct native_event_handler *event_handler) { struct drm_display *drmdpy; @@ -267,6 +270,8 @@ drm_create_display(struct gbm_gallium_drm_device *gbmdrm, if (!drmdpy) return NULL; + drmdpy->gbmdrm = gbmdrm; + drmdpy->own_gbm = own_gbm; drmdpy->fd = gbmdrm->base.base.fd; drmdpy->device_name = drm_get_device_name(drmdpy->fd); @@ -302,22 +307,30 @@ native_create_display(void *dpy, boolean use_sw) { struct gbm_gallium_drm_device *gbm; int fd; + int own_gbm = 0; gbm = dpy; if (gbm == NULL) { fd = open("/dev/dri/card0", O_RDWR); + /* FIXME: Use an internal constructor to create a gbm + * device with gallium backend directly, without setenv */ + setenv("GBM_BACKEND", "gbm_gallium_drm.so", 1); gbm = gbm_gallium_drm_device(gbm_create_device(fd)); + own_gbm = 1; } if (gbm == NULL) return NULL; if (strcmp(gbm_device_get_backend_name(&gbm->base.base), "drm") != 0 || - gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) + gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) { + if (own_gbm) + gbm_device_destroy(&gbm->base.base); return NULL; + } - return drm_create_display(gbm, drm_event_handler); + return drm_create_display(gbm, own_gbm, drm_event_handler); } static const struct native_platform drm_platform = { diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h index 675a58a1922..18cebf4e276 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.h +++ b/src/gallium/state_trackers/egl/drm/native_drm.h @@ -41,6 +41,8 @@ #include "common/native_wayland_drm_bufmgr_helper.h" #endif +#include "gbm_gallium_drmint.h" + struct drm_config; struct drm_crtc; struct drm_connector; @@ -52,6 +54,8 @@ struct drm_display { const struct native_event_handler *event_handler; + struct gbm_gallium_drm_device *gbmdrm; + int own_gbm; int fd; char *device_name; struct drm_config *config; diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index 269c7a4baf8..50d63ea3f73 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -82,13 +82,22 @@ vlVdpDecoderCreate(VdpDevice device, goto error_decoder; } + vldecoder->num_buffers = pipe->screen->get_video_param + ( + pipe->screen, p_profile, + PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED + ); vldecoder->cur_buffer = 0; - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) { - vldecoder->buffer[i] = vldecoder->decoder->create_buffer(vldecoder->decoder); - if (!vldecoder->buffer[i]) { + vldecoder->buffers = CALLOC(vldecoder->num_buffers, sizeof(void*)); + if (!vldecoder->buffers) + goto error_alloc_buffers; + + for (i = 0; i < vldecoder->num_buffers; ++i) { + vldecoder->buffers[i] = vldecoder->decoder->create_buffer(vldecoder->decoder); + if (!vldecoder->buffers[i]) { ret = VDP_STATUS_ERROR; - goto error_buffer; + goto error_create_buffers; } } @@ -103,11 +112,15 @@ vlVdpDecoderCreate(VdpDevice device, return VDP_STATUS_OK; error_handle: -error_buffer: +error_create_buffers: + + for (i = 0; i < vldecoder->num_buffers; ++i) + if (vldecoder->buffers[i]) + vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]); - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) - if (vldecoder->buffer[i]) - vldecoder->buffer[i]->destroy(vldecoder->buffer[i]); + FREE(vldecoder->buffers); + +error_alloc_buffers: vldecoder->decoder->destroy(vldecoder->decoder); @@ -128,9 +141,11 @@ vlVdpDecoderDestroy(VdpDecoder decoder) if (!vldecoder) return VDP_STATUS_INVALID_HANDLE; - for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) - if (vldecoder->buffer[i]) - vldecoder->buffer[i]->destroy(vldecoder->buffer[i]); + for (i = 0; i < vldecoder->num_buffers; ++i) + if (vldecoder->buffers[i]) + vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]); + + FREE(vldecoder->buffers); vldecoder->decoder->destroy(vldecoder->decoder); @@ -161,38 +176,37 @@ vlVdpDecoderGetParameters(VdpDecoder decoder, } static VdpStatus -vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder, - struct pipe_video_decode_buffer *buffer, - struct pipe_video_buffer *target, - VdpPictureInfoMPEG1Or2 *picture_info, - uint32_t bitstream_buffer_count, - VdpBitstreamBuffer const *bitstream_buffers) +vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder, + VdpPictureInfoMPEG1Or2 *picture_info, + uint32_t bitstream_buffer_count, + VdpBitstreamBuffer const *bitstream_buffers) { struct pipe_mpeg12_picture_desc picture; + struct pipe_mpeg12_quant_matrix quant; struct pipe_video_buffer *ref_frames[2]; - uint8_t intra_quantizer_matrix[64]; - unsigned num_ycbcr_blocks[3] = { 0, 0, 0 }; unsigned i; VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n"); + i = 0; + /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */ - if (picture_info->forward_reference == VDP_INVALID_HANDLE) - ref_frames[0] = NULL; - else { - ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer; - if (!ref_frames[0]) + if (picture_info->forward_reference != VDP_INVALID_HANDLE) { + ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer; + if (!ref_frames[i]) return VDP_STATUS_INVALID_HANDLE; + ++i; } - if (picture_info->backward_reference == VDP_INVALID_HANDLE) - ref_frames[1] = NULL; - else { - ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer; - if (!ref_frames[1]) + if (picture_info->backward_reference != VDP_INVALID_HANDLE) { + ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer; + if (!ref_frames[i]) return VDP_STATUS_INVALID_HANDLE; + ++i; } + decoder->set_reference_frames(decoder, ref_frames, i); + memset(&picture, 0, sizeof(picture)); picture.base.profile = decoder->profile; picture.picture_coding_type = picture_info->picture_coding_type; @@ -202,24 +216,28 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder, picture.alternate_scan = picture_info->alternate_scan; picture.intra_vlc_format = picture_info->intra_vlc_format; picture.concealment_motion_vectors = picture_info->concealment_motion_vectors; + picture.intra_dc_precision = picture_info->intra_dc_precision; picture.f_code[0][0] = picture_info->f_code[0][0] - 1; picture.f_code[0][1] = picture_info->f_code[0][1] - 1; picture.f_code[1][0] = picture_info->f_code[1][0] - 1; picture.f_code[1][1] = picture_info->f_code[1][1] - 1; - buffer->begin_frame(buffer); + decoder->set_picture_parameters(decoder, &picture.base); - memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix)); - intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision); - buffer->set_quant_matrix(buffer, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix); + memset(&quant, 0, sizeof(quant)); + quant.base.codec = PIPE_VIDEO_CODEC_MPEG12; + quant.intra_matrix = picture_info->intra_quantizer_matrix; + quant.non_intra_matrix = picture_info->non_intra_quantizer_matrix; - for (i = 0; i < bitstream_buffer_count; ++i) - buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes, - bitstream_buffers[i].bitstream, &picture.base, num_ycbcr_blocks); + decoder->set_quant_matrix(decoder, &quant.base); + + decoder->begin_frame(decoder); - buffer->end_frame(buffer); + for (i = 0; i < bitstream_buffer_count; ++i) + decoder->decode_bitstream(decoder, bitstream_buffers[i].bitstream_bytes, + bitstream_buffers[i].bitstream); - decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target); + decoder->end_frame(decoder); return VDP_STATUS_OK; } @@ -254,17 +272,19 @@ vlVdpDecoderRender(VdpDecoder decoder, // TODO: Recreate decoder with correct chroma return VDP_STATUS_INVALID_CHROMA_TYPE; - // TODO: Right now only mpeg2 is supported. + // TODO: Right now only mpeg 1 & 2 is supported. switch (vldecoder->decoder->profile) { + case PIPE_VIDEO_PROFILE_MPEG1: case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: case PIPE_VIDEO_PROFILE_MPEG2_MAIN: ++vldecoder->cur_buffer; - vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS; - return vlVdpDecoderRenderMpeg2(vldecoder->decoder, - vldecoder->buffer[vldecoder->cur_buffer], - vlsurf->video_buffer, - (VdpPictureInfoMPEG1Or2 *)picture_info, - bitstream_buffer_count,bitstream_buffers); + vldecoder->cur_buffer %= vldecoder->num_buffers; + + vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffers[vldecoder->cur_buffer]); + vldecoder->decoder->set_decode_target(vldecoder->decoder, vlsurf->video_buffer); + + return vlVdpDecoderRenderMpeg12(vldecoder->decoder, (VdpPictureInfoMPEG1Or2 *)picture_info, + bitstream_buffer_count, bitstream_buffers); break; default: diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index d5187006bfc..fbd24a29414 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -157,8 +157,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, vl_compositor_clear_layers(&vmixer->compositor); vl_compositor_set_buffer_layer(&vmixer->compositor, 0, surf->video_buffer, NULL, NULL); - vl_compositor_render(&vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME, - dst->surface, NULL, NULL); + vl_compositor_render(&vmixer->compositor, dst->surface, NULL, NULL); return VDP_STATUS_OK; } diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index 1176c7a30b7..7e324db5589 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -169,8 +169,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, vl_compositor_clear_layers(&pq->compositor); vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL); - vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME, - drawable_surface, NULL, NULL); + vl_compositor_render(&pq->compositor, drawable_surface, NULL, NULL); pq->device->context->pipe->screen->flush_frontbuffer ( diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h index e5d945629fb..5482eff0630 100644 --- a/src/gallium/state_trackers/vdpau/vdpau_private.h +++ b/src/gallium/state_trackers/vdpau/vdpau_private.h @@ -46,7 +46,6 @@ #define TOSTRING(x) QUOTEME(x) #define INFORMATION_STRING TOSTRING(INFORMATION) #define VL_HANDLES -#define VL_NUM_DECODE_BUFFERS 4 static inline enum pipe_video_chroma_format ChromaToPipe(VdpChromaType vdpau_type) @@ -256,7 +255,8 @@ typedef struct { vlVdpDevice *device; struct pipe_video_decoder *decoder; - struct pipe_video_decode_buffer *buffer[VL_NUM_DECODE_BUFFERS]; + unsigned num_buffers; + void **buffers; unsigned cur_buffer; } vlVdpDecoder; diff --git a/src/gallium/state_trackers/xorg/SConscript b/src/gallium/state_trackers/xorg/SConscript index 4ea4ec4ee8b..1768f701e48 100644 --- a/src/gallium/state_trackers/xorg/SConscript +++ b/src/gallium/state_trackers/xorg/SConscript @@ -32,6 +32,7 @@ sources = [ 'xorg_output.c', 'xorg_renderer.c', 'xorg_xv.c', + 'xorg_xvmc.c', ] st_xorg = env.ConvenienceLibrary( diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index f696b72e1e3..61ba6bdddf7 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "xorg_exa_tgsi.h" #include "cso_cache/cso_context.h" +#include "util/u_format.h" #include "util/u_sampler.h" @@ -52,18 +53,17 @@ static const struct xorg_composite_blend xorg_blends[] = { static INLINE void -pixel_to_float4(Pixel pixel, float *color) +pixel_to_float4(Pixel pixel, float *color, enum pipe_format format) { - CARD32 r, g, b, a; - - a = (pixel >> 24) & 0xff; - r = (pixel >> 16) & 0xff; - g = (pixel >> 8) & 0xff; - b = (pixel >> 0) & 0xff; - color[0] = ((float)r) / 255.; - color[1] = ((float)g) / 255.; - color[2] = ((float)b) / 255.; - color[3] = ((float)a) / 255.; + const struct util_format_description *format_desc; + uint8_t packed[4]; + + format_desc = util_format_description(format); + packed[0] = pixel; + packed[1] = pixel >> 8; + packed[2] = pixel >> 16; + packed[3] = pixel >> 24; + format_desc->unpack_rgba_float(color, 0, packed, 0, 1, 1); } static boolean @@ -311,7 +311,7 @@ bind_shaders(struct exa_context *exa, int op, vs_traits |= VS_SOLID_FILL; debug_assert(pSrcPicture->format == PICT_a8r8g8b8); pixel_to_float4(pSrcPicture->pSourcePict->solidFill.color, - exa->solid_color); + exa->solid_color, PIPE_FORMAT_B8G8R8A8_UNORM); exa->has_solid_color = TRUE; } else { debug_assert("!gradients not supported"); @@ -533,7 +533,7 @@ boolean xorg_solid_bind_state(struct exa_context *exa, unsigned vs_traits, fs_traits; struct xorg_shader shader; - pixel_to_float4(fg, exa->solid_color); + pixel_to_float4(fg, exa->solid_color, pixmap->tex->format); exa->has_solid_color = TRUE; #if 0 diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 6f2c52eabb6..3350ac736cf 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -372,13 +372,15 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion, save_accel = ms->exa->accel; ms->exa->accel = TRUE; - /* In case it won't be though, make sure the GPU copy contents of the - * source pixmap will be used for the software fallback - presumably the - * client modified them before calling in here. - */ - exaMoveInPixmap(src_priv->pPixmap); - DamageRegionAppend(src_draw, pRegion); - DamageRegionProcessPending(src_draw); + if (pSrcBuffer->attachment != DRI2BufferFrontLeft) { + /* In case it won't be though, make sure the GPU copy contents of the + * source pixmap will be used for the software fallback - presumably the + * client modified them before calling in here. + */ + exaMoveInPixmap(src_priv->pPixmap); + DamageRegionAppend(src_draw, pRegion); + DamageRegionProcessPending(src_draw); + } if (cust && cust->winsys_context_throttle) cust->winsys_context_throttle(cust, ms->ctx, THROTTLE_SWAP); diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 063ae92f6be..0ade319cdc3 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -817,7 +817,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) &ms->swapThrottling) ? X_CONFIG : X_DEFAULT; - ms->dirtyThrottling = cust ? cust->dirty_throttling : TRUE; + ms->dirtyThrottling = cust ? cust->dirty_throttling : FALSE; from_dt = xf86GetOptValBool(ms->Options, OPTION_THROTTLE_DIRTY, &ms->dirtyThrottling) ? X_CONFIG : X_DEFAULT; diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index 664e8c75730..84a3a2fa4e2 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -222,4 +222,11 @@ void xorg_xv_init(ScreenPtr pScreen); +/*********************************************************************** + * xorg_xvmc.c + */ +void +xorg_xvmc_init(ScreenPtr pScreen, char *name); + + #endif /* _XORG_TRACKER_H_ */ diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index af4992fc2ed..67fd6dfb501 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -750,6 +750,8 @@ xorg_xv_init(ScreenPtr pScreen) if (num_adaptors) { xf86XVScreenInit(pScreen, adaptors, num_adaptors); + if (textured_adapter) + xorg_xvmc_init(pScreen, textured_adapter->name); } else { xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Disabling Xv because no adaptors could be initialized.\n"); diff --git a/src/gallium/state_trackers/xorg/xorg_xvmc.c b/src/gallium/state_trackers/xorg/xorg_xvmc.c new file mode 100644 index 00000000000..0f3f3f00907 --- /dev/null +++ b/src/gallium/state_trackers/xorg/xorg_xvmc.c @@ -0,0 +1,119 @@ +#include "xorg_tracker.h" + +#include <xf86.h> +#include <xf86xv.h> +#include <xf86xvmc.h> +#include <X11/extensions/Xv.h> +#include <X11/extensions/XvMC.h> +#include <fourcc.h> + +#define FOURCC_RGB 0x0000003 +#define XVIMAGE_RGB \ +{ \ + FOURCC_RGB, \ + XvRGB, \ + LSBFirst, \ + { \ + 'R', 'G', 'B', 0x00, \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71 \ + }, \ + 32, \ + XvPacked, \ + 1, \ + 24, 0x00FF0000, 0x0000FF00, 0x000000FF, \ + 0, 0, 0, \ + 0, 0, 0, \ + 0, 0, 0, \ + { \ + 'B','G','R','X', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 \ + }, \ + XvTopToBottom \ +} + +static int subpicture_index_list[] = { + FOURCC_RGB, + FOURCC_IA44, + FOURCC_AI44 +}; + +static XF86MCImageIDList subpicture_list = +{ + sizeof(subpicture_index_list)/sizeof(*subpicture_index_list), + subpicture_index_list +}; + +static XF86MCSurfaceInfoRec yv12_mpeg2_surface = +{ + FOURCC_I420, + XVMC_CHROMA_FORMAT_420, + 0, + 2048, 2048, 2048, 2048, + XVMC_IDCT | XVMC_MPEG_2, + XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE, + &subpicture_list +}; + +static const XF86MCSurfaceInfoRec uyvy_mpeg2_surface = +{ + FOURCC_UYVY, + XVMC_CHROMA_FORMAT_422, + 0, + 2048, 2048, 2048, 2048, + XVMC_IDCT | XVMC_MPEG_2, + XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE, + &subpicture_list +}; + +static XF86MCSurfaceInfoPtr surfaces[] = +{ + (XF86MCSurfaceInfoPtr)&yv12_mpeg2_surface, + (XF86MCSurfaceInfoPtr)&uyvy_mpeg2_surface +}; + +static const XF86ImageRec rgb_subpicture = XVIMAGE_RGB; +static const XF86ImageRec ia44_subpicture = XVIMAGE_IA44; +static const XF86ImageRec ai44_subpicture = XVIMAGE_AI44; + +static XF86ImagePtr subpictures[] = +{ + (XF86ImagePtr)&rgb_subpicture, + (XF86ImagePtr)&ia44_subpicture, + (XF86ImagePtr)&ai44_subpicture +}; + +static const XF86MCAdaptorRec adaptor_template = +{ + "", + sizeof(surfaces)/sizeof(*surfaces), + surfaces, + sizeof(subpictures)/sizeof(*subpictures), + subpictures, + (xf86XvMCCreateContextProcPtr)NULL, + (xf86XvMCDestroyContextProcPtr)NULL, + (xf86XvMCCreateSurfaceProcPtr)NULL, + (xf86XvMCDestroySurfaceProcPtr)NULL, + (xf86XvMCCreateSubpictureProcPtr)NULL, + (xf86XvMCDestroySubpictureProcPtr)NULL +}; + +void +xorg_xvmc_init(ScreenPtr pScreen, char *name) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + XF86MCAdaptorPtr adaptorXvMC = xf86XvMCCreateAdaptorRec(); + if (!adaptorXvMC) + return; + + *adaptorXvMC = adaptor_template; + adaptorXvMC->name = name; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "[XvMC] Associated with %s.\n", name); + if (!xf86XvMCScreenInit(pScreen, 1, &adaptorXvMC)) + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "[XvMC] Failed to initialize extension.\n"); + else + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "[XvMC] Extension initialized.\n"); + xf86XvMCDestroyAdaptorRec(adaptorXvMC); +} diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index 0c53b730287..79bd9c618ce 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -42,266 +42,125 @@ #include "xvmc_private.h" -static const unsigned const_empty_block_mask_420[3][2][2] = { - { { 0x20, 0x10 }, { 0x08, 0x04 } }, - { { 0x02, 0x02 }, { 0x02, 0x02 } }, - { { 0x01, 0x01 }, { 0x01, 0x01 } } -}; - -static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic) -{ - switch (xvmc_pic) { - case XVMC_TOP_FIELD: - return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP; - case XVMC_BOTTOM_FIELD: - return PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM; - case XVMC_FRAME_PICTURE: - return PIPE_MPEG12_PICTURE_TYPE_FRAME; - default: - assert(0); - } - - XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized picture type 0x%08X.\n", xvmc_pic); - - return -1; -} - -static inline void -MacroBlockTypeToPipeWeights(const XvMCMacroBlock *xvmc_mb, unsigned weights[2]) -{ - assert(xvmc_mb); - - switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) { - case XVMC_MB_TYPE_MOTION_FORWARD: - weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - break; - - case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD): - weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF; - weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF; - break; - - case XVMC_MB_TYPE_MOTION_BACKWARD: - weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX; - break; - - default: - /* workaround for xines xxmc video out plugin */ - if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) { - weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - } else { - weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN; - weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN; - } - break; - } -} - -static inline struct pipe_motionvector -MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector, - unsigned field_select_mask, unsigned weight) -{ - struct pipe_motionvector mv; - - assert(xvmc_mb); - - switch (xvmc_mb->motion_type) { - case XVMC_PREDICTION_FRAME: - mv.top.x = xvmc_mb->PMV[0][vector][0]; - mv.top.y = xvmc_mb->PMV[0][vector][1]; - mv.top.field_select = PIPE_VIDEO_FRAME; - mv.top.weight = weight; - - mv.bottom.x = xvmc_mb->PMV[0][vector][0]; - mv.bottom.y = xvmc_mb->PMV[0][vector][1]; - mv.bottom.weight = weight; - mv.bottom.field_select = PIPE_VIDEO_FRAME; - break; - - case XVMC_PREDICTION_FIELD: - mv.top.x = xvmc_mb->PMV[0][vector][0]; - mv.top.y = xvmc_mb->PMV[0][vector][1]; - mv.top.field_select = (xvmc_mb->motion_vertical_field_select & field_select_mask) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - mv.top.weight = weight; - - mv.bottom.x = xvmc_mb->PMV[1][vector][0]; - mv.bottom.y = xvmc_mb->PMV[1][vector][1]; - mv.bottom.field_select = (xvmc_mb->motion_vertical_field_select & (field_select_mask << 2)) ? - PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD; - mv.bottom.weight = weight; - break; - - default: // TODO: Support DUALPRIME and 16x8 - break; - } - - return mv; -} - -static inline void -UploadYcbcrBlocks(XvMCSurfacePrivate *surface, +static void +MacroBlocksToPipe(XvMCContextPrivate *context, + XvMCSurfacePrivate *surface, + unsigned int xvmc_picture_structure, const XvMCMacroBlock *xvmc_mb, - const XvMCBlockArray *xvmc_blocks) + const XvMCBlockArray *xvmc_blocks, + struct pipe_mpeg12_macroblock *mb, + unsigned int num_macroblocks) { - enum pipe_mpeg12_dct_intra intra; - enum pipe_mpeg12_dct_type coding; + unsigned int i, j, k; - unsigned tb, x, y, luma_blocks; - short *blocks; - - assert(surface); assert(xvmc_mb); + assert(xvmc_blocks); + assert(num_macroblocks); - if (!xvmc_mb->coded_block_pattern) - return; - - intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ? - PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA; - - coding = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ? - PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME; - - blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES; - - for (y = 0, luma_blocks = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x, ++tb) { - if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) { + for (; num_macroblocks > 0; --num_macroblocks) { + mb->base.codec = PIPE_VIDEO_CODEC_MPEG12; + mb->x = xvmc_mb->x; + mb->y = xvmc_mb->y; + mb->macroblock_type = xvmc_mb->macroblock_type; - struct pipe_ycbcr_block *stream = surface->ycbcr[0].stream; - stream->x = xvmc_mb->x * 2 + x; - stream->y = xvmc_mb->y * 2 + y; - stream->intra = intra; - stream->coding = coding; + switch (xvmc_picture_structure) { + case XVMC_FRAME_PICTURE: + mb->macroblock_modes.bits.frame_motion_type = xvmc_mb->motion_type; + mb->macroblock_modes.bits.field_motion_type = 0; + break; - surface->ycbcr[0].num_blocks_added++; - surface->ycbcr[0].stream++; + case XVMC_TOP_FIELD: + case XVMC_BOTTOM_FIELD: + mb->macroblock_modes.bits.frame_motion_type = 0; + mb->macroblock_modes.bits.field_motion_type = xvmc_mb->motion_type; + break; - luma_blocks++; - } + default: + assert(0); } - } - - if (luma_blocks > 0) { - memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES * luma_blocks); - surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES * luma_blocks; - blocks += BLOCK_SIZE_SAMPLES * luma_blocks; - } - - /* TODO: Implement 422, 444 */ - //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - for (tb = 1; tb < 3; ++tb) { - if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) { + mb->macroblock_modes.bits.dct_type = xvmc_mb->dct_type; + mb->motion_vertical_field_select = xvmc_mb->motion_vertical_field_select; - struct pipe_ycbcr_block *stream = surface->ycbcr[tb].stream; - stream->x = xvmc_mb->x; - stream->y = xvmc_mb->y; - stream->intra = intra; - stream->coding = PIPE_MPEG12_DCT_TYPE_FRAME; + for (i = 0; i < 2; ++i) + for (j = 0; j < 2; ++j) + for (k = 0; k < 2; ++k) + mb->PMV[i][j][k] = xvmc_mb->PMV[i][j][k]; - memcpy(surface->ycbcr[tb].buffer, blocks, BLOCK_SIZE_BYTES); + mb->coded_block_pattern = xvmc_mb->coded_block_pattern; + mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES; + mb->num_skipped_macroblocks = 0; - surface->ycbcr[tb].num_blocks_added++; - surface->ycbcr[tb].stream++; - surface->ycbcr[tb].buffer += BLOCK_SIZE_SAMPLES; - blocks += BLOCK_SIZE_SAMPLES; - } + ++xvmc_mb; + ++mb; } - } static void -MacroBlocksToPipe(XvMCSurfacePrivate *surface, - unsigned int xvmc_picture_structure, - const XvMCMacroBlock *xvmc_mb, - const XvMCBlockArray *xvmc_blocks, - unsigned int num_macroblocks) +SetDecoderStatus(XvMCSurfacePrivate *surface) { - unsigned int i, j; + struct pipe_video_decoder *decoder; + struct pipe_video_buffer *ref_frames[2]; - assert(xvmc_mb); - assert(xvmc_blocks); - assert(num_macroblocks); + XvMCContextPrivate *context_priv; - for (i = 0; i < num_macroblocks; ++i) { - unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y; - unsigned mv_weights[2]; + unsigned i, num_refs = 0; - if (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_PATTERN | XVMC_MB_TYPE_INTRA)) - UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks); + assert(surface); - MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights); + context_priv = surface->context->privData; + decoder = context_priv->decoder; - for (j = 0; j < 2; ++j) { - if (!surface->ref[j].mv) continue; + decoder->set_decode_buffer(decoder, surface->decode_buffer); + decoder->set_decode_target(decoder, surface->video_buffer); - surface->ref[j].mv[mv_pos] = MotionVectorToPipe - ( - xvmc_mb, j, - j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD, - mv_weights[j] - ); - } + for (i = 0; i < 2; ++i) { + if (surface->ref[i]) { + XvMCSurfacePrivate *ref = surface->ref[i]->privData; - ++xvmc_mb; + if (ref) + ref_frames[num_refs++] = ref->video_buffer; + } } + decoder->set_reference_frames(decoder, ref_frames, num_refs); } static void -unmap_and_flush_surface(XvMCSurfacePrivate *surface) +RecursiveEndFrame(XvMCSurfacePrivate *surface) { - struct pipe_video_buffer *ref_frames[2]; XvMCContextPrivate *context_priv; - unsigned i, num_ycbcr_blocks[3]; + unsigned i; assert(surface); context_priv = surface->context->privData; for ( i = 0; i < 2; ++i ) { - if (surface->ref[i].surface) { - XvMCSurfacePrivate *ref = surface->ref[i].surface->privData; + if (surface->ref[i]) { + XvMCSurface *ref = surface->ref[i]; assert(ref); - unmap_and_flush_surface(ref); - surface->ref[i].surface = NULL; - ref_frames[i] = ref->video_buffer; - } else { - ref_frames[i] = NULL; + surface->ref[i] = NULL; + RecursiveEndFrame(ref->privData); + surface->ref[i] = ref; } } - if (surface->mapped) { - surface->decode_buffer->end_frame(surface->decode_buffer); - for (i = 0; i < 3; ++i) - num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added; - context_priv->decoder->flush_buffer(surface->decode_buffer, - num_ycbcr_blocks, - ref_frames, - surface->video_buffer); - surface->mapped = 0; + if (surface->frame_started) { + surface->frame_started = 0; + SetDecoderStatus(surface); + + for (i = 0; i < 2; ++i) + surface->ref[i] = NULL; + + context_priv->decoder->end_frame(context_priv->decoder); } } PUBLIC Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface) { - static const uint8_t dummy_quant[64] = { - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 - }; - XvMCContextPrivate *context_priv; struct pipe_context *pipe; XvMCSurfacePrivate *surface_priv; @@ -323,9 +182,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac return BadAlloc; surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder); - surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant); - - surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer); surface_priv->video_buffer = pipe->create_video_buffer ( pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format, @@ -355,15 +211,15 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks ) { - struct pipe_video_decode_buffer *t_buffer; + struct pipe_mpeg12_macroblock mb[num_macroblocks]; + struct pipe_video_decoder *decoder; + XvMCContextPrivate *context_priv; XvMCSurfacePrivate *target_surface_priv; XvMCSurfacePrivate *past_surface_priv; XvMCSurfacePrivate *future_surface_priv; XvMCMacroBlock *xvmc_mb; - unsigned i; - XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n", target_surface, past_surface, future_surface); @@ -394,6 +250,9 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur assert(flags == 0 || flags == XVMC_SECOND_FIELD); + context_priv = context->privData; + decoder = context_priv->decoder; + target_surface_priv = target_surface->privData; past_surface_priv = past_surface ? past_surface->privData : NULL; future_surface_priv = future_surface ? future_surface->privData : NULL; @@ -402,50 +261,39 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur assert(!past_surface || past_surface_priv->context == context); assert(!future_surface || future_surface_priv->context == context); - t_buffer = target_surface_priv->decode_buffer; - - // enshure that all reference frames are flushed - // not really nessasary, but speeds ups rendering + // call end frame on all referenced frames if (past_surface) - unmap_and_flush_surface(past_surface->privData); + RecursiveEndFrame(past_surface->privData); if (future_surface) - unmap_and_flush_surface(future_surface->privData); + RecursiveEndFrame(future_surface->privData); xvmc_mb = macroblocks->macro_blocks + first_macroblock; /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */ - if (target_surface_priv->mapped && ( - target_surface_priv->ref[0].surface != past_surface || - target_surface_priv->ref[1].surface != future_surface || + if (target_surface_priv->frame_started && ( + target_surface_priv->ref[0] != past_surface || + target_surface_priv->ref[1] != future_surface || (xvmc_mb->x == 0 && xvmc_mb->y == 0))) { - // If they change anyway we need to clear our surface - unmap_and_flush_surface(target_surface_priv); + // If they change anyway we must assume that the current frame is ended + RecursiveEndFrame(target_surface_priv); } - if (!target_surface_priv->mapped) { - t_buffer->begin_frame(t_buffer); - - for (i = 0; i < 3; ++i) { - target_surface_priv->ycbcr[i].num_blocks_added = 0; - target_surface_priv->ycbcr[i].stream = t_buffer->get_ycbcr_stream(t_buffer, i); - target_surface_priv->ycbcr[i].buffer = t_buffer->get_ycbcr_buffer(t_buffer, i); - } + target_surface_priv->ref[0] = past_surface; + target_surface_priv->ref[1] = future_surface; - for (i = 0; i < 2; ++i) { - target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface; - - if (target_surface_priv->ref[i].surface) - target_surface_priv->ref[i].mv = t_buffer->get_mv_stream(t_buffer, i); - else - target_surface_priv->ref[i].mv = NULL; - } + SetDecoderStatus(target_surface_priv); - target_surface_priv->mapped = 1; + if (!target_surface_priv->frame_started) { + target_surface_priv->frame_started = 1; + decoder->begin_frame(decoder); } - MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks); + MacroBlocksToPipe(context_priv, target_surface_priv, picture_structure, + xvmc_mb, blocks, mb, num_macroblocks); + + context_priv->decoder->decode_macroblock(context_priv->decoder, &mb[0].base, num_macroblocks); XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface); @@ -543,7 +391,9 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, assert(desty + desth - 1 < drawable_surface->height); */ - unmap_and_flush_surface(surface_priv); + RecursiveEndFrame(surface_priv); + + context_priv->decoder->flush(context_priv->decoder); vl_compositor_clear_layers(compositor); vl_compositor_set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL); @@ -567,7 +417,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, // Workaround for r600g, there seems to be a bug in the fence refcounting code pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL); - vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, NULL); + vl_compositor_render(compositor, context_priv->drawable_surface, &dst_rect, NULL); pipe->flush(pipe, &surface_priv->fence); @@ -630,6 +480,7 @@ PUBLIC Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface) { XvMCSurfacePrivate *surface_priv; + XvMCContextPrivate *context_priv; XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface); @@ -639,10 +490,13 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface) return XvMCBadSurface; surface_priv = surface->privData; + context_priv = surface_priv->context->privData; - if (surface_priv->mapped) - surface_priv->decode_buffer->end_frame(surface_priv->decode_buffer); - surface_priv->decode_buffer->destroy(surface_priv->decode_buffer); + if (surface_priv->frame_started) { + SetDecoderStatus(surface_priv); + context_priv->decoder->end_frame(context_priv->decoder); + } + context_priv->decoder->destroy_buffer(context_priv->decoder, surface_priv->decode_buffer); surface_priv->video_buffer->destroy(surface_priv->video_buffer); FREE(surface_priv); surface->privData = NULL; diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h index 5f8d9d13cb3..fd14ac916ee 100644 --- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h +++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h @@ -45,7 +45,6 @@ struct vl_context; struct pipe_video_decoder; -struct pipe_video_decode_buffer; struct pipe_video_buffer; struct pipe_sampler_view; @@ -70,22 +69,13 @@ typedef struct typedef struct { - struct pipe_video_decode_buffer *decode_buffer; + void *decode_buffer; struct pipe_video_buffer *video_buffer; - bool mapped; // are we still mapped to memory? + // have we allready told the decoder to start a frame + bool frame_started; - struct { - unsigned num_blocks_added; - struct pipe_ycbcr_block *stream; - short *buffer; - } ycbcr[3]; - - unsigned mv_stride; - struct { - XvMCSurface *surface; - struct pipe_motionvector *mv; - } ref[2]; + XvMCSurface *ref[2]; struct pipe_fence_handle *fence; diff --git a/src/gallium/targets/dri-r300/target.c b/src/gallium/targets/dri-r300/target.c index b48bcad3710..9b6d816fb62 100644 --- a/src/gallium/targets/dri-r300/target.c +++ b/src/gallium/targets/dri-r300/target.c @@ -1,4 +1,3 @@ - #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "radeon/drm/radeon_drm_public.h" diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 0c4de203d35..2d7463008fe 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/noop/libnoop.a diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index 1df11a8747b..c10d65b595a 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -6,6 +6,7 @@ env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) env.Prepend(LIBS = [ st_dri, + radeonwinsys, r600winsys, r600, trace, diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/dri-r600/target.c +++ b/src/gallium/targets/dri-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/egl-static/Android.mk b/src/gallium/targets/egl-static/Android.mk new file mode 100644 index 00000000000..ebc89ead454 --- /dev/null +++ b/src/gallium/targets/egl-static/Android.mk @@ -0,0 +1,56 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + egl.c \ + egl_pipe.c \ + egl_st.c + +LOCAL_CFLAGS := \ + -DFEATURE_ES1=1 \ + -DFEATURE_ES2=1 \ + -D_EGL_MAIN=_eglBuiltInDriverGALLIUM + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/state_trackers/vega \ + $(GALLIUM_TOP)/state_trackers/egl \ + $(MESA_TOP)/src/egl/main \ + $(MESA_TOP)/src/mesa \ + $(DRM_TOP)/include/drm \ + $(DRM_TOP) + +# swrast +LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE + +ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -D_EGL_PIPE_R600=1 +endif + +LOCAL_MODULE := libmesa_egl_gallium + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile index 69e7eecdf0c..1583ab181ea 100644 --- a/src/gallium/targets/egl-static/Makefile +++ b/src/gallium/targets/egl-static/Makefile @@ -117,11 +117,12 @@ endif # r300 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R300=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -egl_SYS += -ldrm_radeon +endif endif # r600 @@ -129,8 +130,8 @@ ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) egl_CPPFLAGS += -D_EGL_PIPE_R600=1 egl_LIBS += \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -egl_SYS += -ldrm_radeon endif # vmwgfx @@ -141,10 +142,18 @@ egl_LIBS += \ $(TOP)/src/gallium/drivers/svga/libsvga.a endif -# swrast +# softpipe +ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),) egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a egl_SYS += -lm +endif + +# llvmpipe +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +egl_CPPFLAGS += -DGALLIUM_LLVMPIPE +egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif # sort to remove duplicates egl_CPPFLAGS := $(sort $(egl_CPPFLAGS)) @@ -158,8 +167,6 @@ st_GL_SYS := -lm -lpthread $(DLOPEN_LIBS) # LLVM ifeq ($(MESA_LLVM),1) -egl_CPPFLAGS += -DGALLIUM_LLVMPIPE -egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a egl_SYS += $(LLVM_LIBS) LDFLAGS += $(LLVM_LDFLAGS) diff --git a/src/gallium/targets/egl-static/egl.c b/src/gallium/targets/egl-static/egl.c index 568f5498dd4..a7aee27238b 100644 --- a/src/gallium/targets/egl-static/egl.c +++ b/src/gallium/targets/egl-static/egl.c @@ -109,6 +109,70 @@ out: return (*chip_id >= 0); } +#elif defined(PIPE_OS_ANDROID) + +#include <xf86drm.h> +/* for i915 */ +#include <i915_drm.h> +/* for radeon */ +#include <radeon_drm.h> +/* for util_strcmp */ +#include "util/u_string.h" + +static boolean +drm_fd_get_pci_id(int fd, int *vendor_id, int *chip_id) +{ + drmVersionPtr version; + + *chip_id = -1; + + version = drmGetVersion(fd); + if (!version) { + _eglLog(_EGL_WARNING, "invalid drm fd"); + return FALSE; + } + if (!version->name) { + _eglLog(_EGL_WARNING, "unable to determine the driver name"); + drmFreeVersion(version); + return FALSE; + } + + if (util_strcmp(version->name, "i915") == 0) { + struct drm_i915_getparam gp; + int ret; + + *vendor_id = 0x8086; + + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = chip_id; + ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + _eglLog(_EGL_WARNING, "failed to get param for i915"); + *chip_id = -1; + } + } + else if (util_strcmp(version->name, "radeon") == 0) { + struct drm_radeon_info info; + int ret; + + *vendor_id = 0x1002; + + memset(&info, 0, sizeof(info)); + info.request = RADEON_INFO_DEVICE_ID; + info.value = (unsigned long) chip_id; + ret = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (ret) { + _eglLog(_EGL_WARNING, "failed to get info for radeon"); + *chip_id = -1; + } + } + + drmFreeVersion(version); + + return (*chip_id >= 0); +} + #else static boolean @@ -157,13 +221,21 @@ drm_fd_get_screen_name(int fd) static struct pipe_screen * create_drm_screen(const char *name, int fd) { + struct pipe_screen *screen; + if (!name) { name = drm_fd_get_screen_name(fd); if (!name) return NULL; } - return egl_pipe_create_drm_screen(name, fd); + screen = egl_pipe_create_drm_screen(name, fd); + if (screen) + _eglLog(_EGL_INFO, "created a pipe screen for %s", name); + else + _eglLog(_EGL_WARNING, "failed to create a pipe screen for %s", name); + + return screen; } static struct pipe_screen * diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c index 658c532b404..f2b50bd0eab 100644 --- a/src/gallium/targets/egl-static/egl_pipe.c +++ b/src/gallium/targets/egl-static/egl_pipe.c @@ -42,7 +42,6 @@ #include "radeon/drm/radeon_drm_public.h" #include "r300/r300_public.h" /* for r600 */ -#include "r600/drm/r600_drm_public.h" #include "r600/r600_public.h" /* for vmwgfx */ #include "svga/drm/svga_drm_public.h" @@ -141,10 +140,10 @@ static struct pipe_screen * pipe_r600_create_screen(int fd) { #if _EGL_PIPE_R600 - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile index 53104253d4f..033a1acaaf9 100644 --- a/src/gallium/targets/gbm/Makefile +++ b/src/gallium/targets/gbm/Makefile @@ -15,7 +15,7 @@ GBM_INCLUDES = \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) \ +GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \ $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ @@ -79,26 +79,30 @@ nouveau_SYS = -ldrm_nouveau r300_LIBS = \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a -r300_SYS = -ldrm_radeon # r600 pipe driver r600_LIBS = \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r600/libr600.a -r600_SYS = -ldrm_radeon # vmwgfx pipe driver vmwgfx_LIBS = \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a + + # LLVM ifeq ($(MESA_LLVM),1) -pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a pipe_SYS += $(LLVM_LIBS) pipe_LDFLAGS += $(LLVM_LDFLAGS) endif +ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),) +pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +endif + # determine the targets/sources pipe_TARGETS = pipe_SOURCES = @@ -119,9 +123,11 @@ pipe_SOURCES += pipe_nouveau.c endif ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r300.so pipe_SOURCES += pipe_r300.c endif +endif ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),) pipe_TARGETS += $(PIPE_PREFIX)r600.so @@ -148,7 +154,7 @@ $(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: % @$(INSTALL) -d $(dir $@) $(INSTALL) $< $(dir $@) -$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o +$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS) $(MKLIB) -o $@ -noprefix -linker '$(CC)' \ -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \ $(MKLIB_OPTIONS) $< \ diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c index 486a6592585..9f61a51404a 100644 --- a/src/gallium/targets/gbm/pipe_r600.c +++ b/src/gallium/targets/gbm/pipe_r600.c @@ -1,16 +1,15 @@ - #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen * create_screen(int fd) { - struct radeon *rw; + struct radeon_winsys *rw; struct pipe_screen *screen; - rw = r600_drm_winsys_create(fd); + rw = radeon_drm_winsys_create(fd); if (!rw) return NULL; diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile index 28797ad528d..d09a3aa8ad2 100644 --- a/src/gallium/targets/va-r600/Makefile +++ b/src/gallium/targets/va-r600/Makefile @@ -10,6 +10,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/va-r600/target.c +++ b/src/gallium/targets/va-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile index 0fd817b8e82..c2d95af295a 100644 --- a/src/gallium/targets/vdpau-r600/Makefile +++ b/src/gallium/targets/vdpau-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/vdpau-r600/target.c +++ b/src/gallium/targets/vdpau-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/targets/xorg-r600/Makefile b/src/gallium/targets/xorg-r600/Makefile new file mode 100644 index 00000000000..4577ba605a5 --- /dev/null +++ b/src/gallium/targets/xorg-r600/Makefile @@ -0,0 +1,25 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = r600g_drv.so + +C_SOURCES = \ + target.c \ + xorg.c + +DRIVER_DEFINES = \ + -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD + +DRIVER_PIPES = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/drivers/r600/libr600.a \ + $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ + $(shell pkg-config --libs libdrm) + +include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-r600/target.c b/src/gallium/targets/xorg-r600/target.c new file mode 100644 index 00000000000..60424359a7b --- /dev/null +++ b/src/gallium/targets/xorg-r600/target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r600/r600_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct radeon_winsys *sws; + struct pipe_screen *screen; + + sws = radeon_drm_winsys_create(fd); + if (!sws) + return NULL; + + screen = r600_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen) diff --git a/src/gallium/targets/xorg-r600/xorg.c b/src/gallium/targets/xorg-r600/xorg.c new file mode 100644 index 00000000000..120cf6da6fd --- /dev/null +++ b/src/gallium/targets/xorg-r600/xorg.c @@ -0,0 +1,148 @@ +/* + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + * Author: Alan Hourihane <[email protected]> + * Author: Jakob Bornecrantz <[email protected]> + * Author: Corbin Simpson <[email protected]> + * + */ + +#include "../../state_trackers/xorg/xorg_winsys.h" + +static void r600_xorg_identify(int flags); +static Bool r600_xorg_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data); + +static const struct pci_id_match r600_xorg_device_match[] = { + {0x1002, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, + {0, 0, 0}, +}; + +static SymTabRec r600_xorg_chipsets[] = { + {PCI_MATCH_ANY, "AMD R6xx Graphics Chipset"}, + {-1, NULL} +}; + +static PciChipsets r600_xorg_pci_devices[] = { + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} +}; + +static XF86ModuleVersionInfo r600_xorg_version = { + "r600g", + MODULEVENDORSTRING, + MODINFOSTRING1, + MODINFOSTRING2, + XORG_VERSION_CURRENT, + 0, 1, 0, /* major, minor, patch */ + ABI_CLASS_VIDEODRV, + ABI_VIDEODRV_VERSION, + MOD_CLASS_VIDEODRV, + {0, 0, 0, 0} +}; + +/* + * Xorg driver exported structures + */ + +_X_EXPORT DriverRec r600_driver = { + 1, + "r600g", + r600_xorg_identify, + NULL, + xorg_tracker_available_options, + NULL, + 0, + NULL, + r600_xorg_device_match, + r600_xorg_pci_probe +}; + +static MODULESETUPPROTO(r600_xorg_setup); + +_X_EXPORT XF86ModuleData r600gModuleData = { + &r600_xorg_version, + r600_xorg_setup, + NULL +}; + +/* + * Xorg driver functions + */ + +static pointer +r600_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) +{ + static Bool setupDone = 0; + + /* This module should be loaded only once, but check to be sure. + */ + if (!setupDone) { + setupDone = 1; + xf86AddDriver(&r600_driver, module, HaveDriverFuncs); + + /* + * The return value must be non-NULL on success even though there + * is no TearDownProc. + */ + return (pointer) 1; + } else { + if (errmaj) + *errmaj = LDR_ONCEONLY; + return NULL; + } +} + +static void +r600_xorg_identify(int flags) +{ + xf86PrintChipsets("r600", "Driver for R6xx Gallium with KMS", + r600_xorg_chipsets); +} + +static Bool +r600_xorg_pci_probe(DriverPtr driver, + int entity_num, struct pci_device *device, intptr_t match_data) +{ + ScrnInfoPtr scrn = NULL; + EntityInfoPtr entity; + + scrn = xf86ConfigPciEntity(scrn, 0, entity_num, r600_xorg_pci_devices, + NULL, NULL, NULL, NULL, NULL); + if (scrn != NULL) { + scrn->driverVersion = 1; + scrn->driverName = "r600"; + scrn->name = "R600G"; + scrn->Probe = NULL; + + entity = xf86GetEntityInfo(entity_num); + + /* Use all the functions from the xorg tracker */ + xorg_tracker_set_functions(scrn); + } + return scrn != NULL; +} diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile index 0bb72f1eff9..a10a42d18ed 100644 --- a/src/gallium/targets/xvmc-r600/Makefile +++ b/src/gallium/targets/xvmc-r600/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/auxiliary/libgallium.a diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c index 8753e2bab17..1b8b6816ec1 100644 --- a/src/gallium/targets/xvmc-r600/target.c +++ b/src/gallium/targets/xvmc-r600/target.c @@ -1,14 +1,14 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" -#include "r600/drm/r600_drm_public.h" +#include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" static struct pipe_screen *create_screen(int fd) { - struct radeon *radeon; + struct radeon_winsys *radeon; struct pipe_screen *screen; - radeon = r600_drm_winsys_create(fd); + radeon = radeon_drm_winsys_create(fd); if (!radeon) return NULL; diff --git a/src/gallium/winsys/r600/drm/Android.mk b/src/gallium/winsys/r600/drm/Android.mk new file mode 100644 index 00000000000..eb79caa19ca --- /dev/null +++ b/src/gallium/winsys/r600/drm/Android.mk @@ -0,0 +1,43 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_CFLAGS := -std=c99 + +LOCAL_C_INCLUDES := \ + $(GALLIUM_TOP)/drivers/r600 \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_winsys_r600 + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index fb7b09b3a0d..c23286c8197 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -4,15 +4,8 @@ include $(TOP)/configs/current LIBNAME = r600winsys -C_SOURCES = \ - bof.c \ - evergreen_hw_context.c \ - radeon_bo.c \ - radeon_pciid.c \ - r600_bo.c \ - r600_drm.c \ - r600_hw_context.c \ - r600_bomgr.c +# get C_SOURCES +include Makefile.sources LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ -I$(TOP)/include \ diff --git a/src/gallium/winsys/r600/drm/Makefile.sources b/src/gallium/winsys/r600/drm/Makefile.sources new file mode 100644 index 00000000000..97980170e6b --- /dev/null +++ b/src/gallium/winsys/r600/drm/Makefile.sources @@ -0,0 +1,7 @@ +C_SOURCES := \ + evergreen_hw_context.c \ + radeon_pciid.c \ + r600_bo.c \ + r600_drm.c \ + r600_hw_context.c + diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript index f55bb265226..2d0d80e8bb9 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -2,16 +2,7 @@ Import('*') env = env.Clone() -r600_sources = [ - 'bof.c', - 'evergreen_hw_context.c', - 'radeon_bo.c', - 'radeon_pciid.c', - 'r600_bo.c', - 'r600_drm.c', - 'r600_hw_context.c', - 'r600_bomgr.c', -] +r600_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES') env.PkgUseModules('DRM_RADEON') diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c deleted file mode 100644 index 5c923ad38d6..00000000000 --- a/src/gallium/winsys/r600/drm/bof.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include "bof.h" - -/* - * helpers - */ -static int bof_entry_grow(bof_t *bof) -{ - bof_t **array; - - if (bof->array_size < bof->nentry) - return 0; - array = realloc(bof->array, (bof->nentry + 16) * sizeof(void*)); - if (array == NULL) - return -ENOMEM; - bof->array = array; - bof->nentry += 16; - return 0; -} - -/* - * object - */ -bof_t *bof_object(void) -{ - bof_t *object; - - object = calloc(1, sizeof(bof_t)); - if (object == NULL) - return NULL; - object->refcount = 1; - object->type = BOF_TYPE_OBJECT; - object->size = 12; - return object; -} - -bof_t *bof_object_get(bof_t *object, const char *keyname) -{ - unsigned i; - - for (i = 0; i < object->array_size; i += 2) { - if (!strcmp(object->array[i]->value, keyname)) { - return object->array[i + 1]; - } - } - return NULL; -} - -int bof_object_set(bof_t *object, const char *keyname, bof_t *value) -{ - bof_t *key; - int r; - - if (object->type != BOF_TYPE_OBJECT) - return -EINVAL; - r = bof_entry_grow(object); - if (r) - return r; - key = bof_string(keyname); - if (key == NULL) - return -ENOMEM; - object->array[object->array_size++] = key; - object->array[object->array_size++] = value; - object->size += value->size; - object->size += key->size; - bof_incref(value); - return 0; -} - -/* - * array - */ -bof_t *bof_array(void) -{ - bof_t *array = bof_object(); - - if (array == NULL) - return NULL; - array->type = BOF_TYPE_ARRAY; - array->size = 12; - return array; -} - -int bof_array_append(bof_t *array, bof_t *value) -{ - int r; - if (array->type != BOF_TYPE_ARRAY) - return -EINVAL; - r = bof_entry_grow(array); - if (r) - return r; - array->array[array->array_size++] = value; - array->size += value->size; - bof_incref(value); - return 0; -} - -bof_t *bof_array_get(bof_t *bof, unsigned i) -{ - if (!bof_is_array(bof) || i >= bof->array_size) - return NULL; - return bof->array[i]; -} - -unsigned bof_array_size(bof_t *bof) -{ - if (!bof_is_array(bof)) - return 0; - return bof->array_size; -} - -/* - * blob - */ -bof_t *bof_blob(unsigned size, void *value) -{ - bof_t *blob = bof_object(); - - if (blob == NULL) - return NULL; - blob->type = BOF_TYPE_BLOB; - blob->value = calloc(1, size); - if (blob->value == NULL) { - bof_decref(blob); - return NULL; - } - blob->size = size; - memcpy(blob->value, value, size); - blob->size += 12; - return blob; -} - -unsigned bof_blob_size(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return 0; - return bof->size - 12; -} - -void *bof_blob_value(bof_t *bof) -{ - if (!bof_is_blob(bof)) - return NULL; - return bof->value; -} - -/* - * string - */ -bof_t *bof_string(const char *value) -{ - bof_t *string = bof_object(); - - if (string == NULL) - return NULL; - string->type = BOF_TYPE_STRING; - string->size = strlen(value) + 1; - string->value = calloc(1, string->size); - if (string->value == NULL) { - bof_decref(string); - return NULL; - } - strcpy(string->value, value); - string->size += 12; - return string; -} - -/* - * int32 - */ -bof_t *bof_int32(int32_t value) -{ - bof_t *int32 = bof_object(); - - if (int32 == NULL) - return NULL; - int32->type = BOF_TYPE_INT32; - int32->size = 4; - int32->value = calloc(1, int32->size); - if (int32->value == NULL) { - bof_decref(int32); - return NULL; - } - memcpy(int32->value, &value, 4); - int32->size += 12; - return int32; -} - -int32_t bof_int32_value(bof_t *bof) -{ - return *((uint32_t*)bof->value); -} - -/* - * common - */ -static void bof_indent(int level) -{ - int i; - - for (i = 0; i < level; i++) - fprintf(stderr, " "); -} - -static void bof_print_bof(bof_t *bof, int level, int entry) -{ - bof_indent(level); - if (bof == NULL) { - fprintf(stderr, "--NULL-- for entry %d\n", entry); - return; - } - switch (bof->type) { - case BOF_TYPE_STRING: - fprintf(stderr, "%p string [%s %d]\n", bof, (char*)bof->value, bof->size); - break; - case BOF_TYPE_INT32: - fprintf(stderr, "%p int32 [%d %d]\n", bof, *(int*)bof->value, bof->size); - break; - case BOF_TYPE_BLOB: - fprintf(stderr, "%p blob [%d]\n", bof, bof->size); - break; - case BOF_TYPE_NULL: - fprintf(stderr, "%p null [%d]\n", bof, bof->size); - break; - case BOF_TYPE_OBJECT: - fprintf(stderr, "%p object [%d %d]\n", bof, bof->array_size / 2, bof->size); - break; - case BOF_TYPE_ARRAY: - fprintf(stderr, "%p array [%d %d]\n", bof, bof->array_size, bof->size); - break; - default: - fprintf(stderr, "%p unknown [%d]\n", bof, bof->type); - return; - } -} - -static void bof_print_rec(bof_t *bof, int level, int entry) -{ - unsigned i; - - bof_print_bof(bof, level, entry); - for (i = 0; i < bof->array_size; i++) { - bof_print_rec(bof->array[i], level + 2, i); - } -} - -void bof_print(bof_t *bof) -{ - bof_print_rec(bof, 0, 0); -} - -static int bof_read(bof_t *root, FILE *file, long end, int level) -{ - bof_t *bof = NULL; - int r; - - if (ftell(file) >= end) { - return 0; - } - r = bof_entry_grow(root); - if (r) - return r; - bof = bof_object(); - if (bof == NULL) - return -ENOMEM; - bof->offset = ftell(file); - r = fread(&bof->type, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->size, 4, 1, file); - if (r != 1) - goto out_err; - r = fread(&bof->array_size, 4, 1, file); - if (r != 1) - goto out_err; - switch (bof->type) { - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - bof->value = calloc(1, bof->size - 12); - if (bof->value == NULL) { - goto out_err; - } - r = fread(bof->value, bof->size - 12, 1, file); - if (r != 1) { - fprintf(stderr, "error reading %d\n", bof->size - 12); - goto out_err; - } - break; - case BOF_TYPE_NULL: - return 0; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - r = bof_read(bof, file, bof->offset + bof->size, level + 2); - if (r) - goto out_err; - break; - default: - fprintf(stderr, "invalid type %d\n", bof->type); - goto out_err; - } - root->array[root->centry++] = bof; - return bof_read(root, file, end, level); -out_err: - bof_decref(bof); - return -EINVAL; -} - -bof_t *bof_load_file(const char *filename) -{ - bof_t *root = bof_object(); - int r; - - if (root == NULL) { - fprintf(stderr, "%s failed to create root object\n", __func__); - return NULL; - } - root->file = fopen(filename, "r"); - if (root->file == NULL) - goto out_err; - r = fseek(root->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - root->offset = ftell(root->file); - r = fread(&root->type, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = fread(&root->array_size, 4, 1, root->file); - if (r != 1) - goto out_err; - r = bof_read(root, root->file, root->offset + root->size, 2); - if (r) - goto out_err; - return root; -out_err: - bof_decref(root); - return NULL; -} - -void bof_incref(bof_t *bof) -{ - bof->refcount++; -} - -void bof_decref(bof_t *bof) -{ - unsigned i; - - if (bof == NULL) - return; - if (--bof->refcount > 0) - return; - for (i = 0; i < bof->array_size; i++) { - bof_decref(bof->array[i]); - bof->array[i] = NULL; - } - bof->array_size = 0; - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - free(bof->array); - free(bof->value); - free(bof); -} - -static int bof_file_write(bof_t *bof, FILE *file) -{ - unsigned i; - int r; - - r = fwrite(&bof->type, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->size, 4, 1, file); - if (r != 1) - return -EINVAL; - r = fwrite(&bof->array_size, 4, 1, file); - if (r != 1) - return -EINVAL; - switch (bof->type) { - case BOF_TYPE_NULL: - if (bof->size) - return -EINVAL; - break; - case BOF_TYPE_STRING: - case BOF_TYPE_INT32: - case BOF_TYPE_BLOB: - r = fwrite(bof->value, bof->size - 12, 1, file); - if (r != 1) - return -EINVAL; - break; - case BOF_TYPE_OBJECT: - case BOF_TYPE_ARRAY: - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], file); - if (r) - return r; - } - break; - default: - return -EINVAL; - } - return 0; -} - -int bof_dump_file(bof_t *bof, const char *filename) -{ - unsigned i; - int r = 0; - - if (bof->file) { - fclose(bof->file); - bof->file = NULL; - } - bof->file = fopen(filename, "w"); - if (bof->file == NULL) { - fprintf(stderr, "%s failed to open file %s\n", __func__, filename); - r = -EINVAL; - goto out_err; - } - r = fseek(bof->file, 0L, SEEK_SET); - if (r) { - fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename); - goto out_err; - } - r = fwrite(&bof->type, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->size, 4, 1, bof->file); - if (r != 1) - goto out_err; - r = fwrite(&bof->array_size, 4, 1, bof->file); - if (r != 1) - goto out_err; - for (i = 0; i < bof->array_size; i++) { - r = bof_file_write(bof->array[i], bof->file); - if (r) - return r; - } -out_err: - fclose(bof->file); - bof->file = NULL; - return r; -} diff --git a/src/gallium/winsys/r600/drm/bof.h b/src/gallium/winsys/r600/drm/bof.h deleted file mode 100644 index 014affb74f1..00000000000 --- a/src/gallium/winsys/r600/drm/bof.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef BOF_H -#define BOF_H - -#include <stdio.h> -#include <stdint.h> - -#define BOF_TYPE_STRING 0 -#define BOF_TYPE_NULL 1 -#define BOF_TYPE_BLOB 2 -#define BOF_TYPE_OBJECT 3 -#define BOF_TYPE_ARRAY 4 -#define BOF_TYPE_INT32 5 - -struct bof; - -typedef struct bof { - struct bof **array; - unsigned centry; - unsigned nentry; - unsigned refcount; - FILE *file; - uint32_t type; - uint32_t size; - uint32_t array_size; - void *value; - long offset; -} bof_t; - -extern int bof_file_flush(bof_t *root); -extern bof_t *bof_file_new(const char *filename); -extern int bof_object_dump(bof_t *object, const char *filename); - -/* object */ -extern bof_t *bof_object(void); -extern bof_t *bof_object_get(bof_t *object, const char *keyname); -extern int bof_object_set(bof_t *object, const char *keyname, bof_t *value); -/* array */ -extern bof_t *bof_array(void); -extern int bof_array_append(bof_t *array, bof_t *value); -extern bof_t *bof_array_get(bof_t *bof, unsigned i); -extern unsigned bof_array_size(bof_t *bof); -/* blob */ -extern bof_t *bof_blob(unsigned size, void *value); -extern unsigned bof_blob_size(bof_t *bof); -extern void *bof_blob_value(bof_t *bof); -/* string */ -extern bof_t *bof_string(const char *value); -/* int32 */ -extern bof_t *bof_int32(int32_t value); -extern int32_t bof_int32_value(bof_t *bof); -/* common functions */ -extern void bof_decref(bof_t *bof); -extern void bof_incref(bof_t *bof); -extern bof_t *bof_load_file(const char *filename); -extern int bof_dump_file(bof_t *bof, const char *filename); -extern void bof_print(bof_t *bof); - -static inline int bof_is_object(bof_t *bof){return (bof->type == BOF_TYPE_OBJECT);} -static inline int bof_is_blob(bof_t *bof){return (bof->type == BOF_TYPE_BLOB);} -static inline int bof_is_null(bof_t *bof){return (bof->type == BOF_TYPE_NULL);} -static inline int bof_is_int32(bof_t *bof){return (bof->type == BOF_TYPE_INT32);} -static inline int bof_is_array(bof_t *bof){return (bof->type == BOF_TYPE_ARRAY);} -static inline int bof_is_string(bof_t *bof){return (bof->type == BOF_TYPE_STRING);} - -#endif diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 60d2e289396..3417eb39192 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -23,20 +23,11 @@ * Authors: * Jerome Glisse */ -#include <errno.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include "xf86drm.h" #include "r600.h" +#include "r600_priv.h" #include "evergreend.h" -#include "radeon_drm.h" -#include "bof.h" -#include "pipe/p_compiler.h" -#include "util/u_inlines.h" #include "util/u_memory.h" -#include "r600_priv.h" +#include <errno.h> #define GROUP_FORCE_NEW_BLOCK 0 @@ -168,6 +159,7 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, @@ -532,6 +524,7 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, @@ -909,6 +902,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ @@ -992,33 +986,23 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - ctx->max_db = 8; - LIST_INITHEAD(&ctx->fenced_bo); - + r600_get_backend_mask(ctx); return 0; out_err: r600_context_fini(ctx); @@ -1154,10 +1138,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1174,11 +1154,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1203,13 +1183,12 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1270,4 +1249,3 @@ void evergreen_context_flush_dest_caches(struct r600_context *ctx) ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; } - diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 0f5b063cf5a..4beedad233e 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -23,176 +23,109 @@ * Authors: * Dave Airlie */ -#include <pipe/p_compiler.h> -#include <pipe/p_screen.h> -#include <pipebuffer/pb_bufmgr.h> -#include "state_tracker/drm_driver.h" #include "r600_priv.h" #include "r600d.h" -#include "drm.h" -#include "radeon_drm.h" +#include "state_tracker/drm_driver.h" struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned binding, unsigned usage) { struct r600_bo *bo; - struct radeon_bo *rbo; + struct pb_buffer *pb; uint32_t initial_domain, domains; /* Staging resources particpate in transfers and blits only * and are used for uploads and downloads from regular * resources. We generate them internally for some transfers. */ - if (usage == PIPE_USAGE_STAGING) - domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT; - else - domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); - - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence); - if (bo) { - bo->domains = domains; - return bo; + if (usage == PIPE_USAGE_STAGING) { + domains = RADEON_DOMAIN_GTT; + initial_domain = RADEON_DOMAIN_GTT; + } else { + domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; + + switch(usage) { + case PIPE_USAGE_DYNAMIC: + case PIPE_USAGE_STREAM: + case PIPE_USAGE_STAGING: + initial_domain = RADEON_DOMAIN_GTT; + break; + case PIPE_USAGE_DEFAULT: + case PIPE_USAGE_STATIC: + case PIPE_USAGE_IMMUTABLE: + default: + initial_domain = RADEON_DOMAIN_VRAM; + break; } } - switch(usage) { - case PIPE_USAGE_DYNAMIC: - case PIPE_USAGE_STREAM: - case PIPE_USAGE_STAGING: - initial_domain = RADEON_GEM_DOMAIN_GTT; - break; - case PIPE_USAGE_DEFAULT: - case PIPE_USAGE_STATIC: - case PIPE_USAGE_IMMUTABLE: - default: - initial_domain = RADEON_GEM_DOMAIN_VRAM; - break; - } - rbo = radeon_bo(radeon, 0, size, alignment, initial_domain); - if (rbo == NULL) { + pb = radeon->ws->buffer_create(radeon->ws, size, alignment, binding, initial_domain); + if (!pb) { return NULL; } bo = calloc(1, sizeof(struct r600_bo)); - bo->size = size; - bo->alignment = alignment; bo->domains = domains; - bo->bo = rbo; - if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) { - r600_bomgr_bo_init(radeon->bomgr, bo); - } + bo->buf = pb; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); pipe_reference_init(&bo->reference, 1); return bo; } -struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode) +struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle, + unsigned *stride, unsigned *array_mode) { + struct pb_buffer *pb; struct r600_bo *bo = calloc(1, sizeof(struct r600_bo)); - struct radeon_bo *rbo; - rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0); - if (rbo == NULL) { + pb = bo->buf = radeon->ws->buffer_from_handle(radeon->ws, whandle, stride, NULL); + if (!pb) { free(bo); return NULL; } - bo->size = rbo->size; - bo->domains = (RADEON_GEM_DOMAIN_CPU | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_VRAM); pipe_reference_init(&bo->reference, 1); + bo->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; + bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb); + + if (stride) + *stride = whandle->stride; - radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); if (array_mode) { - if (bo->tiling_flags) { - if (bo->tiling_flags & RADEON_TILING_MACRO) - *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; - else if (bo->tiling_flags & RADEON_TILING_MICRO) - *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; - } else { + enum radeon_bo_layout micro, macro; + + radeon->ws->buffer_get_tiling(bo->buf, µ, ¯o); + + if (macro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + else if (micro == RADEON_LAYOUT_TILED) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; + else *array_mode = 0; - } } return bo; } -void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx) +void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage) { - struct pipe_context *pctx = ctx; - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - radeon_bo_map(radeon, bo->bo); - return (uint8_t *) bo->bo->data + bo->offset; - } - - if (p_atomic_read(&bo->bo->reference.count) > 1) { - if (usage & PIPE_TRANSFER_DONTBLOCK) { - return NULL; - } - if (ctx) { - pctx->flush(pctx, NULL); - } - } - - if (usage & PIPE_TRANSFER_DONTBLOCK) { - uint32_t domain; - - if (radeon_bo_busy(radeon, bo->bo, &domain)) - return NULL; - if (radeon_bo_map(radeon, bo->bo)) { - return NULL; - } - goto out; - } - - radeon_bo_map(radeon, bo->bo); - if (radeon_bo_wait(radeon, bo->bo)) { - radeon_bo_unmap(radeon, bo->bo); - return NULL; - } - -out: - return (uint8_t *) bo->bo->data + bo->offset; + return radeon->ws->buffer_map(bo->buf, cs, usage); } void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo) { - radeon_bo_unmap(radeon, bo->bo); + radeon->ws->buffer_unmap(bo->buf); } -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) +void r600_bo_destroy(struct r600_bo *bo) { - if (bo->manager_id) { - if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) { - /* destroy is delayed by buffer manager */ - return; - } - } - radeon_bo_reference(radeon, &bo->bo, NULL); + pb_reference(&bo->buf, NULL); free(bo); } boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, - unsigned stride, struct winsys_handle *whandle) + unsigned stride, struct winsys_handle *whandle) { - whandle->stride = stride; - switch(whandle->type) { - case DRM_API_HANDLE_TYPE_KMS: - whandle->handle = bo->bo->handle; - break; - case DRM_API_HANDLE_TYPE_SHARED: - if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle)) - return FALSE; - break; - default: - return FALSE; - } - - return TRUE; + return radeon->ws->buffer_get_handle(bo->buf, stride, whandle); } diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c deleted file mode 100644 index 4918d5eb0b1..00000000000 --- a/src/gallium/winsys/r600/drm/r600_bomgr.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2010 VMWare. - * Copyright 2010 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jose Fonseca <jrfonseca-at-vmware-dot-com> - * Thomas Hellström <thomas-at-vmware-dot-com> - * Jerome Glisse <[email protected]> - */ -#include <util/u_memory.h> -#include <util/u_double_list.h> -#include <util/u_time.h> -#include <pipebuffer/pb_bufmgr.h> -#include "r600_priv.h" - -static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(!os_time_timeout(bo->start, bo->end, now)) - break; - - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } -} - -static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr, - struct r600_bo *bo, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - if(bo->size < size) { - return 0; - } - - /* be lenient with size */ - if(bo->size >= 2*size) { - return 0; - } - - if(!pb_check_alignment(alignment, bo->alignment)) { - return 0; - } - - if (!fence_is_after(cfence, bo->fence)) { - return 0; - } - - return 1; -} - -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence) -{ - struct r600_bo *bo, *tmp; - int64_t now; - - - pipe_mutex_lock(mgr->mutex); - - now = os_time_get(); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) { - LIST_DEL(&bo->list); - --mgr->num_delayed; - r600_bomgr_timeout_flush(mgr); - pipe_mutex_unlock(mgr->mutex); - LIST_INITHEAD(&bo->list); - pipe_reference_init(&bo->reference, 1); - return bo; - } - - if(os_time_timeout(bo->start, bo->end, now)) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - } - - pipe_mutex_unlock(mgr->mutex); - return NULL; -} - -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - LIST_INITHEAD(&bo->list); - bo->manager_id = 1; -} - -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) -{ - bo->start = os_time_get(); - bo->end = bo->start + mgr->usecs; - pipe_mutex_lock(mgr->mutex); - LIST_ADDTAIL(&bo->list, &mgr->delayed); - ++mgr->num_delayed; - pipe_mutex_unlock(mgr->mutex); - return FALSE; -} - -void r600_bomgr_destroy(struct r600_bomgr *mgr) -{ - struct r600_bo *bo, *tmp; - - pipe_mutex_lock(mgr->mutex); - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) { - mgr->num_delayed--; - bo->manager_id = 0; - LIST_DEL(&bo->list); - r600_bo_destroy(mgr->radeon, bo); - } - pipe_mutex_unlock(mgr->mutex); - - FREE(mgr); -} - -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs) -{ - struct r600_bomgr *mgr; - - mgr = CALLOC_STRUCT(r600_bomgr); - if (mgr == NULL) - return NULL; - - mgr->radeon = radeon; - mgr->usecs = usecs; - LIST_INITHEAD(&mgr->delayed); - mgr->num_delayed = 0; - pipe_mutex_init(mgr->mutex); - - return mgr; -} diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index ab0afea5bf5..7d5583fd287 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -25,29 +25,18 @@ * Corbin Simpson <[email protected]> * Joakim Sindholt <[email protected]> */ -#include <stdio.h> -#include <errno.h> -#include <sys/ioctl.h> -#include "util/u_inlines.h" -#include "util/u_debug.h" -#include "util/u_hash_table.h" -#include <pipebuffer/pb_bufmgr.h> -#include "r600.h" + #include "r600_priv.h" #include "r600_drm_public.h" -#include "xf86drm.h" -#include "radeon_drm.h" - -#ifndef RADEON_INFO_TILING_CONFIG -#define RADEON_INFO_TILING_CONFIG 0x6 -#endif +#include "util/u_memory.h" +#include <errno.h> -#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ -#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb #endif -#ifndef RADEON_INFO_NUM_BACKENDS -#define RADEON_INFO_NUM_BACKENDS 0xa +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd #endif enum radeon_family r600_get_family(struct radeon *r600) @@ -67,31 +56,27 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) unsigned r600_get_clock_crystal_freq(struct radeon *radeon) { - return radeon->clock_crystal_freq; + return radeon->info.r600_clock_crystal_freq; } unsigned r600_get_num_backends(struct radeon *radeon) { - return radeon->num_backends; + return radeon->info.r600_num_backends; } -unsigned r600_get_minor_version(struct radeon *radeon) +unsigned r600_get_num_tile_pipes(struct radeon *radeon) { - return radeon->minor_version; + return radeon->info.r600_num_tile_pipes; } - -static int radeon_get_device(struct radeon *radeon) +unsigned r600_get_backend_map(struct radeon *radeon) { - struct drm_radeon_info info = {}; - int r; + return radeon->info.r600_backend_map; +} - radeon->device = 0; - info.request = RADEON_INFO_DEVICE_ID; - info.value = (uintptr_t)&radeon->device; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - return r; +unsigned r600_get_minor_version(struct radeon *radeon) +{ + return radeon->info.drm_minor; } static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) @@ -186,124 +171,32 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) static int radeon_drm_get_tiling(struct radeon *radeon) { - struct drm_radeon_info info = {}; - int r; - uint32_t tiling_config = 0; + uint32_t tiling_config = radeon->info.r600_tiling_config; - info.request = RADEON_INFO_TILING_CONFIG; - info.value = (uintptr_t)&tiling_config; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - - if (r) + if (!tiling_config) return 0; if (radeon->chip_class == R600 || radeon->chip_class == R700) { - r = r600_interpret_tiling(radeon, tiling_config); + return r600_interpret_tiling(radeon, tiling_config); } else { - r = eg_interpret_tiling(radeon, tiling_config); - } - return r; -} - -static int radeon_get_clock_crystal_freq(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t clock_crystal_freq = 0; - int r; - - info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; - info.value = (uintptr_t)&clock_crystal_freq; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->clock_crystal_freq = clock_crystal_freq; - return 0; -} - - -static int radeon_get_num_backends(struct radeon *radeon) -{ - struct drm_radeon_info info = {}; - uint32_t num_backends = 0; - int r; - - info.request = RADEON_INFO_NUM_BACKENDS; - info.value = (uintptr_t)&num_backends; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - if (r) - return r; - - radeon->num_backends = num_backends; - return 0; -} - - -static int radeon_init_fence(struct radeon *radeon) -{ - radeon->fence = 1; - radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0); - if (radeon->fence_bo == NULL) { - return -ENOMEM; + return eg_interpret_tiling(radeon, tiling_config); } - radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL); - *radeon->cfence = 0; - return 0; -} - -#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) - -static unsigned handle_hash(void *key) -{ - return PTR_TO_UINT(key); -} - -static int handle_compare(void *key1, void *key2) -{ - return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } -static struct radeon *radeon_new(int fd, unsigned device) +struct radeon *radeon_create(struct radeon_winsys *ws) { - struct radeon *radeon; - int r; - drmVersionPtr version; - - radeon = calloc(1, sizeof(*radeon)); + struct radeon *radeon = CALLOC_STRUCT(radeon); if (radeon == NULL) { return NULL; } - radeon->fd = fd; - radeon->device = device; - radeon->refcount = 1; - - version = drmGetVersion(radeon->fd); - if (version->version_major != 2) { - fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.x.x\n", __FUNCTION__, - version->version_major, version->version_minor, - version->version_patchlevel); - drmFreeVersion(version); - exit(1); - } - - radeon->minor_version = version->version_minor; - drmFreeVersion(version); + radeon->ws = ws; + ws->query_info(ws, &radeon->info); - r = radeon_get_device(radeon); - if (r) { - fprintf(stderr, "Failed to get device id\n"); - return radeon_decref(radeon); - } - - radeon->family = radeon_family_from_device(radeon->device); + radeon->family = radeon_family_from_device(radeon->info.pci_id); if (radeon->family == CHIP_UNKNOWN) { - fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); - return radeon_decref(radeon); + fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id); + return radeon_destroy(radeon); } /* setup class */ switch (radeon->family) { @@ -349,56 +242,21 @@ static struct radeon *radeon_new(int fd, unsigned device) break; default: fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", - __func__, radeon->device); + __func__, radeon->info.pci_id); break; } if (radeon_drm_get_tiling(radeon)) return NULL; - /* get the GPU counter frequency, failure is non fatal */ - radeon_get_clock_crystal_freq(radeon); - - if (radeon->minor_version >= 9) - radeon_get_num_backends(radeon); - - radeon->bomgr = r600_bomgr_create(radeon, 1000000); - if (radeon->bomgr == NULL) { - return NULL; - } - r = radeon_init_fence(radeon); - if (r) { - radeon_decref(radeon); - return NULL; - } - - radeon->bo_handles = util_hash_table_create(handle_hash, handle_compare); - pipe_mutex_init(radeon->bo_handles_mutex); return radeon; } -struct radeon *r600_drm_winsys_create(int drmfd) -{ - return radeon_new(drmfd, 0); -} - -struct radeon *radeon_decref(struct radeon *radeon) +struct radeon *radeon_destroy(struct radeon *radeon) { if (radeon == NULL) return NULL; - if (--radeon->refcount > 0) { - return NULL; - } - - util_hash_table_destroy(radeon->bo_handles); - pipe_mutex_destroy(radeon->bo_handles_mutex); - if (radeon->fence_bo) { - r600_bo_reference(radeon, &radeon->fence_bo, NULL); - } - - if (radeon->bomgr) - r600_bomgr_destroy(radeon->bomgr); - free(radeon); + FREE(radeon); return NULL; } diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h index cfce8df9c2c..b8a37c7574f 100644 --- a/src/gallium/winsys/r600/drm/r600_drm_public.h +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -26,8 +26,8 @@ #ifndef R600_DRM_PUBLIC_H #define R600_DRM_PUBLIC_H -struct radeon; +struct radeon_winsys; -struct radeon *r600_drm_winsys_create(int drmFD); +struct radeon *radeon_create(struct radeon_winsys *ws); #endif diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 07bd544d1a0..6c5b4b8953a 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -23,23 +23,97 @@ * Authors: * Jerome Glisse */ -#include <errno.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include <pipe/p_compiler.h> -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include <pipebuffer/pb_bufmgr.h> -#include "xf86drm.h" -#include "radeon_drm.h" #include "r600_priv.h" -#include "bof.h" #include "r600d.h" +#include "util/u_memory.h" +#include <errno.h> #define GROUP_FORCE_NEW_BLOCK 0 +/* Get backends mask */ +void r600_get_backend_mask(struct r600_context *ctx) +{ + struct r600_bo * buffer; + u32 * results; + unsigned num_backends = r600_get_num_backends(ctx->radeon); + unsigned i, mask = 0; + + /* if backend_map query is supported by the kernel */ + if (ctx->radeon->info.r600_backend_map_valid) { + unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon); + unsigned backend_map = r600_get_backend_map(ctx->radeon); + unsigned item_width, item_mask; + + if (ctx->radeon->chip_class >= EVERGREEN) { + item_width = 4; + item_mask = 0x7; + } else { + item_width = 2; + item_mask = 0x3; + } + + while(num_tile_pipes--) { + i = backend_map & item_mask; + mask |= (1<<i); + backend_map >>= item_width; + } + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + } + + /* otherwise backup path for older kernels */ + + /* create buffer for event data */ + buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0, + PIPE_USAGE_STAGING); + if (!buffer) + goto err; + + /* initialize buffer with zeroes */ + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE); + if (results) { + memset(results, 0, ctx->max_db * 4 * 4); + r600_bo_unmap(ctx->radeon, buffer); + + /* emit EVENT_WRITE for ZPASS_DONE */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); + + /* execute */ + r600_context_flush(ctx, 0); + + /* analyze results */ + results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ); + if (results) { + for(i = 0; i < ctx->max_db; i++) { + /* at least highest bit will be set if backend is used */ + if (results[i*4 + 1]) + mask |= (1<<i); + } + r600_bo_unmap(ctx->radeon, buffer); + } + } + + r600_bo_reference(&buffer, NULL); + + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + +err: + /* fallback to old method - set num_backends lower bits to 1 */ + ctx->backend_mask = (~((u32)0))>>(32-num_backends); + return; +} + static inline void r600_context_ps_partial_flush(struct r600_context *ctx) { if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) @@ -66,32 +140,6 @@ void r600_init_cs(struct r600_context *ctx) ctx->init_dwords = ctx->pm4_cdwords; } -static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) -{ - for (int i = 0; i < ctx->creloc; i++) { - if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist)) - LIST_DELINIT(&ctx->bo[i]->fencedlist); - LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo); - ctx->bo[i]->fence = ctx->radeon->fence; - ctx->bo[i]->ctx = ctx; - } -} - -static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence) -{ - struct radeon_bo *bo = NULL; - struct radeon_bo *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - if (bo->fence <= *ctx->radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - } else { - bo->fence = fence; - } - } -} - static void r600_init_block(struct r600_context *ctx, struct r600_block *block, const struct r600_reg *reg, int index, int nreg, @@ -680,17 +728,6 @@ static int r600_loop_const_init(struct r600_context *ctx, u32 offset) return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); } -static void r600_context_clear_fenced_bo(struct r600_context *ctx) -{ - struct radeon_bo *bo, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - bo->ctx = NULL; - } -} - static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) { struct r600_block *block; @@ -699,7 +736,7 @@ static void r600_free_resource_range(struct r600_context *ctx, struct r600_range block = range->blocks[i]; if (block) { for (int k = 1; k <= block->nbo; k++) - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); free(block); } } @@ -724,7 +761,7 @@ void r600_context_fini(struct r600_context *ctx) range->blocks[CTX_BLOCK_ID(offset)] = NULL; } for (int k = 1; k <= block->nbo; k++) { - r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + r600_bo_reference(&block->reloc[k].bo, NULL); } free(block); } @@ -736,11 +773,9 @@ void r600_context_fini(struct r600_context *ctx) r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); - free(ctx->reloc); free(ctx->bo); - free(ctx->pm4); + ctx->radeon->ws->cs_destroy(ctx->cs); - r600_context_clear_fenced_bo(ctx); memset(ctx, 0, sizeof(struct r600_context)); } @@ -797,6 +832,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) memset(ctx, 0, sizeof(struct r600_context)); ctx->radeon = radeon; + LIST_INITHEAD(&ctx->query_list); /* init dirty list */ @@ -872,33 +908,23 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; + ctx->cs = radeon->ws->cs_create(radeon->ws); + /* allocate cs variables */ - ctx->nreloc = RADEON_CTX_MAX_PM4; - ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); - if (ctx->reloc == NULL) { - r = -ENOMEM; - goto out_err; - } - ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); if (ctx->bo == NULL) { r = -ENOMEM; goto out_err; } - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; - ctx->pm4 = calloc(ctx->pm4_ndwords, 4); - if (ctx->pm4 == NULL) { - r = -ENOMEM; - goto out_err; - } + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS; + ctx->pm4 = ctx->cs->buf; r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; - - LIST_INITHEAD(&ctx->fenced_bo); - ctx->max_db = 4; + r600_get_backend_mask(ctx); return 0; out_err: r600_context_fini(ctx); @@ -912,7 +938,7 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); @@ -923,11 +949,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) } void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, - unsigned flush_mask, struct r600_bo *rbo) + unsigned flush_mask, struct r600_bo *bo) { - struct radeon_bo *bo; - - bo = rbo->bo; /* if bo has already been flushed */ if (!(~bo->last_flush & flush_flags)) { bo->last_flush &= flush_mask; @@ -959,29 +982,15 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; - ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; + ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->base.size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); } bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) -{ - struct radeon_bo *bo = rbo->bo; - bo->reloc = &ctx->reloc[ctx->creloc]; - bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; - ctx->reloc[ctx->creloc].handle = bo->handle; - ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); - ctx->reloc[ctx->creloc].flags = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); - rbo->fence = ctx->radeon->fence; - ctx->creloc++; -} - void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask) @@ -1057,8 +1066,8 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat if (block->pm4_bo_index[id]) { /* find relocation */ reloc_id = block->pm4_bo_index[id]; - r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo); - reg->bo->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo); + block->reloc[reloc_id].bo_usage = reg->bo_usage; /* always force dirty for relocs for now */ dirty |= R600_BLOCK_STATUS_DIRTY; } @@ -1094,10 +1103,10 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); if (block->reloc[1].bo) - block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE; + block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE; - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); + r600_bo_reference(&block->reloc[1].bo, NULL); + r600_bo_reference(&block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); LIST_DELINIT(&block->enable_list); return; @@ -1117,39 +1126,32 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ if (!dirty) { if (is_vertex) { - if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) + if (block->reloc[1].bo->buf != state->bo[0]->buf) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || - (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle)) + if ((block->reloc[1].bo->buf != state->bo[0]->buf) || + (block->reloc[2].bo->buf != state->bo[1]->buf)) dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } } - if (!dirty) { - if (is_vertex) - state->bo[0]->fence = ctx->radeon->fence; - else { - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; - } - } else { + + if (dirty) { if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - state->bo[0]->fence = ctx->radeon->fence; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, NULL); } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); - state->bo[0]->fence = ctx->radeon->fence; - state->bo[1]->fence = ctx->radeon->fence; - state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; + r600_bo_reference(&block->reloc[1].bo, state->bo[0]); + block->reloc[1].bo_usage = state->bo_usage[0]; + r600_bo_reference(&block->reloc[2].bo, state->bo[1]); + block->reloc[2].bo_usage = state->bo_usage[1]; + state->bo[0]->binding |= BO_BOUND_TEXTURE; } - } - if (dirty) { + if (is_vertex) block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; else @@ -1281,7 +1283,6 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); int cp_dwords = block->pm4_ndwords, start_dword = 0; int new_dwords = 0; @@ -1297,14 +1298,13 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * for (int j = 0; j < block->nreg; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); nbo--; if (nbo == 0) break; @@ -1338,7 +1338,6 @@ out: void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) { - int id; int cp_dwords = block->pm4_ndwords; int nbo = block->nbo; @@ -1352,14 +1351,13 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 for (int j = 0; j < nbo; j++) { if (block->pm4_bo_index[j]) { /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); + struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; + block->pm4[reloc->bo_pm4_index] = + r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + reloc->flush_flags, + reloc->flush_mask, + reloc->bo); } } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; @@ -1418,10 +1416,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) if (draw->indices) { ndwords = 11; - /* make sure there is enough relocation space before scheduling draw */ - if (ctx->creloc >= (ctx->nreloc - 1)) { - r600_context_flush(ctx); - } } /* queries need some special values */ @@ -1440,11 +1434,11 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) /* update the max dword count to make sure we have enough space * reserved for flushing the destination caches */ - ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16; + ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16; if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */ if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { @@ -1469,13 +1463,12 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) pm4[3] = draw->vgt_num_instances; if (draw->indices) { pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[5] = draw->indices_bo_offset; pm4[6] = 0; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - pm4[10] = 0; - r600_context_bo_reloc(ctx, &pm4[10], draw->indices); + pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); } else { pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); pm4[5] = draw->vgt_num_indices; @@ -1489,13 +1482,8 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) ctx->pm4_dirty_cdwords = 0; } -void r600_context_flush(struct r600_context *ctx) +void r600_context_flush(struct r600_context *ctx, unsigned flags) { - struct drm_radeon_cs drmib = {}; - struct drm_radeon_cs_chunk chunks[2]; - uint64_t chunk_array[2]; - unsigned fence; - int r; struct r600_block *enable_block = NULL; if (ctx->pm4_cdwords == ctx->init_dwords) @@ -1512,54 +1500,19 @@ void r600_context_flush(struct r600_context *ctx) /* partial flush is needed to avoid lockups on some chips with user fences */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); - ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); - -#if 1 - /* emit cs */ - drmib.num_chunks = 2; - drmib.chunks = (uint64_t)(uintptr_t)chunk_array; - chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->pm4_cdwords; - chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; - chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; - chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; - chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; - r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, - sizeof(struct drm_radeon_cs)); - if (r) { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); - } -#else - *ctx->radeon->cfence = ctx->radeon->fence; -#endif - - r600_context_update_fenced_list(ctx); - - fence = ctx->radeon->fence + 1; - if (fence < ctx->radeon->fence) { - /* wrap around */ - fence = 1; - r600_context_fence_wraparound(ctx, fence); - } - ctx->radeon->fence = fence; + + /* Flush the CS. */ + ctx->cs->cdw = ctx->pm4_cdwords; + ctx->radeon->ws->cs_flush(ctx->cs, flags); + + /* We need to get the pointer to the other CS, + * the command streams are double-buffered. */ + ctx->pm4 = ctx->cs->buf; /* restart */ for (int i = 0; i < ctx->creloc; i++) { - ctx->bo[i]->reloc = NULL; ctx->bo[i]->last_flush = 0; - radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + r600_bo_reference(&ctx->bo[i], NULL); } ctx->creloc = 0; ctx->pm4_dirty_cdwords = 0; @@ -1596,10 +1549,9 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, { unsigned ndwords = 10; - if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) || - (ctx->creloc >= (ctx->nreloc - 1))) { + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); @@ -1611,90 +1563,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo, ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo); -} - -void r600_context_dump_bof(struct r600_context *ctx, const char *file) -{ - bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; - unsigned i; - - root = device_id = bcs = blob = array = bo = size = handle = NULL; - root = bof_object(); - if (root == NULL) - goto out_err; - device_id = bof_int32(ctx->radeon->device); - if (device_id == NULL) - goto out_err; - if (bof_object_set(root, "device_id", device_id)) - goto out_err; - bof_decref(device_id); - device_id = NULL; - /* dump relocs */ - blob = bof_blob(ctx->creloc * 16, ctx->reloc); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "reloc", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump cs */ - blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); - if (blob == NULL) - goto out_err; - if (bof_object_set(root, "pm4", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - /* dump bo */ - array = bof_array(); - if (array == NULL) - goto out_err; - for (i = 0; i < ctx->creloc; i++) { - struct radeon_bo *rbo = ctx->bo[i]; - bo = bof_object(); - if (bo == NULL) - goto out_err; - size = bof_int32(rbo->size); - if (size == NULL) - goto out_err; - if (bof_object_set(bo, "size", size)) - goto out_err; - bof_decref(size); - size = NULL; - handle = bof_int32(rbo->handle); - if (handle == NULL) - goto out_err; - if (bof_object_set(bo, "handle", handle)) - goto out_err; - bof_decref(handle); - handle = NULL; - radeon_bo_map(ctx->radeon, rbo); - blob = bof_blob(rbo->size, rbo->data); - radeon_bo_unmap(ctx->radeon, rbo); - if (blob == NULL) - goto out_err; - if (bof_object_set(bo, "data", blob)) - goto out_err; - bof_decref(blob); - blob = NULL; - if (bof_array_append(array, bo)) - goto out_err; - bof_decref(bo); - bo = NULL; - } - if (bof_object_set(root, "bo", array)) - goto out_err; - bof_dump_file(root, file); -out_err: - bof_decref(blob); - bof_decref(array); - bof_decref(bo); - bof_decref(size); - bof_decref(handle); - bof_decref(device_id); - bof_decref(root); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); } static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) @@ -1704,9 +1573,9 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu u32 *results, *current_result; if (wait) - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ); else - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ); if (!results) return FALSE; @@ -1735,7 +1604,6 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned required_space, new_results_end; - int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ if (query->type == PIPE_QUERY_TIME_ELAPSED) @@ -1745,7 +1613,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -1756,7 +1624,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) query->queries_emitted = 1; } else { if (++query->queries_emitted > query->buffer_size / query->result_size / 2) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); } } @@ -1767,7 +1635,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) /* collect current results if query buffer is full */ if (new_results_end == query->results_start) { if (!(query->state & R600_QUERY_STATE_FLUSHED)) - r600_context_flush(ctx); + r600_context_flush(ctx, 0); r600_query_result(ctx, query, TRUE); } @@ -1775,15 +1643,17 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) u32 *results; int i; - results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_WRITE, NULL); + results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE); if (results) { results = (u32*)((char*)results + query->results_end); memset(results, 0, query->result_size); /* Set top bits for unused backends */ - for (i = num_backends; i < ctx->max_db; i++) { - results[(i * 4)+1] = 0x80000000; - results[(i * 4)+3] = 0x80000000; + for (i = 0; i < ctx->max_db; i++) { + if (!(ctx->backend_mask & (1<<i))) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } } r600_bo_unmap(ctx->radeon, query->buffer); } @@ -1793,19 +1663,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->state |= R600_QUERY_STATE_STARTED; query->state ^= R600_QUERY_STATE_ENDED; @@ -1818,19 +1687,18 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = 0; } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); query->results_end += query->result_size; if (query->results_end >= query->buffer_size) @@ -1848,7 +1716,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, { if (operation == PREDICATION_OP_CLEAR) { if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; @@ -1864,7 +1732,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, count /= query->result_size; if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords) - r600_context_flush(ctx); + r600_context_flush(ctx, RADEON_FLUSH_ASYNC); op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); @@ -1872,11 +1740,11 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, /* emit predicate packets for all data blocks */ while (results_base != query->results_end) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = results_base; ctx->pm4[ctx->pm4_cdwords++] = op; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, + RADEON_USAGE_READ); results_base += query->result_size; if (results_base >= query->buffer_size) results_base = 0; @@ -1926,7 +1794,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) { - r600_bo_reference(ctx->radeon, &query->buffer, NULL); + r600_bo_reference(&query->buffer, NULL); LIST_DELINIT(&query->list); free(query); } @@ -1938,7 +1806,7 @@ boolean r600_context_query_result(struct r600_context *ctx, uint64_t *result = (uint64_t*)vresult; if (!(query->state & R600_QUERY_STATE_FLUSHED)) { - r600_context_flush(ctx); + r600_context_flush(ctx, 0); } if (!r600_query_result(ctx, query, wait)) return FALSE; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 69f7251c043..1e901897efd 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -26,40 +26,20 @@ #ifndef R600_PRIV_H #define R600_PRIV_H -#include <errno.h> -#include <stdint.h> -#include <stdlib.h> -#include <assert.h> -#include <util/u_double_list.h> -#include <util/u_inlines.h> -#include "util/u_hash_table.h" -#include <os/os_thread.h> #include "r600.h" +#include "../../radeon/drm/radeon_winsys.h" +#include "util/u_hash_table.h" +#include "os/os_thread.h" #define PKT_COUNT_C 0xC000FFFF #define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -struct r600_bomgr; -struct r600_bo; - struct radeon { - int fd; - int refcount; - unsigned device; + struct radeon_winsys *ws; + struct radeon_info info; unsigned family; enum chip_class chip_class; struct r600_tiling_info tiling_info; - struct r600_bomgr *bomgr; - unsigned fence; - unsigned *cfence; - struct r600_bo *fence_bo; - unsigned clock_crystal_freq; - unsigned num_backends; - unsigned minor_version; - - /* List of buffer handles and its mutex. */ - struct util_hash_table *bo_handles; - pipe_mutex bo_handles_mutex; }; /* these flags are used in register flags and added into block flags */ @@ -79,85 +59,25 @@ struct r600_reg { }; #define BO_BOUND_TEXTURE 1 -struct radeon_bo { - struct pipe_reference reference; - unsigned handle; - unsigned size; - unsigned alignment; - int map_count; - void *data; - struct list_head fencedlist; - unsigned fence; - struct r600_context *ctx; - boolean shared; - struct r600_reloc *reloc; - unsigned reloc_id; - unsigned last_flush; - unsigned name; - unsigned binding; -}; struct r600_bo { struct pipe_reference reference; /* this must be the first member for the r600_bo_reference inline to work */ /* DO NOT MOVE THIS ^ */ - unsigned size; - unsigned tiling_flags; - unsigned kernel_pitch; + struct pb_buffer *buf; + struct radeon_winsys_cs_handle *cs_buf; unsigned domains; - struct radeon_bo *bo; - unsigned fence; - /* manager data */ - struct list_head list; - unsigned manager_id; - unsigned alignment; - unsigned offset; - int64_t start; - int64_t end; -}; - -struct r600_bomgr { - struct radeon *radeon; - unsigned usecs; - pipe_mutex mutex; - struct list_head delayed; - unsigned num_delayed; + unsigned last_flush; + unsigned binding; }; /* - * r600_drm.c - */ -struct radeon *r600_new(int fd, unsigned device); -void r600_delete(struct radeon *r600); - -/* * radeon_pciid.c */ unsigned radeon_family_from_device(unsigned device); /* - * radeon_bo.c - */ -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain); -void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst, - struct radeon_bo *src); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); -int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch); -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name); -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); - -/* * r600_hw_context.c */ -int r600_context_init_fence(struct r600_context *ctx); -void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); @@ -175,70 +95,23 @@ void r600_context_reg(struct r600_context *ctx, void r600_init_cs(struct r600_context *ctx); int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); -static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo, + enum radeon_bo_usage usage) { - struct radeon_bo *bo = rbo->bo; + enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0; + enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0; - assert(bo != NULL); + assert(usage); - if (!bo->reloc) - r600_context_get_reloc(ctx, rbo); + unsigned reloc_index = + ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, + rd, wd); - /* set PKT3 to point to proper reloc */ - *pm4 = bo->reloc_id; -} + if (reloc_index >= ctx->creloc) + ctx->creloc = reloc_index+1; -/* - * r600_bo.c - */ -void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); - -/* - * r600_bomgr.c - */ -struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs); -void r600_bomgr_destroy(struct r600_bomgr *mgr); -boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); -void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo); -struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, - unsigned size, - unsigned alignment, - unsigned cfence); - - -/* - * helpers - */ - - -/* - * radeon_bo.c - */ -static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->map_count == 0 && !bo->data) - return radeon_bo_fixed_map(radeon, bo); - bo->map_count++; - return 0; -} - -static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - bo->map_count--; - assert(bo->map_count >= 0); -} - -/* - * fence - */ -static inline boolean fence_is_after(unsigned fence, unsigned ofence) -{ - /* handle wrap around */ - if (fence < 0x80000000 && ofence > 0x80000000) - return TRUE; - if (fence > ofence) - return TRUE; - return FALSE; + r600_bo_reference(&ctx->bo[reloc_index], rbo); + return reloc_index * 4; } #endif diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h deleted file mode 100644 index 4a19dcf8ddf..00000000000 --- a/src/gallium/winsys/r600/drm/r600d.h +++ /dev/null @@ -1,2241 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef R600D_H -#define R600D_H - -/* evergreen values */ -#define EG_RESOURCE_OFFSET 0x00030000 -#define EG_RESOURCE_END 0x00034000 -#define EG_LOOP_CONST_OFFSET 0x0003A200 -#define EG_LOOP_CONST_END 0x0003A26C -#define EG_BOOL_CONST_OFFSET 0x0003A500 -#define EG_BOOL_CONST_END 0x0003A506 - - -#define R600_CONFIG_REG_OFFSET 0X00008000 -#define R600_CONFIG_REG_END 0X0000AC00 -#define R600_CONTEXT_REG_OFFSET 0X00028000 -#define R600_CONTEXT_REG_END 0X00029000 -#define R600_ALU_CONST_OFFSET 0X00030000 -#define R600_ALU_CONST_END 0X00032000 -#define R600_RESOURCE_OFFSET 0X00038000 -#define R600_RESOURCE_END 0X0003C000 -#define R600_SAMPLER_OFFSET 0X0003C000 -#define R600_SAMPLER_END 0X0003CFF0 -#define R600_CTL_CONST_OFFSET 0X0003CFF0 -#define R600_CTL_CONST_END 0X0003E200 -#define R600_LOOP_CONST_OFFSET 0X0003E200 -#define R600_LOOP_CONST_END 0X0003E380 -#define R600_BOOL_CONST_OFFSET 0X0003E380 -#define R600_BOOL_CONST_END 0X00040000 - -#define PKT3_NOP 0x10 -#define PKT3_INDIRECT_BUFFER_END 0x17 -#define PKT3_SET_PREDICATION 0x20 -#define PKT3_REG_RMW 0x21 -#define PKT3_COND_EXEC 0x22 -#define PKT3_PRED_EXEC 0x23 -#define PKT3_START_3D_CMDBUF 0x24 -#define PKT3_DRAW_INDEX_2 0x27 -#define PKT3_CONTEXT_CONTROL 0x28 -#define PKT3_DRAW_INDEX_IMMD_BE 0x29 -#define PKT3_INDEX_TYPE 0x2A -#define PKT3_DRAW_INDEX 0x2B -#define PKT3_DRAW_INDEX_AUTO 0x2D -#define PKT3_DRAW_INDEX_IMMD 0x2E -#define PKT3_NUM_INSTANCES 0x2F -#define PKT3_STRMOUT_BUFFER_UPDATE 0x34 -#define PKT3_INDIRECT_BUFFER_MP 0x38 -#define PKT3_MEM_SEMAPHORE 0x39 -#define PKT3_MPEG_INDEX 0x3A -#define PKT3_WAIT_REG_MEM 0x3C -#define PKT3_MEM_WRITE 0x3D -#define PKT3_INDIRECT_BUFFER 0x32 -#define PKT3_CP_INTERRUPT 0x40 -#define PKT3_SURFACE_SYNC 0x43 -#define PKT3_ME_INITIALIZE 0x44 -#define PKT3_COND_WRITE 0x45 -#define PKT3_EVENT_WRITE 0x46 -#define PKT3_EVENT_WRITE_EOP 0x47 -#define PKT3_ONE_REG_WRITE 0x57 -#define PKT3_SET_CONFIG_REG 0x68 -#define PKT3_SET_CONTEXT_REG 0x69 -#define PKT3_SET_ALU_CONST 0x6A -#define PKT3_SET_BOOL_CONST 0x6B -#define PKT3_SET_LOOP_CONST 0x6C -#define PKT3_SET_RESOURCE 0x6D -#define PKT3_SET_SAMPLER 0x6E -#define PKT3_SET_CTL_CONST 0x6F -#define PKT3_SURFACE_BASE_UPDATE 0x73 -#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) -#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) -#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) - -#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 -#define EVENT_TYPE_ZPASS_DONE 0x15 -#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) - /* 0 - any non-TS event - * 1 - ZPASS_DONE - * 2 - SAMPLE_PIPELINESTAT - * 3 - SAMPLE_STREAMOUTSTAT* - * 4 - *S_PARTIAL_FLUSH - * 5 - TS events - */ - -#define PREDICATION_OP_CLEAR 0x0 -#define PREDICATION_OP_ZPASS 0x1 -#define PREDICATION_OP_PRIMCOUNT 0x2 - -#define PRED_OP(x) ((x) << 16) - -#define PREDICATION_CONTINUE (1 << 31) - -#define PREDICATION_HINT_WAIT (0 << 12) -#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) - -#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) -#define PREDICATION_DRAW_VISIBLE (1 << 8) - -#define PKT_TYPE_S(x) (((x) & 0x3) << 30) -#define PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define PKT_TYPE_C 0x3FFFFFFF -#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16) -#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define PKT_COUNT_C 0xC000FFFF -#define PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0) -#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define PKT0_BASE_INDEX_C 0xFFFF0000 -#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) -#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) -#define PKT3_IT_OPCODE_C 0xFFFF00FF -#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) -#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) - -/* Registers */ -#define R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0) -#define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3) -#define C_0280A0_ENDIAN 0xFFFFFFFC -#define S_0280A0_FORMAT(x) (((x) & 0x3F) << 2) -#define G_0280A0_FORMAT(x) (((x) >> 2) & 0x3F) -#define C_0280A0_FORMAT 0xFFFFFF03 -#define V_0280A0_COLOR_INVALID 0x00000000 -#define V_0280A0_COLOR_8 0x00000001 -#define V_0280A0_COLOR_4_4 0x00000002 -#define V_0280A0_COLOR_3_3_2 0x00000003 -#define V_0280A0_COLOR_16 0x00000005 -#define V_0280A0_COLOR_16_FLOAT 0x00000006 -#define V_0280A0_COLOR_8_8 0x00000007 -#define V_0280A0_COLOR_5_6_5 0x00000008 -#define V_0280A0_COLOR_6_5_5 0x00000009 -#define V_0280A0_COLOR_1_5_5_5 0x0000000A -#define V_0280A0_COLOR_4_4_4_4 0x0000000B -#define V_0280A0_COLOR_5_5_5_1 0x0000000C -#define V_0280A0_COLOR_32 0x0000000D -#define V_0280A0_COLOR_32_FLOAT 0x0000000E -#define V_0280A0_COLOR_16_16 0x0000000F -#define V_0280A0_COLOR_16_16_FLOAT 0x00000010 -#define V_0280A0_COLOR_8_24 0x00000011 -#define V_0280A0_COLOR_8_24_FLOAT 0x00000012 -#define V_0280A0_COLOR_24_8 0x00000013 -#define V_0280A0_COLOR_24_8_FLOAT 0x00000014 -#define V_0280A0_COLOR_10_11_11 0x00000015 -#define V_0280A0_COLOR_10_11_11_FLOAT 0x00000016 -#define V_0280A0_COLOR_11_11_10 0x00000017 -#define V_0280A0_COLOR_11_11_10_FLOAT 0x00000018 -#define V_0280A0_COLOR_2_10_10_10 0x00000019 -#define V_0280A0_COLOR_8_8_8_8 0x0000001A -#define V_0280A0_COLOR_10_10_10_2 0x0000001B -#define V_0280A0_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_0280A0_COLOR_32_32 0x0000001D -#define V_0280A0_COLOR_32_32_FLOAT 0x0000001E -#define V_0280A0_COLOR_16_16_16_16 0x0000001F -#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_0280A0_COLOR_32_32_32_32 0x00000022 -#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023 -#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8) -#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF) -#define C_0280A0_ARRAY_MODE 0xFFFFF0FF -#define V_0280A0_ARRAY_LINEAR_GENERAL 0x00000000 -#define V_0280A0_ARRAY_LINEAR_ALIGNED 0x00000001 -#define V_0280A0_ARRAY_1D_TILED_THIN1 0x00000002 -#define V_0280A0_ARRAY_2D_TILED_THIN1 0x00000004 -#define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) -#define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) -#define C_0280A0_NUMBER_TYPE 0xFFFF8FFF -#define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) -#define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) -#define C_0280A0_READ_SIZE 0xFFFF7FFF -#define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) -#define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) -#define C_0280A0_COMP_SWAP 0xFFFCFFFF -#define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) -#define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) -#define C_0280A0_TILE_MODE 0xFFF3FFFF -#define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) -#define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) -#define C_0280A0_BLEND_CLAMP 0xFFEFFFFF -#define S_0280A0_CLEAR_COLOR(x) (((x) & 0x1) << 21) -#define G_0280A0_CLEAR_COLOR(x) (((x) >> 21) & 0x1) -#define C_0280A0_CLEAR_COLOR 0xFFDFFFFF -#define S_0280A0_BLEND_BYPASS(x) (((x) & 0x1) << 22) -#define G_0280A0_BLEND_BYPASS(x) (((x) >> 22) & 0x1) -#define C_0280A0_BLEND_BYPASS 0xFFBFFFFF -#define S_0280A0_BLEND_FLOAT32(x) (((x) & 0x1) << 23) -#define G_0280A0_BLEND_FLOAT32(x) (((x) >> 23) & 0x1) -#define C_0280A0_BLEND_FLOAT32 0xFF7FFFFF -#define S_0280A0_SIMPLE_FLOAT(x) (((x) & 0x1) << 24) -#define G_0280A0_SIMPLE_FLOAT(x) (((x) >> 24) & 0x1) -#define C_0280A0_SIMPLE_FLOAT 0xFEFFFFFF -#define S_0280A0_ROUND_MODE(x) (((x) & 0x1) << 25) -#define G_0280A0_ROUND_MODE(x) (((x) >> 25) & 0x1) -#define C_0280A0_ROUND_MODE 0xFDFFFFFF -#define S_0280A0_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_0280A0_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_0280A0_TILE_COMPACT 0xFBFFFFFF -#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) -#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) -#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) -#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028800_STENCIL_ENABLE 0xFFFFFFFE -#define S_028800_Z_ENABLE(x) (((x) & 0x1) << 1) -#define G_028800_Z_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028800_Z_ENABLE 0xFFFFFFFD -#define S_028800_Z_WRITE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028800_Z_WRITE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028800_Z_WRITE_ENABLE 0xFFFFFFFB -#define S_028800_ZFUNC(x) (((x) & 0x7) << 4) -#define G_028800_ZFUNC(x) (((x) >> 4) & 0x7) -#define C_028800_ZFUNC 0xFFFFFF8F -#define S_028800_BACKFACE_ENABLE(x) (((x) & 0x1) << 7) -#define G_028800_BACKFACE_ENABLE(x) (((x) >> 7) & 0x1) -#define C_028800_BACKFACE_ENABLE 0xFFFFFF7F -#define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) -#define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) -#define C_028800_STENCILFUNC 0xFFFFF8FF -#define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) -#define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) -#define C_028800_STENCILFAIL 0xFFFFC7FF -#define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) -#define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) -#define C_028800_STENCILZPASS 0xFFFE3FFF -#define S_028800_STENCILZFAIL(x) (((x) & 0x7) << 17) -#define G_028800_STENCILZFAIL(x) (((x) >> 17) & 0x7) -#define C_028800_STENCILZFAIL 0xFFF1FFFF -#define S_028800_STENCILFUNC_BF(x) (((x) & 0x7) << 20) -#define G_028800_STENCILFUNC_BF(x) (((x) >> 20) & 0x7) -#define C_028800_STENCILFUNC_BF 0xFF8FFFFF -#define S_028800_STENCILFAIL_BF(x) (((x) & 0x7) << 23) -#define G_028800_STENCILFAIL_BF(x) (((x) >> 23) & 0x7) -#define C_028800_STENCILFAIL_BF 0xFC7FFFFF -#define S_028800_STENCILZPASS_BF(x) (((x) & 0x7) << 26) -#define G_028800_STENCILZPASS_BF(x) (((x) >> 26) & 0x7) -#define C_028800_STENCILZPASS_BF 0xE3FFFFFF -#define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) -#define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) -#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF -#define R_028010_DB_DEPTH_INFO 0x028010 -#define S_028010_FORMAT(x) (((x) & 0x7) << 0) -#define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -#define C_028010_FORMAT 0xFFFFFFF8 -#define V_028010_DEPTH_INVALID 0x00000000 -#define V_028010_DEPTH_16 0x00000001 -#define V_028010_DEPTH_X8_24 0x00000002 -#define V_028010_DEPTH_8_24 0x00000003 -#define V_028010_DEPTH_X8_24_FLOAT 0x00000004 -#define V_028010_DEPTH_8_24_FLOAT 0x00000005 -#define V_028010_DEPTH_32_FLOAT 0x00000006 -#define V_028010_DEPTH_X24_8_32_FLOAT 0x00000007 -#define S_028010_READ_SIZE(x) (((x) & 0x1) << 3) -#define G_028010_READ_SIZE(x) (((x) >> 3) & 0x1) -#define C_028010_READ_SIZE 0xFFFFFFF7 -#define S_028010_ARRAY_MODE(x) (((x) & 0xF) << 15) -#define G_028010_ARRAY_MODE(x) (((x) >> 15) & 0xF) -#define C_028010_ARRAY_MODE 0xFFF87FFF -#define S_028010_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 25) -#define G_028010_TILE_SURFACE_ENABLE(x) (((x) >> 25) & 0x1) -#define C_028010_TILE_SURFACE_ENABLE 0xFDFFFFFF -#define S_028010_TILE_COMPACT(x) (((x) & 0x1) << 26) -#define G_028010_TILE_COMPACT(x) (((x) >> 26) & 0x1) -#define C_028010_TILE_COMPACT 0xFBFFFFFF -#define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) -#define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) -#define C_028010_ZRANGE_PRECISION 0x7FFFFFFF -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028000_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028000_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028000_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028000_SLICE_TILE_MAX 0xC00003FF -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define S_028004_SLICE_START(x) (((x) & 0x7FF) << 0) -#define G_028004_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028004_SLICE_START 0xFFFFF800 -#define S_028004_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028004_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028004_SLICE_MAX 0xFF001FFF -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) -#define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) -#define C_028D24_HTILE_WIDTH 0xFFFFFFFE -#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1) -#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) -#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD -#define S_028D24_LINEAR(x) (((x) & 0x1) << 2) -#define G_028D24_LINEAR(x) (((x) >> 2) & 0x1) -#define C_028D24_LINEAR 0xFFFFFFFB -#define S_028D24_FULL_CACHE(x) (((x) & 0x1) << 3) -#define G_028D24_FULL_CACHE(x) (((x) >> 3) & 0x1) -#define C_028D24_FULL_CACHE 0xFFFFFFF7 -#define S_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) & 0x1) << 4) -#define G_028D24_HTILE_USES_PRELOAD_WIN(x) (((x) >> 4) & 0x1) -#define C_028D24_HTILE_USES_PRELOAD_WIN 0xFFFFFFEF -#define S_028D24_PRELOAD(x) (((x) & 0x1) << 5) -#define G_028D24_PRELOAD(x) (((x) >> 5) & 0x1) -#define C_028D24_PRELOAD 0xFFFFFFDF -#define S_028D24_PREFETCH_WIDTH(x) (((x) & 0x3F) << 6) -#define G_028D24_PREFETCH_WIDTH(x) (((x) >> 6) & 0x3F) -#define C_028D24_PREFETCH_WIDTH 0xFFFFF03F -#define S_028D24_PREFETCH_HEIGHT(x) (((x) & 0x3F) << 12) -#define G_028D24_PREFETCH_HEIGHT(x) (((x) >> 12) & 0x3F) -#define C_028D24_PREFETCH_HEIGHT 0xFFFC0FFF -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define S_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028D34_DEPTH_HEIGHT_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028D34_DEPTH_HEIGHT_TILE_MAX 0xFFFFFC00 -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define S_028D10_FORCE_HIZ_ENABLE(x) (((x) & 0x3) << 0) -#define G_028D10_FORCE_HIZ_ENABLE(x) (((x) >> 0) & 0x3) -#define C_028D10_FORCE_HIZ_ENABLE 0xFFFFFFFC -#define S_028D10_FORCE_HIS_ENABLE0(x) (((x) & 0x3) << 2) -#define G_028D10_FORCE_HIS_ENABLE0(x) (((x) >> 2) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE0 0xFFFFFFF3 -#define S_028D10_FORCE_HIS_ENABLE1(x) (((x) & 0x3) << 4) -#define G_028D10_FORCE_HIS_ENABLE1(x) (((x) >> 4) & 0x3) -#define C_028D10_FORCE_HIS_ENABLE1 0xFFFFFFCF -#define S_028D10_FORCE_SHADER_Z_ORDER(x) (((x) & 0x1) << 6) -#define G_028D10_FORCE_SHADER_Z_ORDER(x) (((x) >> 6) & 0x1) -#define C_028D10_FORCE_SHADER_Z_ORDER 0xFFFFFFBF -#define S_028D10_FAST_Z_DISABLE(x) (((x) & 0x1) << 7) -#define G_028D10_FAST_Z_DISABLE(x) (((x) >> 7) & 0x1) -#define C_028D10_FAST_Z_DISABLE 0xFFFFFF7F -#define S_028D10_FAST_STENCIL_DISABLE(x) (((x) & 0x1) << 8) -#define G_028D10_FAST_STENCIL_DISABLE(x) (((x) >> 8) & 0x1) -#define C_028D10_FAST_STENCIL_DISABLE 0xFFFFFEFF -#define S_028D10_NOOP_CULL_DISABLE(x) (((x) & 0x1) << 9) -#define G_028D10_NOOP_CULL_DISABLE(x) (((x) >> 9) & 0x1) -#define C_028D10_NOOP_CULL_DISABLE 0xFFFFFDFF -#define S_028D10_FORCE_COLOR_KILL(x) (((x) & 0x1) << 10) -#define G_028D10_FORCE_COLOR_KILL(x) (((x) >> 10) & 0x1) -#define C_028D10_FORCE_COLOR_KILL 0xFFFFFBFF -#define S_028D10_FORCE_Z_READ(x) (((x) & 0x1) << 11) -#define G_028D10_FORCE_Z_READ(x) (((x) >> 11) & 0x1) -#define C_028D10_FORCE_Z_READ 0xFFFFF7FF -#define S_028D10_FORCE_STENCIL_READ(x) (((x) & 0x1) << 12) -#define G_028D10_FORCE_STENCIL_READ(x) (((x) >> 12) & 0x1) -#define C_028D10_FORCE_STENCIL_READ 0xFFFFEFFF -#define S_028D10_FORCE_FULL_Z_RANGE(x) (((x) & 0x3) << 13) -#define G_028D10_FORCE_FULL_Z_RANGE(x) (((x) >> 13) & 0x3) -#define C_028D10_FORCE_FULL_Z_RANGE 0xFFFF9FFF -#define S_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) & 0x1) << 15) -#define G_028D10_FORCE_QC_SMASK_CONFLICT(x) (((x) >> 15) & 0x1) -#define C_028D10_FORCE_QC_SMASK_CONFLICT 0xFFFF7FFF -#define S_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) & 0x1) << 16) -#define G_028D10_DISABLE_VIEWPORT_CLAMP(x) (((x) >> 16) & 0x1) -#define C_028D10_DISABLE_VIEWPORT_CLAMP 0xFFFEFFFF -#define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) -#define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) -#define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_008DFC_SQ_CF_WORD0 0x008DFC -#define S_008DFC_ADDR(x) (((x) & 0xFFFFFFFF) << 0) -#define G_008DFC_ADDR(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_008DFC_ADDR 0x00000000 -#define R_008DFC_SQ_CF_WORD1 0x008DFC -#define S_008DFC_POP_COUNT(x) (((x) & 0x7) << 0) -#define G_008DFC_POP_COUNT(x) (((x) >> 0) & 0x7) -#define C_008DFC_POP_COUNT 0xFFFFFFF8 -#define S_008DFC_CF_CONST(x) (((x) & 0x1F) << 3) -#define G_008DFC_CF_CONST(x) (((x) >> 3) & 0x1F) -#define C_008DFC_CF_CONST 0xFFFFFF07 -#define S_008DFC_COND(x) (((x) & 0x3) << 8) -#define G_008DFC_COND(x) (((x) >> 8) & 0x3) -#define C_008DFC_COND 0xFFFFFCFF -#define S_008DFC_COUNT(x) (((x) & 0x7) << 10) -#define G_008DFC_COUNT(x) (((x) >> 10) & 0x7) -#define C_008DFC_COUNT 0xFFFFE3FF -#define S_008DFC_CALL_COUNT(x) (((x) & 0x3F) << 13) -#define G_008DFC_CALL_COUNT(x) (((x) >> 13) & 0x3F) -#define C_008DFC_CALL_COUNT 0xFFF81FFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_NOP 0x00000000 -#define V_008DFC_SQ_CF_INST_TEX 0x00000001 -#define V_008DFC_SQ_CF_INST_VTX 0x00000002 -#define V_008DFC_SQ_CF_INST_VTX_TC 0x00000003 -#define V_008DFC_SQ_CF_INST_LOOP_START 0x00000004 -#define V_008DFC_SQ_CF_INST_LOOP_END 0x00000005 -#define V_008DFC_SQ_CF_INST_LOOP_START_DX10 0x00000006 -#define V_008DFC_SQ_CF_INST_LOOP_START_NO_AL 0x00000007 -#define V_008DFC_SQ_CF_INST_LOOP_CONTINUE 0x00000008 -#define V_008DFC_SQ_CF_INST_LOOP_BREAK 0x00000009 -#define V_008DFC_SQ_CF_INST_JUMP 0x0000000A -#define V_008DFC_SQ_CF_INST_PUSH 0x0000000B -#define V_008DFC_SQ_CF_INST_PUSH_ELSE 0x0000000C -#define V_008DFC_SQ_CF_INST_ELSE 0x0000000D -#define V_008DFC_SQ_CF_INST_POP 0x0000000E -#define V_008DFC_SQ_CF_INST_POP_JUMP 0x0000000F -#define V_008DFC_SQ_CF_INST_POP_PUSH 0x00000010 -#define V_008DFC_SQ_CF_INST_POP_PUSH_ELSE 0x00000011 -#define V_008DFC_SQ_CF_INST_CALL 0x00000012 -#define V_008DFC_SQ_CF_INST_CALL_FS 0x00000013 -#define V_008DFC_SQ_CF_INST_RETURN 0x00000014 -#define V_008DFC_SQ_CF_INST_EMIT_VERTEX 0x00000015 -#define V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX 0x00000016 -#define V_008DFC_SQ_CF_INST_CUT_VERTEX 0x00000017 -#define V_008DFC_SQ_CF_INST_KILL 0x00000018 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD0 0x008DFC -#define S_008DFC_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_008DFC_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_008DFC_ALU_ADDR 0xFFC00000 -#define S_008DFC_KCACHE_BANK0(x) (((x) & 0xF) << 22) -#define G_008DFC_KCACHE_BANK0(x) (((x) >> 22) & 0xF) -#define C_008DFC_KCACHE_BANK0 0xFC3FFFFF -#define S_008DFC_KCACHE_BANK1(x) (((x) & 0xF) << 26) -#define G_008DFC_KCACHE_BANK1(x) (((x) >> 26) & 0xF) -#define C_008DFC_KCACHE_BANK1 0xC3FFFFFF -#define S_008DFC_KCACHE_MODE0(x) (((x) & 0x3) << 30) -#define G_008DFC_KCACHE_MODE0(x) (((x) >> 30) & 0x3) -#define C_008DFC_KCACHE_MODE0 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALU_WORD1 0x008DFC -#define S_008DFC_KCACHE_MODE1(x) (((x) & 0x3) << 0) -#define G_008DFC_KCACHE_MODE1(x) (((x) >> 0) & 0x3) -#define C_008DFC_KCACHE_MODE1 0xFFFFFFFC -#define S_008DFC_KCACHE_ADDR0(x) (((x) & 0xFF) << 2) -#define G_008DFC_KCACHE_ADDR0(x) (((x) >> 2) & 0xFF) -#define C_008DFC_KCACHE_ADDR0 0xFFFFFC03 -#define S_008DFC_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) -#define G_008DFC_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) -#define C_008DFC_KCACHE_ADDR1 0xFFFC03FF -#define S_008DFC_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_008DFC_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_008DFC_ALU_COUNT 0xFE03FFFF -#define S_008DFC_USES_WATERFALL(x) (((x) & 0x1) << 25) -#define G_008DFC_USES_WATERFALL(x) (((x) >> 25) & 0x1) -#define C_008DFC_USES_WATERFALL 0xFDFFFFFF -#define S_008DFC_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_008DFC_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_008DFC_CF_ALU_INST 0xC3FFFFFF -#define V_008DFC_SQ_CF_INST_ALU 0x00000008 -#define V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 -#define V_008DFC_SQ_CF_INST_ALU_POP_AFTER 0x0000000A -#define V_008DFC_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B -#define V_008DFC_SQ_CF_INST_ALU_CONTINUE 0x0000000D -#define V_008DFC_SQ_CF_INST_ALU_BREAK 0x0000000E -#define V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD0 0x008DFC -#define S_008DFC_ARRAY_BASE(x) (((x) & 0x1FFF) << 0) -#define G_008DFC_ARRAY_BASE(x) (((x) >> 0) & 0x1FFF) -#define C_008DFC_ARRAY_BASE 0xFFFFE000 -#define S_008DFC_TYPE(x) (((x) & 0x3) << 13) -#define G_008DFC_TYPE(x) (((x) >> 13) & 0x3) -#define C_008DFC_TYPE 0xFFFF9FFF -#define S_008DFC_RW_GPR(x) (((x) & 0x7F) << 15) -#define G_008DFC_RW_GPR(x) (((x) >> 15) & 0x7F) -#define C_008DFC_RW_GPR 0xFFC07FFF -#define S_008DFC_RW_REL(x) (((x) & 0x1) << 22) -#define G_008DFC_RW_REL(x) (((x) >> 22) & 0x1) -#define C_008DFC_RW_REL 0xFFBFFFFF -#define S_008DFC_INDEX_GPR(x) (((x) & 0x7F) << 23) -#define G_008DFC_INDEX_GPR(x) (((x) >> 23) & 0x7F) -#define C_008DFC_INDEX_GPR 0xC07FFFFF -#define S_008DFC_ELEM_SIZE(x) (((x) & 0x3) << 30) -#define G_008DFC_ELEM_SIZE(x) (((x) >> 30) & 0x3) -#define C_008DFC_ELEM_SIZE 0x3FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1 0x008DFC -#define S_008DFC_BURST_COUNT(x) (((x) & 0xF) << 17) -#define G_008DFC_BURST_COUNT(x) (((x) >> 17) & 0xF) -#define C_008DFC_BURST_COUNT 0xFFE1FFFF -#define S_008DFC_END_OF_PROGRAM(x) (((x) & 0x1) << 21) -#define G_008DFC_END_OF_PROGRAM(x) (((x) >> 21) & 0x1) -#define C_008DFC_END_OF_PROGRAM 0xFFDFFFFF -#define S_008DFC_VALID_PIXEL_MODE(x) (((x) & 0x1) << 22) -#define G_008DFC_VALID_PIXEL_MODE(x) (((x) >> 22) & 0x1) -#define C_008DFC_VALID_PIXEL_MODE 0xFFBFFFFF -#define S_008DFC_CF_INST(x) (((x) & 0x7F) << 23) -#define G_008DFC_CF_INST(x) (((x) >> 23) & 0x7F) -#define C_008DFC_CF_INST 0xC07FFFFF -#define V_008DFC_SQ_CF_INST_MEM_STREAM0 0x00000020 -#define V_008DFC_SQ_CF_INST_MEM_STREAM1 0x00000021 -#define V_008DFC_SQ_CF_INST_MEM_STREAM2 0x00000022 -#define V_008DFC_SQ_CF_INST_MEM_STREAM3 0x00000023 -#define V_008DFC_SQ_CF_INST_MEM_SCRATCH 0x00000024 -#define V_008DFC_SQ_CF_INST_MEM_REDUCTION 0x00000025 -#define V_008DFC_SQ_CF_INST_MEM_RING 0x00000026 -#define V_008DFC_SQ_CF_INST_EXPORT 0x00000027 -#define V_008DFC_SQ_CF_INST_EXPORT_DONE 0x00000028 -#define S_008DFC_WHOLE_QUAD_MODE(x) (((x) & 0x1) << 30) -#define G_008DFC_WHOLE_QUAD_MODE(x) (((x) >> 30) & 0x1) -#define C_008DFC_WHOLE_QUAD_MODE 0xBFFFFFFF -#define S_008DFC_BARRIER(x) (((x) & 0x1) << 31) -#define G_008DFC_BARRIER(x) (((x) >> 31) & 0x1) -#define C_008DFC_BARRIER 0x7FFFFFFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_BUF 0x008DFC -#define S_008DFC_ARRAY_SIZE(x) (((x) & 0xFFF) << 0) -#define G_008DFC_ARRAY_SIZE(x) (((x) >> 0) & 0xFFF) -#define C_008DFC_ARRAY_SIZE 0xFFFFF000 -#define S_008DFC_COMP_MASK(x) (((x) & 0xF) << 12) -#define G_008DFC_COMP_MASK(x) (((x) >> 12) & 0xF) -#define C_008DFC_COMP_MASK 0xFFFF0FFF -#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ 0x008DFC -#define S_008DFC_SEL_X(x) (((x) & 0x7) << 0) -#define G_008DFC_SEL_X(x) (((x) >> 0) & 0x7) -#define C_008DFC_SEL_X 0xFFFFFFF8 -#define S_008DFC_SEL_Y(x) (((x) & 0x7) << 3) -#define G_008DFC_SEL_Y(x) (((x) >> 3) & 0x7) -#define C_008DFC_SEL_Y 0xFFFFFFC7 -#define S_008DFC_SEL_Z(x) (((x) & 0x7) << 6) -#define G_008DFC_SEL_Z(x) (((x) >> 6) & 0x7) -#define C_008DFC_SEL_Z 0xFFFFFE3F -#define S_008DFC_SEL_W(x) (((x) & 0x7) << 9) -#define G_008DFC_SEL_W(x) (((x) >> 9) & 0x7) -#define C_008DFC_SEL_W 0xFFFFF1FF -#define R_008DFC_SQ_VTX_WORD0 0x008DFC -#define S_008DFC_VTX_INST(x) (((x) & 0x1F) << 0) -#define G_008DFC_VTX_INST(x) (((x) >> 0) & 0x1F) -#define C_008DFC_VTX_INST 0xFFFFFFE0 -#define S_008DFC_FETCH_TYPE(x) (((x) & 0x3) << 5) -#define G_008DFC_FETCH_TYPE(x) (((x) >> 5) & 0x3) -#define C_008DFC_FETCH_TYPE 0xFFFFFF9F -#define S_008DFC_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) -#define G_008DFC_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) -#define C_008DFC_FETCH_WHOLE_QUAD 0xFFFFFF7F -#define S_008DFC_BUFFER_ID(x) (((x) & 0xFF) << 8) -#define G_008DFC_BUFFER_ID(x) (((x) >> 8) & 0xFF) -#define C_008DFC_BUFFER_ID 0xFFFF00FF -#define S_008DFC_SRC_GPR(x) (((x) & 0x7F) << 16) -#define G_008DFC_SRC_GPR(x) (((x) >> 16) & 0x7F) -#define C_008DFC_SRC_GPR 0xFF80FFFF -#define S_008DFC_SRC_REL(x) (((x) & 0x1) << 23) -#define G_008DFC_SRC_REL(x) (((x) >> 23) & 0x1) -#define C_008DFC_SRC_REL 0xFF7FFFFF -#define S_008DFC_SRC_SEL_X(x) (((x) & 0x3) << 24) -#define G_008DFC_SRC_SEL_X(x) (((x) >> 24) & 0x3) -#define C_008DFC_SRC_SEL_X 0xFCFFFFFF -#define S_008DFC_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) -#define G_008DFC_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F) -#define C_008DFC_MEGA_FETCH_COUNT 0x03FFFFFF -#define R_008DFC_SQ_VTX_WORD1 0x008DFC -#define S_008DFC_DST_SEL_X(x) (((x) & 0x7) << 9) -#define G_008DFC_DST_SEL_X(x) (((x) >> 9) & 0x7) -#define C_008DFC_DST_SEL_X 0xFFFFF1FF -#define S_008DFC_DST_SEL_Y(x) (((x) & 0x7) << 12) -#define G_008DFC_DST_SEL_Y(x) (((x) >> 12) & 0x7) -#define C_008DFC_DST_SEL_Y 0xFFFF8FFF -#define S_008DFC_DST_SEL_Z(x) (((x) & 0x7) << 15) -#define G_008DFC_DST_SEL_Z(x) (((x) >> 15) & 0x7) -#define C_008DFC_DST_SEL_Z 0xFFFC7FFF -#define S_008DFC_DST_SEL_W(x) (((x) & 0x7) << 18) -#define G_008DFC_DST_SEL_W(x) (((x) >> 18) & 0x7) -#define C_008DFC_DST_SEL_W 0xFFE3FFFF -#define S_008DFC_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) -#define G_008DFC_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) -#define C_008DFC_USE_CONST_FIELDS 0xFFDFFFFF -#define S_008DFC_DATA_FORMAT(x) (((x) & 0x3F) << 22) -#define G_008DFC_DATA_FORMAT(x) (((x) >> 22) & 0x3F) -#define C_008DFC_DATA_FORMAT 0xF03FFFFF -#define S_008DFC_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) -#define G_008DFC_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) -#define C_008DFC_NUM_FORMAT_ALL 0xCFFFFFFF -#define S_008DFC_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) -#define G_008DFC_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) -#define C_008DFC_FORMAT_COMP_ALL 0xBFFFFFFF -#define S_008DFC_SRF_MODE_ALL(x) (((x) & 0x1) << 31) -#define G_008DFC_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) -#define C_008DFC_SRF_MODE_ALL 0x7FFFFFFF -#define R_008DFC_SQ_VTX_WORD1_GPR 0x008DFC -#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 0) -#define G_008DFC_DST_GPR(x) (((x) >> 0) & 0x7F) -#define C_008DFC_DST_GPR 0xFFFFFF80 -#define S_008DFC_DST_REL(x) (((x) & 0x1) << 7) -#define G_008DFC_DST_REL(x) (((x) >> 7) & 0x1) -#define C_008DFC_DST_REL 0xFFFFFF7F -#define R_008DFC_SQ_VTX_WORD2 0x008DFC -#define S_008DFC_OFFSET(x) (((x) & 0xFFFF) << 0) -#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFFF) -#define C_008DFC_OFFSET 0xFFFF0000 -#define S_008DFC_ENDIAN_SWAP(x) (((x) & 0x3) << 16) -#define G_008DFC_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) -#define C_008DFC_ENDIAN_SWAP 0xFFFCFFFF -#define S_008DFC_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) -#define G_008DFC_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) -#define C_008DFC_CONST_BUF_NO_STRIDE 0xFFFBFFFF -#define S_008DFC_MEGA_FETCH(x) (((x) & 0x1) << 19) -#define G_008DFC_MEGA_FETCH(x) (((x) >> 19) & 0x1) -#define C_008DFC_MEGA_FETCH 0xFFF7FFFF -#define S_008DFC_ALT_CONST(x) (((x) & 0x1) << 20) -#define G_008DFC_ALT_CONST(x) (((x) >> 20) & 0x1) -#define C_008DFC_ALT_CONST 0xFFEFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define S_0286CC_NUM_INTERP(x) (((x) & 0x3F) << 0) -#define G_0286CC_NUM_INTERP(x) (((x) >> 0) & 0x3F) -#define C_0286CC_NUM_INTERP 0xFFFFFFC0 -#define S_0286CC_POSITION_ENA(x) (((x) & 0x1) << 8) -#define G_0286CC_POSITION_ENA(x) (((x) >> 8) & 0x1) -#define C_0286CC_POSITION_ENA 0xFFFFFEFF -#define S_0286CC_POSITION_CENTROID(x) (((x) & 0x1) << 9) -#define G_0286CC_POSITION_CENTROID(x) (((x) >> 9) & 0x1) -#define C_0286CC_POSITION_CENTROID 0xFFFFFDFF -#define S_0286CC_POSITION_ADDR(x) (((x) & 0x1F) << 10) -#define G_0286CC_POSITION_ADDR(x) (((x) >> 10) & 0x1F) -#define C_0286CC_POSITION_ADDR 0xFFFF83FF -#define S_0286CC_PARAM_GEN(x) (((x) & 0xF) << 15) -#define G_0286CC_PARAM_GEN(x) (((x) >> 15) & 0xF) -#define C_0286CC_PARAM_GEN 0xFFF87FFF -#define S_0286CC_PARAM_GEN_ADDR(x) (((x) & 0x7F) << 19) -#define G_0286CC_PARAM_GEN_ADDR(x) (((x) >> 19) & 0x7F) -#define C_0286CC_PARAM_GEN_ADDR 0xFC07FFFF -#define S_0286CC_BARYC_SAMPLE_CNTL(x) (((x) & 0x3) << 26) -#define G_0286CC_BARYC_SAMPLE_CNTL(x) (((x) >> 26) & 0x3) -#define C_0286CC_BARYC_SAMPLE_CNTL 0xF3FFFFFF -#define S_0286CC_PERSP_GRADIENT_ENA(x) (((x) & 0x1) << 28) -#define G_0286CC_PERSP_GRADIENT_ENA(x) (((x) >> 28) & 0x1) -#define C_0286CC_PERSP_GRADIENT_ENA 0xEFFFFFFF -#define S_0286CC_LINEAR_GRADIENT_ENA(x) (((x) & 0x1) << 29) -#define G_0286CC_LINEAR_GRADIENT_ENA(x) (((x) >> 29) & 0x1) -#define C_0286CC_LINEAR_GRADIENT_ENA 0xDFFFFFFF -#define S_0286CC_POSITION_SAMPLE(x) (((x) & 0x1) << 30) -#define G_0286CC_POSITION_SAMPLE(x) (((x) >> 30) & 0x1) -#define C_0286CC_POSITION_SAMPLE 0xBFFFFFFF -#define S_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) & 0x1) << 31) -#define G_0286CC_BARYC_AT_SAMPLE_ENA(x) (((x) >> 31) & 0x1) -#define C_0286CC_BARYC_AT_SAMPLE_ENA 0x7FFFFFFF -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define S_0286D0_GEN_INDEX_PIX(x) (((x) & 0x1) << 0) -#define G_0286D0_GEN_INDEX_PIX(x) (((x) >> 0) & 0x1) -#define C_0286D0_GEN_INDEX_PIX 0xFFFFFFFE -#define S_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) & 0x7F) << 1) -#define G_0286D0_GEN_INDEX_PIX_ADDR(x) (((x) >> 1) & 0x7F) -#define C_0286D0_GEN_INDEX_PIX_ADDR 0xFFFFFF01 -#define S_0286D0_FRONT_FACE_ENA(x) (((x) & 0x1) << 8) -#define G_0286D0_FRONT_FACE_ENA(x) (((x) >> 8) & 0x1) -#define C_0286D0_FRONT_FACE_ENA 0xFFFFFEFF -#define S_0286D0_FRONT_FACE_CHAN(x) (((x) & 0x3) << 9) -#define G_0286D0_FRONT_FACE_CHAN(x) (((x) >> 9) & 0x3) -#define C_0286D0_FRONT_FACE_CHAN 0xFFFFF9FF -#define S_0286D0_FRONT_FACE_ALL_BITS(x) (((x) & 0x1) << 11) -#define G_0286D0_FRONT_FACE_ALL_BITS(x) (((x) >> 11) & 0x1) -#define C_0286D0_FRONT_FACE_ALL_BITS 0xFFFFF7FF -#define S_0286D0_FRONT_FACE_ADDR(x) (((x) & 0x1F) << 12) -#define G_0286D0_FRONT_FACE_ADDR(x) (((x) >> 12) & 0x1F) -#define C_0286D0_FRONT_FACE_ADDR 0xFFFE0FFF -#define S_0286D0_FOG_ADDR(x) (((x) & 0x7F) << 17) -#define G_0286D0_FOG_ADDR(x) (((x) >> 17) & 0x7F) -#define C_0286D0_FOG_ADDR 0xFF01FFFF -#define S_0286D0_FIXED_PT_POSITION_ENA(x) (((x) & 0x1) << 24) -#define G_0286D0_FIXED_PT_POSITION_ENA(x) (((x) >> 24) & 0x1) -#define C_0286D0_FIXED_PT_POSITION_ENA 0xFEFFFFFF -#define S_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) & 0x1F) << 25) -#define G_0286D0_FIXED_PT_POSITION_ADDR(x) (((x) >> 25) & 0x1F) -#define C_0286D0_FIXED_PT_POSITION_ADDR 0xC1FFFFFF -#define R_0286C4_SPI_VS_OUT_CONFIG 0x0286C4 -#define S_0286C4_VS_PER_COMPONENT(x) (((x) & 0x1) << 0) -#define G_0286C4_VS_PER_COMPONENT(x) (((x) >> 0) & 0x1) -#define C_0286C4_VS_PER_COMPONENT 0xFFFFFFFE -#define S_0286C4_VS_EXPORT_COUNT(x) (((x) & 0x1F) << 1) -#define G_0286C4_VS_EXPORT_COUNT(x) (((x) >> 1) & 0x1F) -#define C_0286C4_VS_EXPORT_COUNT 0xFFFFFFC1 -#define S_0286C4_VS_EXPORTS_FOG(x) (((x) & 0x1) << 8) -#define G_0286C4_VS_EXPORTS_FOG(x) (((x) >> 8) & 0x1) -#define C_0286C4_VS_EXPORTS_FOG 0xFFFFFEFF -#define S_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) & 0x1F) << 9) -#define G_0286C4_VS_OUT_FOG_VEC_ADDR(x) (((x) >> 9) & 0x1F) -#define C_0286C4_VS_OUT_FOG_VEC_ADDR 0xFFFFC1FF -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define S_0287F0_SOURCE_SELECT(x) (((x) & 0x3) << 0) -#define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3) -#define C_0287F0_SOURCE_SELECT 0xFFFFFFFC -#define S_0287F0_MAJOR_MODE(x) (((x) & 0x3) << 2) -#define G_0287F0_MAJOR_MODE(x) (((x) >> 2) & 0x3) -#define C_0287F0_MAJOR_MODE 0xFFFFFFF3 -#define S_0287F0_SPRITE_EN(x) (((x) & 0x1) << 4) -#define G_0287F0_SPRITE_EN(x) (((x) >> 4) & 0x1) -#define C_0287F0_SPRITE_EN 0xFFFFFFEF -#define S_0287F0_NOT_EOP(x) (((x) & 0x1) << 5) -#define G_0287F0_NOT_EOP(x) (((x) >> 5) & 0x1) -#define C_0287F0_NOT_EOP 0xFFFFFFDF -#define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) -#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) -#define C_0287F0_USE_OPAQUE 0xFFFFFFBF -#define R_0280A0_CB_COLOR0_INFO 0x0280A0 -#define R_0280A4_CB_COLOR1_INFO 0x0280A4 -#define R_0280A8_CB_COLOR2_INFO 0x0280A8 -#define R_0280AC_CB_COLOR3_INFO 0x0280AC -#define R_0280B0_CB_COLOR4_INFO 0x0280B0 -#define R_0280B4_CB_COLOR5_INFO 0x0280B4 -#define R_0280B8_CB_COLOR6_INFO 0x0280B8 -#define R_0280BC_CB_COLOR7_INFO 0x0280BC -#define R_02800C_DB_DEPTH_BASE 0x02800C -#define R_028000_DB_DEPTH_SIZE 0x028000 -#define R_028004_DB_DEPTH_VIEW 0x028004 -#define R_028010_DB_DEPTH_INFO 0x028010 -#define R_028D24_DB_HTILE_SURFACE 0x028D24 -#define R_028D34_DB_PREFETCH_LIMIT 0x028D34 -#define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 -#define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 -#define R_028C00_PA_SC_LINE_CNTL 0x028C00 -#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define R_028C08_PA_SU_VTX_CNTL 0x028C08 -#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C -#define R_028C48_PA_SC_AA_MASK 0x028C48 -#define R_028810_PA_CL_CLIP_CNTL 0x028810 -#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C -#define R_028820_PA_CL_NANINF_CNTL 0x028820 -#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C -#define R_028C10_PA_CL_GB_VERT_DISC_ADJ 0x028C10 -#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ 0x028C14 -#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ 0x028C18 -#define R_028814_PA_SU_SC_MODE_CNTL 0x028814 -#define R_028A00_PA_SU_POINT_SIZE 0x028A00 -#define R_028A04_PA_SU_POINT_MINMAX 0x028A04 -#define R_028A08_PA_SU_LINE_CNTL 0x028A08 -#define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C -#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 -#define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC -#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 -#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 -#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 -#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C -#define R_028818_PA_CL_VTE_CNTL 0x028818 -#define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C -#define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 -#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C -#define R_028440_PA_CL_VPORT_XOFFSET_0 0x028440 -#define R_028448_PA_CL_VPORT_YOFFSET_0 0x028448 -#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x028450 -#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250 -#define R_028254_PA_SC_VPORT_SCISSOR_0_BR 0x028254 -#define R_028780_CB_BLEND0_CONTROL 0x028780 -#define R_028784_CB_BLEND1_CONTROL 0x028784 -#define R_028788_CB_BLEND2_CONTROL 0x028788 -#define R_02878C_CB_BLEND3_CONTROL 0x02878C -#define R_028790_CB_BLEND4_CONTROL 0x028790 -#define R_028794_CB_BLEND5_CONTROL 0x028794 -#define R_028798_CB_BLEND6_CONTROL 0x028798 -#define R_02879C_CB_BLEND7_CONTROL 0x02879C -#define R_028804_CB_BLEND_CONTROL 0x028804 -#define R_028028_DB_STENCIL_CLEAR 0x028028 -#define R_02802C_DB_DEPTH_CLEAR 0x02802C -#define R_028430_DB_STENCILREFMASK 0x028430 -#define R_028434_DB_STENCILREFMASK_BF 0x028434 -#define R_028800_DB_DEPTH_CONTROL 0x028800 -#define R_02880C_DB_SHADER_CONTROL 0x02880C -#define R_028D0C_DB_RENDER_CONTROL 0x028D0C -#define S_028D0C_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0) -#define S_028D0C_STENCIL_CLEAR_ENABLE(x) (((x) & 0x1) << 1) -#define S_028D0C_DEPTH_COPY_ENABLE(x) (((x) & 0x1) << 2) -#define S_028D0C_STENCIL_COPY_ENABLE(x) (((x) & 0x1) << 3) -#define S_028D0C_RESUMMARIZE_ENABLE(x) (((x) & 0x1) << 4) -#define S_028D0C_STENCIL_COMPRESS_DISABLE(x) (((x) & 0x1) << 5) -#define S_028D0C_DEPTH_COMPRESS_DISABLE(x) (((x) & 0x1) << 6) -#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7) -#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8) -#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15) -#define R_028D10_DB_RENDER_OVERRIDE 0x028D10 -#define R_028D2C_DB_SRESULTS_COMPARE_STATE1 0x028D2C -#define R_028D30_DB_PRELOAD_CONTROL 0x028D30 -#define R_028D44_DB_ALPHA_TO_MASK 0x028D44 -#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 -#define R_0286CC_SPI_PS_IN_CONTROL_0 0x0286CC -#define R_0286D0_SPI_PS_IN_CONTROL_1 0x0286D0 -#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644 -#define R_028648_SPI_PS_INPUT_CNTL_1 0x028648 -#define R_02864C_SPI_PS_INPUT_CNTL_2 0x02864C -#define R_028650_SPI_PS_INPUT_CNTL_3 0x028650 -#define R_028654_SPI_PS_INPUT_CNTL_4 0x028654 -#define R_028658_SPI_PS_INPUT_CNTL_5 0x028658 -#define R_02865C_SPI_PS_INPUT_CNTL_6 0x02865C -#define R_028660_SPI_PS_INPUT_CNTL_7 0x028660 -#define R_028664_SPI_PS_INPUT_CNTL_8 0x028664 -#define R_028668_SPI_PS_INPUT_CNTL_9 0x028668 -#define R_02866C_SPI_PS_INPUT_CNTL_10 0x02866C -#define R_028670_SPI_PS_INPUT_CNTL_11 0x028670 -#define R_028674_SPI_PS_INPUT_CNTL_12 0x028674 -#define R_028678_SPI_PS_INPUT_CNTL_13 0x028678 -#define R_02867C_SPI_PS_INPUT_CNTL_14 0x02867C -#define R_028680_SPI_PS_INPUT_CNTL_15 0x028680 -#define R_028684_SPI_PS_INPUT_CNTL_16 0x028684 -#define R_028688_SPI_PS_INPUT_CNTL_17 0x028688 -#define R_02868C_SPI_PS_INPUT_CNTL_18 0x02868C -#define R_028690_SPI_PS_INPUT_CNTL_19 0x028690 -#define R_028694_SPI_PS_INPUT_CNTL_20 0x028694 -#define R_028698_SPI_PS_INPUT_CNTL_21 0x028698 -#define R_02869C_SPI_PS_INPUT_CNTL_22 0x02869C -#define R_0286A0_SPI_PS_INPUT_CNTL_23 0x0286A0 -#define R_0286A4_SPI_PS_INPUT_CNTL_24 0x0286A4 -#define R_0286A8_SPI_PS_INPUT_CNTL_25 0x0286A8 -#define R_0286AC_SPI_PS_INPUT_CNTL_26 0x0286AC -#define R_0286B0_SPI_PS_INPUT_CNTL_27 0x0286B0 -#define R_0286B4_SPI_PS_INPUT_CNTL_28 0x0286B4 -#define R_0286B8_SPI_PS_INPUT_CNTL_29 0x0286B8 -#define R_0286BC_SPI_PS_INPUT_CNTL_30 0x0286BC -#define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 -#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 -#define R_028854_SQ_PGM_EXPORTS_PS 0x028854 -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C -#define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 -#define R_008970_VGT_NUM_INDICES 0x008970 -#define R_0287F0_VGT_DRAW_INITIATOR 0x0287F0 -#define R_028238_CB_TARGET_MASK 0x028238 -#define R_02823C_CB_SHADER_MASK 0x02823C -#define R_028060_CB_COLOR0_SIZE 0x028060 -#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) -#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -#define C_028060_PITCH_TILE_MAX 0xFFFFFC00 -#define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) -#define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) -#define C_028060_SLICE_TILE_MAX 0xC00003FF -#define R_028064_CB_COLOR1_SIZE 0x028064 -#define R_028068_CB_COLOR2_SIZE 0x028068 -#define R_02806C_CB_COLOR3_SIZE 0x02806C -#define R_028070_CB_COLOR4_SIZE 0x028070 -#define R_028074_CB_COLOR5_SIZE 0x028074 -#define R_028078_CB_COLOR6_SIZE 0x028078 -#define R_02807C_CB_COLOR7_SIZE 0x02807C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define R_028044_CB_COLOR1_BASE 0x028044 -#define R_028048_CB_COLOR2_BASE 0x028048 -#define R_02804C_CB_COLOR3_BASE 0x02804C -#define R_028050_CB_COLOR4_BASE 0x028050 -#define R_028054_CB_COLOR5_BASE 0x028054 -#define R_028058_CB_COLOR6_BASE 0x028058 -#define R_02805C_CB_COLOR7_BASE 0x02805C -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define R_028C04_PA_SC_AA_CONFIG 0x028C04 -#define S_028C04_MSAA_NUM_SAMPLES(x) (((x) & 0x3) << 0) -#define G_028C04_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x3) -#define C_028C04_MSAA_NUM_SAMPLES 0xFFFFFFFC -#define S_028C04_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4) -#define G_028C04_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1) -#define C_028C04_AA_MASK_CENTROID_DTMN 0xFFFFFFEF -#define S_028C04_MAX_SAMPLE_DIST(x) (((x) & 0xF) << 13) -#define G_028C04_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0xF) -#define C_028C04_MAX_SAMPLE_DIST 0xFFFE1FFF -#define R_0288CC_SQ_PGM_CF_OFFSET_PS 0x0288CC -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define R_0288D0_SQ_PGM_CF_OFFSET_VS 0x0288D0 -#define R_028840_SQ_PGM_START_PS 0x028840 -#define R_028894_SQ_PGM_START_FS 0x028894 -#define R_028858_SQ_PGM_START_VS 0x028858 -#define R_028080_CB_COLOR0_VIEW 0x028080 -#define S_028080_SLICE_START(x) (((x) & 0x7FF) << 0) -#define G_028080_SLICE_START(x) (((x) >> 0) & 0x7FF) -#define C_028080_SLICE_START 0xFFFFF800 -#define S_028080_SLICE_MAX(x) (((x) & 0x7FF) << 13) -#define G_028080_SLICE_MAX(x) (((x) >> 13) & 0x7FF) -#define C_028080_SLICE_MAX 0xFF001FFF -#define R_028084_CB_COLOR1_VIEW 0x028084 -#define R_028088_CB_COLOR2_VIEW 0x028088 -#define R_02808C_CB_COLOR3_VIEW 0x02808C -#define R_028090_CB_COLOR4_VIEW 0x028090 -#define R_028094_CB_COLOR5_VIEW 0x028094 -#define R_028098_CB_COLOR6_VIEW 0x028098 -#define R_02809C_CB_COLOR7_VIEW 0x02809C -#define R_028100_CB_COLOR0_MASK 0x028100 -#define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) -#define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) -#define C_028100_CMASK_BLOCK_MAX 0xFFFFF000 -#define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) -#define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) -#define C_028100_FMASK_TILE_MAX 0x00000FFF -#define R_028104_CB_COLOR1_MASK 0x028104 -#define R_028108_CB_COLOR2_MASK 0x028108 -#define R_02810C_CB_COLOR3_MASK 0x02810C -#define R_028110_CB_COLOR4_MASK 0x028110 -#define R_028114_CB_COLOR5_MASK 0x028114 -#define R_028118_CB_COLOR6_MASK 0x028118 -#define R_02811C_CB_COLOR7_MASK 0x02811C -#define R_028040_CB_COLOR0_BASE 0x028040 -#define S_028040_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028040_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028040_BASE_256B 0x00000000 -#define R_0280E0_CB_COLOR0_FRAG 0x0280E0 -#define S_0280E0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280E0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280E0_BASE_256B 0x00000000 -#define R_0280E4_CB_COLOR1_FRAG 0x0280E4 -#define R_0280E8_CB_COLOR2_FRAG 0x0280E8 -#define R_0280EC_CB_COLOR3_FRAG 0x0280EC -#define R_0280F0_CB_COLOR4_FRAG 0x0280F0 -#define R_0280F4_CB_COLOR5_FRAG 0x0280F4 -#define R_0280F8_CB_COLOR6_FRAG 0x0280F8 -#define R_0280FC_CB_COLOR7_FRAG 0x0280FC -#define R_0280C0_CB_COLOR0_TILE 0x0280C0 -#define S_0280C0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0280C0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0280C0_BASE_256B 0x00000000 -#define R_0280C4_CB_COLOR1_TILE 0x0280C4 -#define R_0280C8_CB_COLOR2_TILE 0x0280C8 -#define R_0280CC_CB_COLOR3_TILE 0x0280CC -#define R_0280D0_CB_COLOR4_TILE 0x0280D0 -#define R_0280D4_CB_COLOR5_TILE 0x0280D4 -#define R_0280D8_CB_COLOR6_TILE 0x0280D8 -#define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF -#define R_028614_SPI_VS_OUT_ID_0 0x028614 -#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) -#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) -#define C_028614_SEMANTIC_0 0xFFFFFF00 -#define S_028614_SEMANTIC_1(x) (((x) & 0xFF) << 8) -#define G_028614_SEMANTIC_1(x) (((x) >> 8) & 0xFF) -#define C_028614_SEMANTIC_1 0xFFFF00FF -#define S_028614_SEMANTIC_2(x) (((x) & 0xFF) << 16) -#define G_028614_SEMANTIC_2(x) (((x) >> 16) & 0xFF) -#define C_028614_SEMANTIC_2 0xFF00FFFF -#define S_028614_SEMANTIC_3(x) (((x) & 0xFF) << 24) -#define G_028614_SEMANTIC_3(x) (((x) >> 24) & 0xFF) -#define C_028614_SEMANTIC_3 0x00FFFFFF -#define R_028618_SPI_VS_OUT_ID_1 0x028618 -#define R_02861C_SPI_VS_OUT_ID_2 0x02861C -#define R_028620_SPI_VS_OUT_ID_3 0x028620 -#define R_028624_SPI_VS_OUT_ID_4 0x028624 -#define R_028628_SPI_VS_OUT_ID_5 0x028628 -#define R_02862C_SPI_VS_OUT_ID_6 0x02862C -#define R_028630_SPI_VS_OUT_ID_7 0x028630 -#define R_028634_SPI_VS_OUT_ID_8 0x028634 -#define R_028638_SPI_VS_OUT_ID_9 0x028638 -#define R_038000_SQ_TEX_RESOURCE_WORD0_0 0x038000 -#define S_038000_DIM(x) (((x) & 0x7) << 0) -#define G_038000_DIM(x) (((x) >> 0) & 0x7) -#define C_038000_DIM 0xFFFFFFF8 -#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) -#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) -#define C_038000_TILE_MODE 0xFFFFFF87 -#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) -#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) -#define C_038000_TILE_TYPE 0xFFFFFF7F -#define S_038000_PITCH(x) (((x) & 0x7FF) << 8) -#define G_038000_PITCH(x) (((x) >> 8) & 0x7FF) -#define C_038000_PITCH 0xFFF800FF -#define S_038000_TEX_WIDTH(x) (((x) & 0x1FFF) << 19) -#define G_038000_TEX_WIDTH(x) (((x) >> 19) & 0x1FFF) -#define C_038000_TEX_WIDTH 0x0007FFFF -#define R_038004_SQ_TEX_RESOURCE_WORD1_0 0x038004 -#define S_038004_TEX_HEIGHT(x) (((x) & 0x1FFF) << 0) -#define G_038004_TEX_HEIGHT(x) (((x) >> 0) & 0x1FFF) -#define C_038004_TEX_HEIGHT 0xFFFFE000 -#define S_038004_TEX_DEPTH(x) (((x) & 0x1FFF) << 13) -#define G_038004_TEX_DEPTH(x) (((x) >> 13) & 0x1FFF) -#define C_038004_TEX_DEPTH 0xFC001FFF -#define S_038004_DATA_FORMAT(x) (((x) & 0x3F) << 26) -#define G_038004_DATA_FORMAT(x) (((x) >> 26) & 0x3F) -#define C_038004_DATA_FORMAT 0x03FFFFFF -#define V_038004_COLOR_INVALID 0x00000000 -#define V_038004_COLOR_8 0x00000001 -#define V_038004_COLOR_4_4 0x00000002 -#define V_038004_COLOR_3_3_2 0x00000003 -#define V_038004_COLOR_16 0x00000005 -#define V_038004_COLOR_16_FLOAT 0x00000006 -#define V_038004_COLOR_8_8 0x00000007 -#define V_038004_COLOR_5_6_5 0x00000008 -#define V_038004_COLOR_6_5_5 0x00000009 -#define V_038004_COLOR_1_5_5_5 0x0000000A -#define V_038004_COLOR_4_4_4_4 0x0000000B -#define V_038004_COLOR_5_5_5_1 0x0000000C -#define V_038004_COLOR_32 0x0000000D -#define V_038004_COLOR_32_FLOAT 0x0000000E -#define V_038004_COLOR_16_16 0x0000000F -#define V_038004_COLOR_16_16_FLOAT 0x00000010 -#define V_038004_COLOR_8_24 0x00000011 -#define V_038004_COLOR_8_24_FLOAT 0x00000012 -#define V_038004_COLOR_24_8 0x00000013 -#define V_038004_COLOR_24_8_FLOAT 0x00000014 -#define V_038004_COLOR_10_11_11 0x00000015 -#define V_038004_COLOR_10_11_11_FLOAT 0x00000016 -#define V_038004_COLOR_11_11_10 0x00000017 -#define V_038004_COLOR_11_11_10_FLOAT 0x00000018 -#define V_038004_COLOR_2_10_10_10 0x00000019 -#define V_038004_COLOR_8_8_8_8 0x0000001A -#define V_038004_COLOR_10_10_10_2 0x0000001B -#define V_038004_COLOR_X24_8_32_FLOAT 0x0000001C -#define V_038004_COLOR_32_32 0x0000001D -#define V_038004_COLOR_32_32_FLOAT 0x0000001E -#define V_038004_COLOR_16_16_16_16 0x0000001F -#define V_038004_COLOR_16_16_16_16_FLOAT 0x00000020 -#define V_038004_COLOR_32_32_32_32 0x00000022 -#define V_038004_COLOR_32_32_32_32_FLOAT 0x00000023 -#define R_038008_SQ_TEX_RESOURCE_WORD2_0 0x038008 -#define S_038008_BASE_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_038008_BASE_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_038008_BASE_ADDRESS 0x00000000 -#define R_03800C_SQ_TEX_RESOURCE_WORD3_0 0x03800C -#define S_03800C_MIP_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03800C_MIP_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03800C_MIP_ADDRESS 0x00000000 -#define R_038010_SQ_TEX_RESOURCE_WORD4_0 0x038010 -#define S_038010_FORMAT_COMP_X(x) (((x) & 0x3) << 0) -#define G_038010_FORMAT_COMP_X(x) (((x) >> 0) & 0x3) -#define C_038010_FORMAT_COMP_X 0xFFFFFFFC -#define S_038010_FORMAT_COMP_Y(x) (((x) & 0x3) << 2) -#define G_038010_FORMAT_COMP_Y(x) (((x) >> 2) & 0x3) -#define C_038010_FORMAT_COMP_Y 0xFFFFFFF3 -#define S_038010_FORMAT_COMP_Z(x) (((x) & 0x3) << 4) -#define G_038010_FORMAT_COMP_Z(x) (((x) >> 4) & 0x3) -#define C_038010_FORMAT_COMP_Z 0xFFFFFFCF -#define S_038010_FORMAT_COMP_W(x) (((x) & 0x3) << 6) -#define G_038010_FORMAT_COMP_W(x) (((x) >> 6) & 0x3) -#define C_038010_FORMAT_COMP_W 0xFFFFFF3F -#define S_038010_NUM_FORMAT_ALL(x) (((x) & 0x3) << 8) -#define G_038010_NUM_FORMAT_ALL(x) (((x) >> 8) & 0x3) -#define C_038010_NUM_FORMAT_ALL 0xFFFFFCFF -#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) -#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) -#define C_038010_SRF_MODE_ALL 0xFFFFFBFF -#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) -#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) -#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF -#define S_038010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) -#define G_038010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) -#define C_038010_ENDIAN_SWAP 0xFFFFCFFF -#define S_038010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_038010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_038010_REQUEST_SIZE 0xFFFF3FFF -#define S_038010_DST_SEL_X(x) (((x) & 0x7) << 16) -#define G_038010_DST_SEL_X(x) (((x) >> 16) & 0x7) -#define C_038010_DST_SEL_X 0xFFF8FFFF -#define S_038010_DST_SEL_Y(x) (((x) & 0x7) << 19) -#define G_038010_DST_SEL_Y(x) (((x) >> 19) & 0x7) -#define C_038010_DST_SEL_Y 0xFFC7FFFF -#define S_038010_DST_SEL_Z(x) (((x) & 0x7) << 22) -#define G_038010_DST_SEL_Z(x) (((x) >> 22) & 0x7) -#define C_038010_DST_SEL_Z 0xFE3FFFFF -#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) -#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) -#define C_038010_DST_SEL_W 0xF1FFFFFF -#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) -#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) -#define C_038010_BASE_LEVEL 0x0FFFFFFF -#define R_038014_SQ_TEX_RESOURCE_WORD5_0 0x038014 -#define S_038014_LAST_LEVEL(x) (((x) & 0xF) << 0) -#define G_038014_LAST_LEVEL(x) (((x) >> 0) & 0xF) -#define C_038014_LAST_LEVEL 0xFFFFFFF0 -#define S_038014_BASE_ARRAY(x) (((x) & 0x1FFF) << 4) -#define G_038014_BASE_ARRAY(x) (((x) >> 4) & 0x1FFF) -#define C_038014_BASE_ARRAY 0xFFFE000F -#define S_038014_LAST_ARRAY(x) (((x) & 0x1FFF) << 17) -#define G_038014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF) -#define C_038014_LAST_ARRAY 0xC001FFFF -#define R_038018_SQ_TEX_RESOURCE_WORD6_0 0x038018 -#define S_038018_MPEG_CLAMP(x) (((x) & 0x3) << 0) -#define G_038018_MPEG_CLAMP(x) (((x) >> 0) & 0x3) -#define C_038018_MPEG_CLAMP 0xFFFFFFFC -#define S_038018_PERF_MODULATION(x) (((x) & 0x7) << 5) -#define G_038018_PERF_MODULATION(x) (((x) >> 5) & 0x7) -#define C_038018_PERF_MODULATION 0xFFFFFF1F -#define S_038018_INTERLACED(x) (((x) & 0x1) << 8) -#define G_038018_INTERLACED(x) (((x) >> 8) & 0x1) -#define C_038018_INTERLACED 0xFFFFFEFF -#define S_038018_TYPE(x) (((x) & 0x3) << 30) -#define G_038018_TYPE(x) (((x) >> 30) & 0x3) -#define C_038018_TYPE 0x3FFFFFFF -#define R_008040_WAIT_UNTIL 0x008040 -#define S_008040_WAIT_CP_DMA_IDLE(x) (((x) & 0x1) << 8) -#define G_008040_WAIT_CP_DMA_IDLE(x) (((x) >> 8) & 0x1) -#define C_008040_WAIT_CP_DMA_IDLE 0xFFFFFEFF -#define S_008040_WAIT_CMDFIFO(x) (((x) & 0x1) << 10) -#define G_008040_WAIT_CMDFIFO(x) (((x) >> 10) & 0x1) -#define C_008040_WAIT_CMDFIFO 0xFFFFFBFF -#define S_008040_WAIT_2D_IDLE(x) (((x) & 0x1) << 14) -#define G_008040_WAIT_2D_IDLE(x) (((x) >> 14) & 0x1) -#define C_008040_WAIT_2D_IDLE 0xFFFFBFFF -#define S_008040_WAIT_3D_IDLE(x) (((x) & 0x1) << 15) -#define G_008040_WAIT_3D_IDLE(x) (((x) >> 15) & 0x1) -#define C_008040_WAIT_3D_IDLE 0xFFFF7FFF -#define S_008040_WAIT_2D_IDLECLEAN(x) (((x) & 0x1) << 16) -#define G_008040_WAIT_2D_IDLECLEAN(x) (((x) >> 16) & 0x1) -#define C_008040_WAIT_2D_IDLECLEAN 0xFFFEFFFF -#define S_008040_WAIT_3D_IDLECLEAN(x) (((x) & 0x1) << 17) -#define G_008040_WAIT_3D_IDLECLEAN(x) (((x) >> 17) & 0x1) -#define C_008040_WAIT_3D_IDLECLEAN 0xFFFDFFFF -#define S_008040_WAIT_EXTERN_SIG(x) (((x) & 0x1) << 19) -#define G_008040_WAIT_EXTERN_SIG(x) (((x) >> 19) & 0x1) -#define C_008040_WAIT_EXTERN_SIG 0xFFF7FFFF -#define S_008040_CMDFIFO_ENTRIES(x) (((x) & 0x1F) << 20) -#define G_008040_CMDFIFO_ENTRIES(x) (((x) >> 20) & 0x1F) -#define C_008040_CMDFIFO_ENTRIES 0xFE0FFFFF -#define R_008958_VGT_PRIMITIVE_TYPE 0x008958 -#define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) -#define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) -#define C_008958_PRIM_TYPE 0xFFFFFFC0 -#define R_008C00_SQ_CONFIG 0x008C00 -#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0) -#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1) -#define C_008C00_VC_ENABLE 0xFFFFFFFE -#define S_008C00_EXPORT_SRC_C(x) (((x) & 0x1) << 1) -#define G_008C00_EXPORT_SRC_C(x) (((x) >> 1) & 0x1) -#define C_008C00_EXPORT_SRC_C 0xFFFFFFFD -#define S_008C00_DX9_CONSTS(x) (((x) & 0x1) << 2) -#define G_008C00_DX9_CONSTS(x) (((x) >> 2) & 0x1) -#define C_008C00_DX9_CONSTS 0xFFFFFFFB -#define S_008C00_ALU_INST_PREFER_VECTOR(x) (((x) & 0x1) << 3) -#define G_008C00_ALU_INST_PREFER_VECTOR(x) (((x) >> 3) & 0x1) -#define C_008C00_ALU_INST_PREFER_VECTOR 0xFFFFFFF7 -#define S_008C00_DX10_CLAMP(x) (((x) & 0x1) << 4) -#define G_008C00_DX10_CLAMP(x) (((x) >> 4) & 0x1) -#define C_008C00_DX10_CLAMP 0xFFFFFFEF -#define S_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) & 0x1) << 5) -#define G_008C00_ALU_PREFER_ONE_WATERFALL(x) (((x) >> 5) & 0x1) -#define C_008C00_ALU_PREFER_ONE_WATERFALL 0xFFFFFFDF -#define S_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) & 0x1) << 6) -#define G_008C00_ALU_MAX_ONE_WATERFALL(x) (((x) >> 6) & 0x1) -#define C_008C00_ALU_MAX_ONE_WATERFALL 0xFFFFFFBF -#define S_008C00_CLAUSE_SEQ_PRIO(x) (((x) & 0x3) << 8) -#define G_008C00_CLAUSE_SEQ_PRIO(x) (((x) >> 8) & 0x3) -#define C_008C00_CLAUSE_SEQ_PRIO 0xFFFFFCFF -#define S_008C00_PS_PRIO(x) (((x) & 0x3) << 24) -#define G_008C00_PS_PRIO(x) (((x) >> 24) & 0x3) -#define C_008C00_PS_PRIO 0xFCFFFFFF -#define S_008C00_VS_PRIO(x) (((x) & 0x3) << 26) -#define G_008C00_VS_PRIO(x) (((x) >> 26) & 0x3) -#define C_008C00_VS_PRIO 0xF3FFFFFF -#define S_008C00_GS_PRIO(x) (((x) & 0x3) << 28) -#define G_008C00_GS_PRIO(x) (((x) >> 28) & 0x3) -#define C_008C00_GS_PRIO 0xCFFFFFFF -#define S_008C00_ES_PRIO(x) (((x) & 0x3) << 30) -#define G_008C00_ES_PRIO(x) (((x) >> 30) & 0x3) -#define C_008C00_ES_PRIO 0x3FFFFFFF -#define R_008C04_SQ_GPR_RESOURCE_MGMT_1 0x008C04 -#define S_008C04_NUM_PS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C04_NUM_PS_GPRS(x) (((x) >> 0) & 0xFF) -#define C_008C04_NUM_PS_GPRS 0xFFFFFF00 -#define S_008C04_NUM_VS_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C04_NUM_VS_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C04_NUM_VS_GPRS 0xFF00FFFF -#define S_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) & 0xF) << 28) -#define G_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) >> 28) & 0xF) -#define C_008C04_NUM_CLAUSE_TEMP_GPRS 0x0FFFFFFF -#define R_008C08_SQ_GPR_RESOURCE_MGMT_2 0x008C08 -#define S_008C08_NUM_GS_GPRS(x) (((x) & 0xFF) << 0) -#define G_008C08_NUM_GS_GPRS(x) (((x) >> 0) & 0xFF) -#define C_008C08_NUM_GS_GPRS 0xFFFFFF00 -#define S_008C08_NUM_ES_GPRS(x) (((x) & 0xFF) << 16) -#define G_008C08_NUM_ES_GPRS(x) (((x) >> 16) & 0xFF) -#define C_008C08_NUM_ES_GPRS 0xFF00FFFF -#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x008C0C -#define S_008C0C_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) -#define G_008C0C_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) -#define C_008C0C_NUM_PS_THREADS 0xFFFFFF00 -#define S_008C0C_NUM_VS_THREADS(x) (((x) & 0xFF) << 8) -#define G_008C0C_NUM_VS_THREADS(x) (((x) >> 8) & 0xFF) -#define C_008C0C_NUM_VS_THREADS 0xFFFF00FF -#define S_008C0C_NUM_GS_THREADS(x) (((x) & 0xFF) << 16) -#define G_008C0C_NUM_GS_THREADS(x) (((x) >> 16) & 0xFF) -#define C_008C0C_NUM_GS_THREADS 0xFF00FFFF -#define S_008C0C_NUM_ES_THREADS(x) (((x) & 0xFF) << 24) -#define G_008C0C_NUM_ES_THREADS(x) (((x) >> 24) & 0xFF) -#define C_008C0C_NUM_ES_THREADS 0x00FFFFFF -#define R_008C10_SQ_STACK_RESOURCE_MGMT_1 0x008C10 -#define S_008C10_NUM_PS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C10_NUM_PS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C10_NUM_PS_STACK_ENTRIES 0xFFFFF000 -#define S_008C10_NUM_VS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C10_NUM_VS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C10_NUM_VS_STACK_ENTRIES 0xF000FFFF -#define R_008C14_SQ_STACK_RESOURCE_MGMT_2 0x008C14 -#define S_008C14_NUM_GS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) -#define G_008C14_NUM_GS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) -#define C_008C14_NUM_GS_STACK_ENTRIES 0xFFFFF000 -#define S_008C14_NUM_ES_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) -#define G_008C14_NUM_ES_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) -#define C_008C14_NUM_ES_STACK_ENTRIES 0xF000FFFF -#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x008D8C -#define S_008D8C_RING0_OFFSET(x) (((x) & 0xFF) << 0) -#define G_008D8C_RING0_OFFSET(x) (((x) >> 0) & 0xFF) -#define C_008D8C_RING0_OFFSET 0xFFFFFF00 -#define S_008D8C_ISOLATE_ES_ENABLE(x) (((x) & 0x1) << 12) -#define G_008D8C_ISOLATE_ES_ENABLE(x) (((x) >> 12) & 0x1) -#define C_008D8C_ISOLATE_ES_ENABLE 0xFFFFEFFF -#define S_008D8C_ISOLATE_GS_ENABLE(x) (((x) & 0x1) << 13) -#define G_008D8C_ISOLATE_GS_ENABLE(x) (((x) >> 13) & 0x1) -#define C_008D8C_ISOLATE_GS_ENABLE 0xFFFFDFFF -#define S_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) & 0x1) << 14) -#define G_008D8C_VS_PC_LIMIT_ENABLE(x) (((x) >> 14) & 0x1) -#define C_008D8C_VS_PC_LIMIT_ENABLE 0xFFFFBFFF -#define R_009508_TA_CNTL_AUX 0x009508 -#define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) -#define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) -#define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE -#define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) -#define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) -#define C_009508_SYNC_GRADIENT 0xFEFFFFFF -#define S_009508_SYNC_WALKER(x) (((x) & 0x1) << 25) -#define G_009508_SYNC_WALKER(x) (((x) >> 25) & 0x1) -#define C_009508_SYNC_WALKER 0xFDFFFFFF -#define S_009508_SYNC_ALIGNER(x) (((x) & 0x1) << 26) -#define G_009508_SYNC_ALIGNER(x) (((x) >> 26) & 0x1) -#define C_009508_SYNC_ALIGNER 0xFBFFFFFF -#define S_009508_BILINEAR_PRECISION(x) (((x) & 0x1) << 31) -#define G_009508_BILINEAR_PRECISION(x) (((x) >> 31) & 0x1) -#define C_009508_BILINEAR_PRECISION 0x7FFFFFFF -#define R_009714_VC_ENHANCE 0x009714 -#define R_009830_DB_DEBUG 0x009830 -#define R_009838_DB_WATERMARKS 0x009838 -#define S_009838_DEPTH_FREE(x) (((x) & 0x1F) << 0) -#define G_009838_DEPTH_FREE(x) (((x) >> 0) & 0x1F) -#define C_009838_DEPTH_FREE 0xFFFFFFE0 -#define S_009838_DEPTH_FLUSH(x) (((x) & 0x3F) << 5) -#define G_009838_DEPTH_FLUSH(x) (((x) >> 5) & 0x3F) -#define C_009838_DEPTH_FLUSH 0xFFFFF81F -#define S_009838_FORCE_SUMMARIZE(x) (((x) & 0xF) << 11) -#define G_009838_FORCE_SUMMARIZE(x) (((x) >> 11) & 0xF) -#define C_009838_FORCE_SUMMARIZE 0xFFFF87FF -#define S_009838_DEPTH_PENDING_FREE(x) (((x) & 0x1F) << 15) -#define G_009838_DEPTH_PENDING_FREE(x) (((x) >> 15) & 0x1F) -#define C_009838_DEPTH_PENDING_FREE 0xFFF07FFF -#define S_009838_DEPTH_CACHELINE_FREE(x) (((x) & 0x1F) << 20) -#define G_009838_DEPTH_CACHELINE_FREE(x) (((x) >> 20) & 0x1F) -#define C_009838_DEPTH_CACHELINE_FREE 0xFE0FFFFF -#define S_009838_EARLY_Z_PANIC_DISABLE(x) (((x) & 0x1) << 25) -#define G_009838_EARLY_Z_PANIC_DISABLE(x) (((x) >> 25) & 0x1) -#define C_009838_EARLY_Z_PANIC_DISABLE 0xFDFFFFFF -#define S_009838_LATE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 26) -#define G_009838_LATE_Z_PANIC_DISABLE(x) (((x) >> 26) & 0x1) -#define C_009838_LATE_Z_PANIC_DISABLE 0xFBFFFFFF -#define S_009838_RE_Z_PANIC_DISABLE(x) (((x) & 0x1) << 27) -#define G_009838_RE_Z_PANIC_DISABLE(x) (((x) >> 27) & 0x1) -#define C_009838_RE_Z_PANIC_DISABLE 0xF7FFFFFF -#define S_009838_DB_EXTRA_DEBUG(x) (((x) & 0xF) << 28) -#define G_009838_DB_EXTRA_DEBUG(x) (((x) >> 28) & 0xF) -#define C_009838_DB_EXTRA_DEBUG 0x0FFFFFFF -#define R_028030_PA_SC_SCREEN_SCISSOR_TL 0x028030 -#define S_028030_TL_X(x) (((x) & 0x7FFF) << 0) -#define G_028030_TL_X(x) (((x) >> 0) & 0x7FFF) -#define C_028030_TL_X 0xFFFF8000 -#define S_028030_TL_Y(x) (((x) & 0x7FFF) << 16) -#define G_028030_TL_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028030_TL_Y 0x8000FFFF -#define R_028034_PA_SC_SCREEN_SCISSOR_BR 0x028034 -#define S_028034_BR_X(x) (((x) & 0x7FFF) << 0) -#define G_028034_BR_X(x) (((x) >> 0) & 0x7FFF) -#define C_028034_BR_X 0xFFFF8000 -#define S_028034_BR_Y(x) (((x) & 0x7FFF) << 16) -#define G_028034_BR_Y(x) (((x) >> 16) & 0x7FFF) -#define C_028034_BR_Y 0x8000FFFF -#define R_028200_PA_SC_WINDOW_OFFSET 0x028200 -#define S_028200_WINDOW_X_OFFSET(x) (((x) & 0x7FFF) << 0) -#define G_028200_WINDOW_X_OFFSET(x) (((x) >> 0) & 0x7FFF) -#define C_028200_WINDOW_X_OFFSET 0xFFFF8000 -#define S_028200_WINDOW_Y_OFFSET(x) (((x) & 0x7FFF) << 16) -#define G_028200_WINDOW_Y_OFFSET(x) (((x) >> 16) & 0x7FFF) -#define C_028200_WINDOW_Y_OFFSET 0x8000FFFF -#define R_028204_PA_SC_WINDOW_SCISSOR_TL 0x028204 -#define S_028204_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028204_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028204_TL_X 0xFFFFC000 -#define S_028204_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028204_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028204_TL_Y 0xC000FFFF -#define S_028204_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028204_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028204_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028208_PA_SC_WINDOW_SCISSOR_BR 0x028208 -#define S_028208_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028208_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028208_BR_X 0xFFFFC000 -#define S_028208_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028208_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028208_BR_Y 0xC000FFFF -#define R_02820C_PA_SC_CLIPRECT_RULE 0x02820C -#define S_02820C_CLIP_RULE(x) (((x) & 0xFFFF) << 0) -#define G_02820C_CLIP_RULE(x) (((x) >> 0) & 0xFFFF) -#define C_02820C_CLIP_RULE 0xFFFF0000 -#define R_028210_PA_SC_CLIPRECT_0_TL 0x028210 -#define S_028210_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028210_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028210_TL_X 0xFFFFC000 -#define S_028210_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028210_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028210_TL_Y 0xC000FFFF -#define R_028214_PA_SC_CLIPRECT_0_BR 0x028214 -#define S_028214_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028214_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028214_BR_X 0xFFFFC000 -#define S_028214_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028214_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028214_BR_Y 0xC000FFFF -#define R_028218_PA_SC_CLIPRECT_1_TL 0x028218 -#define R_02821C_PA_SC_CLIPRECT_1_BR 0x02821C -#define R_028220_PA_SC_CLIPRECT_2_TL 0x028220 -#define R_028224_PA_SC_CLIPRECT_2_BR 0x028224 -#define R_028228_PA_SC_CLIPRECT_3_TL 0x028228 -#define R_02822C_PA_SC_CLIPRECT_3_BR 0x02822C -#define R_028230_PA_SC_EDGERULE 0x028230 -#define R_028240_PA_SC_GENERIC_SCISSOR_TL 0x028240 -#define S_028240_TL_X(x) (((x) & 0x3FFF) << 0) -#define G_028240_TL_X(x) (((x) >> 0) & 0x3FFF) -#define C_028240_TL_X 0xFFFFC000 -#define S_028240_TL_Y(x) (((x) & 0x3FFF) << 16) -#define G_028240_TL_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028240_TL_Y 0xC000FFFF -#define S_028240_WINDOW_OFFSET_DISABLE(x) (((x) & 0x1) << 31) -#define G_028240_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) -#define C_028240_WINDOW_OFFSET_DISABLE 0x7FFFFFFF -#define R_028244_PA_SC_GENERIC_SCISSOR_BR 0x028244 -#define S_028244_BR_X(x) (((x) & 0x3FFF) << 0) -#define G_028244_BR_X(x) (((x) >> 0) & 0x3FFF) -#define C_028244_BR_X 0xFFFFC000 -#define S_028244_BR_Y(x) (((x) & 0x3FFF) << 16) -#define G_028244_BR_Y(x) (((x) >> 16) & 0x3FFF) -#define C_028244_BR_Y 0xC000FFFF -#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0 -#define S_0282D0_VPORT_ZMIN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D0_VPORT_ZMIN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D0_VPORT_ZMIN 0x00000000 -#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4 -#define S_0282D4_VPORT_ZMAX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0282D4_VPORT_ZMAX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0282D4_VPORT_ZMAX 0x00000000 -#define R_028350_SX_MISC 0x028350 -#define S_028350_MULTIPASS(x) (((x) & 0x1) << 0) -#define G_028350_MULTIPASS(x) (((x) >> 0) & 0x1) -#define C_028350_MULTIPASS 0xFFFFFFFE -#define R_028380_SQ_VTX_SEMANTIC_0 0x028380 -#define S_028380_SEMANTIC_ID(x) (((x) & 0xFF) << 0) -#define G_028380_SEMANTIC_ID(x) (((x) >> 0) & 0xFF) -#define C_028380_SEMANTIC_ID 0xFFFFFF00 -#define R_028384_SQ_VTX_SEMANTIC_1 0x028384 -#define R_028388_SQ_VTX_SEMANTIC_2 0x028388 -#define R_02838C_SQ_VTX_SEMANTIC_3 0x02838C -#define R_028390_SQ_VTX_SEMANTIC_4 0x028390 -#define R_028394_SQ_VTX_SEMANTIC_5 0x028394 -#define R_028398_SQ_VTX_SEMANTIC_6 0x028398 -#define R_02839C_SQ_VTX_SEMANTIC_7 0x02839C -#define R_0283A0_SQ_VTX_SEMANTIC_8 0x0283A0 -#define R_0283A4_SQ_VTX_SEMANTIC_9 0x0283A4 -#define R_0283A8_SQ_VTX_SEMANTIC_10 0x0283A8 -#define R_0283AC_SQ_VTX_SEMANTIC_11 0x0283AC -#define R_0283B0_SQ_VTX_SEMANTIC_12 0x0283B0 -#define R_0283B4_SQ_VTX_SEMANTIC_13 0x0283B4 -#define R_0283B8_SQ_VTX_SEMANTIC_14 0x0283B8 -#define R_0283BC_SQ_VTX_SEMANTIC_15 0x0283BC -#define R_0283C0_SQ_VTX_SEMANTIC_16 0x0283C0 -#define R_0283C4_SQ_VTX_SEMANTIC_17 0x0283C4 -#define R_0283C8_SQ_VTX_SEMANTIC_18 0x0283C8 -#define R_0283CC_SQ_VTX_SEMANTIC_19 0x0283CC -#define R_0283D0_SQ_VTX_SEMANTIC_20 0x0283D0 -#define R_0283D4_SQ_VTX_SEMANTIC_21 0x0283D4 -#define R_0283D8_SQ_VTX_SEMANTIC_22 0x0283D8 -#define R_0283DC_SQ_VTX_SEMANTIC_23 0x0283DC -#define R_0283E0_SQ_VTX_SEMANTIC_24 0x0283E0 -#define R_0283E4_SQ_VTX_SEMANTIC_25 0x0283E4 -#define R_0283E8_SQ_VTX_SEMANTIC_26 0x0283E8 -#define R_0283EC_SQ_VTX_SEMANTIC_27 0x0283EC -#define R_0283F0_SQ_VTX_SEMANTIC_28 0x0283F0 -#define R_0283F4_SQ_VTX_SEMANTIC_29 0x0283F4 -#define R_0283F8_SQ_VTX_SEMANTIC_30 0x0283F8 -#define R_0283FC_SQ_VTX_SEMANTIC_31 0x0283FC -#define R_028400_VGT_MAX_VTX_INDX 0x028400 -#define S_028400_MAX_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028400_MAX_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028400_MAX_INDX 0x00000000 -#define R_028404_VGT_MIN_VTX_INDX 0x028404 -#define S_028404_MIN_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028404_MIN_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028404_MIN_INDX 0x00000000 -#define R_028408_VGT_INDX_OFFSET 0x028408 -#define S_028408_INDX_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028408_INDX_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028408_INDX_OFFSET 0x00000000 -#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX 0x02840C -#define S_02840C_RESET_INDX(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02840C_RESET_INDX(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02840C_RESET_INDX 0x00000000 -#define R_028410_SX_ALPHA_TEST_CONTROL 0x028410 -#define S_028410_ALPHA_FUNC(x) (((x) & 0x7) << 0) -#define G_028410_ALPHA_FUNC(x) (((x) >> 0) & 0x7) -#define C_028410_ALPHA_FUNC 0xFFFFFFF8 -#define S_028410_ALPHA_TEST_ENABLE(x) (((x) & 0x1) << 3) -#define G_028410_ALPHA_TEST_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028410_ALPHA_TEST_ENABLE 0xFFFFFFF7 -#define S_028410_ALPHA_TEST_BYPASS(x) (((x) & 0x1) << 8) -#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1) -#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF -#define R_028414_CB_BLEND_RED 0x028414 -#define S_028414_BLEND_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028414_BLEND_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028414_BLEND_RED 0x00000000 -#define R_028418_CB_BLEND_GREEN 0x028418 -#define S_028418_BLEND_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028418_BLEND_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028418_BLEND_GREEN 0x00000000 -#define R_02841C_CB_BLEND_BLUE 0x02841C -#define S_02841C_BLEND_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02841C_BLEND_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02841C_BLEND_BLUE 0x00000000 -#define R_028420_CB_BLEND_ALPHA 0x028420 -#define S_028420_BLEND_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028420_BLEND_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028420_BLEND_ALPHA 0x00000000 -#define R_028438_SX_ALPHA_REF 0x028438 -#define S_028438_ALPHA_REF(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028438_ALPHA_REF(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028438_ALPHA_REF 0x00000000 -#define R_0286C8_SPI_THREAD_GROUPING 0x0286C8 -#define S_0286C8_PS_GROUPING(x) (((x) & 0x1F) << 0) -#define G_0286C8_PS_GROUPING(x) (((x) >> 0) & 0x1F) -#define C_0286C8_PS_GROUPING 0xFFFFFFE0 -#define S_0286C8_VS_GROUPING(x) (((x) & 0x1F) << 8) -#define G_0286C8_VS_GROUPING(x) (((x) >> 8) & 0x1F) -#define C_0286C8_VS_GROUPING 0xFFFFE0FF -#define S_0286C8_GS_GROUPING(x) (((x) & 0x1F) << 16) -#define G_0286C8_GS_GROUPING(x) (((x) >> 16) & 0x1F) -#define C_0286C8_GS_GROUPING 0xFFE0FFFF -#define S_0286C8_ES_GROUPING(x) (((x) & 0x1F) << 24) -#define G_0286C8_ES_GROUPING(x) (((x) >> 24) & 0x1F) -#define C_0286C8_ES_GROUPING 0xE0FFFFFF -#define R_0286D8_SPI_INPUT_Z 0x0286D8 -#define S_0286D8_PROVIDE_Z_TO_SPI(x) (((x) & 0x1) << 0) -#define G_0286D8_PROVIDE_Z_TO_SPI(x) (((x) >> 0) & 0x1) -#define C_0286D8_PROVIDE_Z_TO_SPI 0xFFFFFFFE -#define R_0286DC_SPI_FOG_CNTL 0x0286DC -#define S_0286DC_PASS_FOG_THROUGH_PS(x) (((x) & 0x1) << 0) -#define G_0286DC_PASS_FOG_THROUGH_PS(x) (((x) >> 0) & 0x1) -#define C_0286DC_PASS_FOG_THROUGH_PS 0xFFFFFFFE -#define S_0286DC_PIXEL_FOG_FUNC(x) (((x) & 0x3) << 1) -#define G_0286DC_PIXEL_FOG_FUNC(x) (((x) >> 1) & 0x3) -#define C_0286DC_PIXEL_FOG_FUNC 0xFFFFFFF9 -#define S_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) & 0x1) << 3) -#define G_0286DC_PIXEL_FOG_SRC_SEL(x) (((x) >> 3) & 0x1) -#define C_0286DC_PIXEL_FOG_SRC_SEL 0xFFFFFFF7 -#define S_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) & 0x1) << 4) -#define G_0286DC_VS_FOG_CLAMP_DISABLE(x) (((x) >> 4) & 0x1) -#define C_0286DC_VS_FOG_CLAMP_DISABLE 0xFFFFFFEF -#define R_0286E0_SPI_FOG_FUNC_SCALE 0x0286E0 -#define S_0286E0_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E0_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E0_VALUE 0x00000000 -#define R_0286E4_SPI_FOG_FUNC_BIAS 0x0286E4 -#define S_0286E4_VALUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_0286E4_VALUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_0286E4_VALUE 0x00000000 -#define R_0287A0_CB_SHADER_CONTROL 0x0287A0 -#define S_0287A0_RT0_ENABLE(x) (((x) & 0x1) << 0) -#define G_0287A0_RT0_ENABLE(x) (((x) >> 0) & 0x1) -#define C_0287A0_RT0_ENABLE 0xFFFFFFFE -#define S_0287A0_RT1_ENABLE(x) (((x) & 0x1) << 1) -#define G_0287A0_RT1_ENABLE(x) (((x) >> 1) & 0x1) -#define C_0287A0_RT1_ENABLE 0xFFFFFFFD -#define S_0287A0_RT2_ENABLE(x) (((x) & 0x1) << 2) -#define G_0287A0_RT2_ENABLE(x) (((x) >> 2) & 0x1) -#define C_0287A0_RT2_ENABLE 0xFFFFFFFB -#define S_0287A0_RT3_ENABLE(x) (((x) & 0x1) << 3) -#define G_0287A0_RT3_ENABLE(x) (((x) >> 3) & 0x1) -#define C_0287A0_RT3_ENABLE 0xFFFFFFF7 -#define S_0287A0_RT4_ENABLE(x) (((x) & 0x1) << 4) -#define G_0287A0_RT4_ENABLE(x) (((x) >> 4) & 0x1) -#define C_0287A0_RT4_ENABLE 0xFFFFFFEF -#define S_0287A0_RT5_ENABLE(x) (((x) & 0x1) << 5) -#define G_0287A0_RT5_ENABLE(x) (((x) >> 5) & 0x1) -#define C_0287A0_RT5_ENABLE 0xFFFFFFDF -#define S_0287A0_RT6_ENABLE(x) (((x) & 0x1) << 6) -#define G_0287A0_RT6_ENABLE(x) (((x) >> 6) & 0x1) -#define C_0287A0_RT6_ENABLE 0xFFFFFFBF -#define S_0287A0_RT7_ENABLE(x) (((x) & 0x1) << 7) -#define G_0287A0_RT7_ENABLE(x) (((x) >> 7) & 0x1) -#define C_0287A0_RT7_ENABLE 0xFFFFFF7F -#define R_028894_SQ_PGM_START_FS 0x028894 -#define S_028894_PGM_START(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028894_PGM_START(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028894_PGM_START 0x00000000 -#define R_0288A4_SQ_PGM_RESOURCES_FS 0x0288A4 -#define S_0288A4_NUM_GPRS(x) (((x) & 0xFF) << 0) -#define G_0288A4_NUM_GPRS(x) (((x) >> 0) & 0xFF) -#define C_0288A4_NUM_GPRS 0xFFFFFF00 -#define S_0288A4_STACK_SIZE(x) (((x) & 0xFF) << 8) -#define G_0288A4_STACK_SIZE(x) (((x) >> 8) & 0xFF) -#define C_0288A4_STACK_SIZE 0xFFFF00FF -#define S_0288A4_DX10_CLAMP(x) (((x) & 0x1) << 21) -#define G_0288A4_DX10_CLAMP(x) (((x) >> 21) & 0x1) -#define C_0288A4_DX10_CLAMP 0xFFDFFFFF -#define R_0288A8_SQ_ESGS_RING_ITEMSIZE 0x0288A8 -#define S_0288A8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288A8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288A8_ITEMSIZE 0xFFFF8000 -#define R_0288AC_SQ_GSVS_RING_ITEMSIZE 0x0288AC -#define S_0288AC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288AC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288AC_ITEMSIZE 0xFFFF8000 -#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE 0x0288B0 -#define S_0288B0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B0_ITEMSIZE 0xFFFF8000 -#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE 0x0288B4 -#define S_0288B4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B4_ITEMSIZE 0xFFFF8000 -#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE 0x0288B8 -#define S_0288B8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288B8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288B8_ITEMSIZE 0xFFFF8000 -#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE 0x0288BC -#define S_0288BC_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288BC_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288BC_ITEMSIZE 0xFFFF8000 -#define R_0288C0_SQ_FBUF_RING_ITEMSIZE 0x0288C0 -#define S_0288C0_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C0_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C0_ITEMSIZE 0xFFFF8000 -#define R_0288C4_SQ_REDUC_RING_ITEMSIZE 0x0288C4 -#define S_0288C4_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C4_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C4_ITEMSIZE 0xFFFF8000 -#define R_0288C8_SQ_GS_VERT_ITEMSIZE 0x0288C8 -#define S_0288C8_ITEMSIZE(x) (((x) & 0x7FFF) << 0) -#define G_0288C8_ITEMSIZE(x) (((x) >> 0) & 0x7FFF) -#define C_0288C8_ITEMSIZE 0xFFFF8000 -#define R_0288DC_SQ_PGM_CF_OFFSET_FS 0x0288DC -#define S_0288DC_PGM_CF_OFFSET(x) (((x) & 0xFFFFF) << 0) -#define G_0288DC_PGM_CF_OFFSET(x) (((x) >> 0) & 0xFFFFF) -#define C_0288DC_PGM_CF_OFFSET 0xFFF00000 -#define R_028A10_VGT_OUTPUT_PATH_CNTL 0x028A10 -#define S_028A10_PATH_SELECT(x) (((x) & 0x3) << 0) -#define G_028A10_PATH_SELECT(x) (((x) >> 0) & 0x3) -#define C_028A10_PATH_SELECT 0xFFFFFFFC -#define R_028A14_VGT_HOS_CNTL 0x028A14 -#define S_028A14_TESS_MODE(x) (((x) & 0x3) << 0) -#define G_028A14_TESS_MODE(x) (((x) >> 0) & 0x3) -#define C_028A14_TESS_MODE 0xFFFFFFFC -#define R_028A18_VGT_HOS_MAX_TESS_LEVEL 0x028A18 -#define S_028A18_MAX_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A18_MAX_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A18_MAX_TESS 0x00000000 -#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL 0x028A1C -#define S_028A1C_MIN_TESS(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028A1C_MIN_TESS(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028A1C_MIN_TESS 0x00000000 -#define R_028A20_VGT_HOS_REUSE_DEPTH 0x028A20 -#define S_028A20_REUSE_DEPTH(x) (((x) & 0xFF) << 0) -#define G_028A20_REUSE_DEPTH(x) (((x) >> 0) & 0xFF) -#define C_028A20_REUSE_DEPTH 0xFFFFFF00 -#define R_028A24_VGT_GROUP_PRIM_TYPE 0x028A24 -#define S_028A24_PRIM_TYPE(x) (((x) & 0x1F) << 0) -#define G_028A24_PRIM_TYPE(x) (((x) >> 0) & 0x1F) -#define C_028A24_PRIM_TYPE 0xFFFFFFE0 -#define S_028A24_RETAIN_ORDER(x) (((x) & 0x1) << 14) -#define G_028A24_RETAIN_ORDER(x) (((x) >> 14) & 0x1) -#define C_028A24_RETAIN_ORDER 0xFFFFBFFF -#define S_028A24_RETAIN_QUADS(x) (((x) & 0x1) << 15) -#define G_028A24_RETAIN_QUADS(x) (((x) >> 15) & 0x1) -#define C_028A24_RETAIN_QUADS 0xFFFF7FFF -#define S_028A24_PRIM_ORDER(x) (((x) & 0x7) << 16) -#define G_028A24_PRIM_ORDER(x) (((x) >> 16) & 0x7) -#define C_028A24_PRIM_ORDER 0xFFF8FFFF -#define R_028A28_VGT_GROUP_FIRST_DECR 0x028A28 -#define S_028A28_FIRST_DECR(x) (((x) & 0xF) << 0) -#define G_028A28_FIRST_DECR(x) (((x) >> 0) & 0xF) -#define C_028A28_FIRST_DECR 0xFFFFFFF0 -#define R_028A2C_VGT_GROUP_DECR 0x028A2C -#define S_028A2C_DECR(x) (((x) & 0xF) << 0) -#define G_028A2C_DECR(x) (((x) >> 0) & 0xF) -#define C_028A2C_DECR 0xFFFFFFF0 -#define R_028A30_VGT_GROUP_VECT_0_CNTL 0x028A30 -#define S_028A30_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A30_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A30_COMP_X_EN 0xFFFFFFFE -#define S_028A30_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A30_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A30_COMP_Y_EN 0xFFFFFFFD -#define S_028A30_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define G_028A30_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A30_COMP_Z_EN 0xFFFFFFFB -#define S_028A30_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A30_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A30_COMP_W_EN 0xFFFFFFF7 -#define S_028A30_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A30_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A30_STRIDE 0xFFFF00FF -#define S_028A30_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A30_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A30_SHIFT 0xFF00FFFF -#define R_028A34_VGT_GROUP_VECT_1_CNTL 0x028A34 -#define S_028A34_COMP_X_EN(x) (((x) & 0x1) << 0) -#define G_028A34_COMP_X_EN(x) (((x) >> 0) & 0x1) -#define C_028A34_COMP_X_EN 0xFFFFFFFE -#define S_028A34_COMP_Y_EN(x) (((x) & 0x1) << 1) -#define G_028A34_COMP_Y_EN(x) (((x) >> 1) & 0x1) -#define C_028A34_COMP_Y_EN 0xFFFFFFFD -#define S_028A34_COMP_Z_EN(x) (((x) & 0x1) << 2) -#define G_028A34_COMP_Z_EN(x) (((x) >> 2) & 0x1) -#define C_028A34_COMP_Z_EN 0xFFFFFFFB -#define S_028A34_COMP_W_EN(x) (((x) & 0x1) << 3) -#define G_028A34_COMP_W_EN(x) (((x) >> 3) & 0x1) -#define C_028A34_COMP_W_EN 0xFFFFFFF7 -#define S_028A34_STRIDE(x) (((x) & 0xFF) << 8) -#define G_028A34_STRIDE(x) (((x) >> 8) & 0xFF) -#define C_028A34_STRIDE 0xFFFF00FF -#define S_028A34_SHIFT(x) (((x) & 0xFF) << 16) -#define G_028A34_SHIFT(x) (((x) >> 16) & 0xFF) -#define C_028A34_SHIFT 0xFF00FFFF -#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL 0x028A38 -#define S_028A38_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A38_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A38_X_CONV 0xFFFFFFF0 -#define S_028A38_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A38_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A38_X_OFFSET 0xFFFFFF0F -#define S_028A38_Y_CONV(x) (((x) & 0xF) << 8) -#define G_028A38_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A38_Y_CONV 0xFFFFF0FF -#define S_028A38_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A38_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A38_Y_OFFSET 0xFFFF0FFF -#define S_028A38_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A38_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A38_Z_CONV 0xFFF0FFFF -#define S_028A38_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A38_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A38_Z_OFFSET 0xFF0FFFFF -#define S_028A38_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A38_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A38_W_CONV 0xF0FFFFFF -#define S_028A38_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A38_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A38_W_OFFSET 0x0FFFFFFF -#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL 0x028A3C -#define S_028A3C_X_CONV(x) (((x) & 0xF) << 0) -#define G_028A3C_X_CONV(x) (((x) >> 0) & 0xF) -#define C_028A3C_X_CONV 0xFFFFFFF0 -#define S_028A3C_X_OFFSET(x) (((x) & 0xF) << 4) -#define G_028A3C_X_OFFSET(x) (((x) >> 4) & 0xF) -#define C_028A3C_X_OFFSET 0xFFFFFF0F -#define S_028A3C_Y_CONV(x) (((x) & 0xF) << 8) -#define G_028A3C_Y_CONV(x) (((x) >> 8) & 0xF) -#define C_028A3C_Y_CONV 0xFFFFF0FF -#define S_028A3C_Y_OFFSET(x) (((x) & 0xF) << 12) -#define G_028A3C_Y_OFFSET(x) (((x) >> 12) & 0xF) -#define C_028A3C_Y_OFFSET 0xFFFF0FFF -#define S_028A3C_Z_CONV(x) (((x) & 0xF) << 16) -#define G_028A3C_Z_CONV(x) (((x) >> 16) & 0xF) -#define C_028A3C_Z_CONV 0xFFF0FFFF -#define S_028A3C_Z_OFFSET(x) (((x) & 0xF) << 20) -#define G_028A3C_Z_OFFSET(x) (((x) >> 20) & 0xF) -#define C_028A3C_Z_OFFSET 0xFF0FFFFF -#define S_028A3C_W_CONV(x) (((x) & 0xF) << 24) -#define G_028A3C_W_CONV(x) (((x) >> 24) & 0xF) -#define C_028A3C_W_CONV 0xF0FFFFFF -#define S_028A3C_W_OFFSET(x) (((x) & 0xF) << 28) -#define G_028A3C_W_OFFSET(x) (((x) >> 28) & 0xF) -#define C_028A3C_W_OFFSET 0x0FFFFFFF -#define R_028A40_VGT_GS_MODE 0x028A40 -#define S_028A40_MODE(x) (((x) & 0x3) << 0) -#define G_028A40_MODE(x) (((x) >> 0) & 0x3) -#define C_028A40_MODE 0xFFFFFFFC -#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 2) -#define G_028A40_ES_PASSTHRU(x) (((x) >> 2) & 0x1) -#define C_028A40_ES_PASSTHRU 0xFFFFFFFB -#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3) -#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3) -#define C_028A40_CUT_MODE 0xFFFFFFE7 -#define R_028A4C_PA_SC_MODE_CNTL 0x028A4C -#define S_028A4C_MSAA_ENABLE(x) (((x) & 0x1) << 0) -#define G_028A4C_MSAA_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028A4C_MSAA_ENABLE 0xFFFFFFFE -#define S_028A4C_CLIPRECT_ENABLE(x) (((x) & 0x1) << 1) -#define G_028A4C_CLIPRECT_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028A4C_CLIPRECT_ENABLE 0xFFFFFFFD -#define S_028A4C_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 2) -#define G_028A4C_LINE_STIPPLE_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028A4C_LINE_STIPPLE_ENABLE 0xFFFFFFFB -#define S_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) & 0x1) << 3) -#define G_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x) (((x) >> 3) & 0x1) -#define C_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB 0xFFFFFFF7 -#define S_028A4C_WALK_ORDER_ENABLE(x) (((x) & 0x1) << 4) -#define G_028A4C_WALK_ORDER_ENABLE(x) (((x) >> 4) & 0x1) -#define C_028A4C_WALK_ORDER_ENABLE 0xFFFFFFEF -#define S_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) & 0x1) << 5) -#define G_028A4C_HALVE_DETAIL_SAMPLE_PERF(x) (((x) >> 5) & 0x1) -#define C_028A4C_HALVE_DETAIL_SAMPLE_PERF 0xFFFFFFDF -#define S_028A4C_WALK_SIZE(x) (((x) & 0x1) << 6) -#define G_028A4C_WALK_SIZE(x) (((x) >> 6) & 0x1) -#define C_028A4C_WALK_SIZE 0xFFFFFFBF -#define S_028A4C_WALK_ALIGNMENT(x) (((x) & 0x1) << 7) -#define G_028A4C_WALK_ALIGNMENT(x) (((x) >> 7) & 0x1) -#define C_028A4C_WALK_ALIGNMENT 0xFFFFFF7F -#define S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) & 0x1) << 8) -#define G_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x) (((x) >> 8) & 0x1) -#define C_028A4C_WALK_ALIGN8_PRIM_FITS_ST 0xFFFFFEFF -#define S_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) & 0x1) << 9) -#define G_028A4C_TILE_COVER_NO_SCISSOR(x) (((x) >> 9) & 0x1) -#define C_028A4C_TILE_COVER_NO_SCISSOR 0xFFFFFDFF -#define S_028A4C_KILL_PIX_POST_HI_Z(x) (((x) & 0x1) << 10) -#define G_028A4C_KILL_PIX_POST_HI_Z(x) (((x) >> 10) & 0x1) -#define C_028A4C_KILL_PIX_POST_HI_Z 0xFFFFFBFF -#define S_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) & 0x1) << 11) -#define G_028A4C_KILL_PIX_POST_DETAIL_MASK(x) (((x) >> 11) & 0x1) -#define C_028A4C_KILL_PIX_POST_DETAIL_MASK 0xFFFFF7FF -#define S_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) & 0x1) << 12) -#define G_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x) (((x) >> 12) & 0x1) -#define C_028A4C_MULTI_CHIP_SUPERTILE_ENABLE 0xFFFFEFFF -#define S_028A4C_TILE_COVER_DISABLE(x) (((x) & 0x1) << 13) -#define G_028A4C_TILE_COVER_DISABLE(x) (((x) >> 13) & 0x1) -#define C_028A4C_TILE_COVER_DISABLE 0xFFFFDFFF -#define S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 14) -#define G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) >> 14) & 0x1) -#define C_028A4C_FORCE_EOV_CNTDWN_ENABLE 0xFFFFBFFF -#define S_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) & 0x1) << 15) -#define G_028A4C_FORCE_EOV_TILE_ENABLE(x) (((x) >> 15) & 0x1) -#define C_028A4C_FORCE_EOV_TILE_ENABLE 0xFFFF7FFF -#define S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 16) -#define G_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) >> 16) & 0x1) -#define C_028A4C_FORCE_EOV_REZ_ENABLE 0xFFFEFFFF -#define S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 17) -#define G_028A4C_PS_ITER_SAMPLE(x) (((x) >> 17) & 0x1) -#define C_028A4C_PS_ITER_SAMPLE 0xFFFDFFFF -#define R_028A84_VGT_PRIMITIVEID_EN 0x028A84 -#define S_028A84_PRIMITIVEID_EN(x) (((x) & 0x1) << 0) -#define G_028A84_PRIMITIVEID_EN(x) (((x) >> 0) & 0x1) -#define C_028A84_PRIMITIVEID_EN 0xFFFFFFFE -#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN 0x028A94 -#define S_028A94_RESET_EN(x) (((x) & 0x1) << 0) -#define G_028A94_RESET_EN(x) (((x) >> 0) & 0x1) -#define C_028A94_RESET_EN 0xFFFFFFFE -#define R_028AA0_VGT_INSTANCE_STEP_RATE_0 0x028AA0 -#define S_028AA0_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA0_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA0_STEP_RATE 0x00000000 -#define R_028AA4_VGT_INSTANCE_STEP_RATE_1 0x028AA4 -#define S_028AA4_STEP_RATE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028AA4_STEP_RATE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028AA4_STEP_RATE 0x00000000 -#define R_028AB0_VGT_STRMOUT_EN 0x028AB0 -#define S_028AB0_STREAMOUT(x) (((x) & 0x1) << 0) -#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1) -#define C_028AB0_STREAMOUT 0xFFFFFFFE -#define R_028AB4_VGT_REUSE_OFF 0x028AB4 -#define S_028AB4_REUSE_OFF(x) (((x) & 0x1) << 0) -#define G_028AB4_REUSE_OFF(x) (((x) >> 0) & 0x1) -#define C_028AB4_REUSE_OFF 0xFFFFFFFE -#define R_028AB8_VGT_VTX_CNT_EN 0x028AB8 -#define S_028AB8_VTX_CNT_EN(x) (((x) & 0x1) << 0) -#define G_028AB8_VTX_CNT_EN(x) (((x) >> 0) & 0x1) -#define C_028AB8_VTX_CNT_EN 0xFFFFFFFE -#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20 -#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0) -#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1) -#define C_028B20_BUFFER_0_EN 0xFFFFFFFE -#define S_028B20_BUFFER_1_EN(x) (((x) & 0x1) << 1) -#define G_028B20_BUFFER_1_EN(x) (((x) >> 1) & 0x1) -#define C_028B20_BUFFER_1_EN 0xFFFFFFFD -#define S_028B20_BUFFER_2_EN(x) (((x) & 0x1) << 2) -#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1) -#define C_028B20_BUFFER_2_EN 0xFFFFFFFB -#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3) -#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1) -#define C_028B20_BUFFER_3_EN 0xFFFFFFF7 -#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0x028C20 -#define S_028C20_S4_X(x) (((x) & 0xF) << 0) -#define G_028C20_S4_X(x) (((x) >> 0) & 0xF) -#define C_028C20_S4_X 0xFFFFFFF0 -#define S_028C20_S4_Y(x) (((x) & 0xF) << 4) -#define G_028C20_S4_Y(x) (((x) >> 4) & 0xF) -#define C_028C20_S4_Y 0xFFFFFF0F -#define S_028C20_S5_X(x) (((x) & 0xF) << 8) -#define G_028C20_S5_X(x) (((x) >> 8) & 0xF) -#define C_028C20_S5_X 0xFFFFF0FF -#define S_028C20_S5_Y(x) (((x) & 0xF) << 12) -#define G_028C20_S5_Y(x) (((x) >> 12) & 0xF) -#define C_028C20_S5_Y 0xFFFF0FFF -#define S_028C20_S6_X(x) (((x) & 0xF) << 16) -#define G_028C20_S6_X(x) (((x) >> 16) & 0xF) -#define C_028C20_S6_X 0xFFF0FFFF -#define S_028C20_S6_Y(x) (((x) & 0xF) << 20) -#define G_028C20_S6_Y(x) (((x) >> 20) & 0xF) -#define C_028C20_S6_Y 0xFF0FFFFF -#define S_028C20_S7_X(x) (((x) & 0xF) << 24) -#define G_028C20_S7_X(x) (((x) >> 24) & 0xF) -#define C_028C20_S7_X 0xF0FFFFFF -#define S_028C20_S7_Y(x) (((x) & 0xF) << 28) -#define G_028C20_S7_Y(x) (((x) >> 28) & 0xF) -#define C_028C20_S7_Y 0x0FFFFFFF -#define R_028C30_CB_CLRCMP_CONTROL 0x028C30 -#define S_028C30_CLRCMP_FCN_SRC(x) (((x) & 0x7) << 0) -#define G_028C30_CLRCMP_FCN_SRC(x) (((x) >> 0) & 0x7) -#define C_028C30_CLRCMP_FCN_SRC 0xFFFFFFF8 -#define S_028C30_CLRCMP_FCN_DST(x) (((x) & 0x7) << 8) -#define G_028C30_CLRCMP_FCN_DST(x) (((x) >> 8) & 0x7) -#define C_028C30_CLRCMP_FCN_DST 0xFFFFF8FF -#define S_028C30_CLRCMP_FCN_SEL(x) (((x) & 0x3) << 24) -#define G_028C30_CLRCMP_FCN_SEL(x) (((x) >> 24) & 0x3) -#define C_028C30_CLRCMP_FCN_SEL 0xFCFFFFFF -#define R_028C34_CB_CLRCMP_SRC 0x028C34 -#define S_028C34_CLRCMP_SRC(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C34_CLRCMP_SRC(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C34_CLRCMP_SRC 0x00000000 -#define R_028C38_CB_CLRCMP_DST 0x028C38 -#define S_028C38_CLRCMP_DST(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C38_CLRCMP_DST(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C38_CLRCMP_DST 0x00000000 -#define R_028C3C_CB_CLRCMP_MSK 0x028C3C -#define S_028C3C_CLRCMP_MSK(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028C3C_CLRCMP_MSK(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028C3C_CLRCMP_MSK 0x00000000 -#define R_0085F0_CP_COHER_CNTL 0x0085F0 -#define S_0085F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0) -#define G_0085F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1) -#define C_0085F0_DEST_BASE_0_ENA 0xFFFFFFFE -#define S_0085F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1) -#define G_0085F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1) -#define C_0085F0_DEST_BASE_1_ENA 0xFFFFFFFD -#define S_0085F0_SO0_DEST_BASE_ENA(x) (((x) & 0x1) << 2) -#define G_0085F0_SO0_DEST_BASE_ENA(x) (((x) >> 2) & 0x1) -#define C_0085F0_SO0_DEST_BASE_ENA 0xFFFFFFFB -#define S_0085F0_SO1_DEST_BASE_ENA(x) (((x) & 0x1) << 3) -#define G_0085F0_SO1_DEST_BASE_ENA(x) (((x) >> 3) & 0x1) -#define C_0085F0_SO1_DEST_BASE_ENA 0xFFFFFFF7 -#define S_0085F0_SO2_DEST_BASE_ENA(x) (((x) & 0x1) << 4) -#define G_0085F0_SO2_DEST_BASE_ENA(x) (((x) >> 4) & 0x1) -#define C_0085F0_SO2_DEST_BASE_ENA 0xFFFFFFEF -#define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) -#define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) -#define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF -#define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) -#define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) -#define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF -#define S_0085F0_CB1_DEST_BASE_ENA(x) (((x) & 0x1) << 7) -#define G_0085F0_CB1_DEST_BASE_ENA(x) (((x) >> 7) & 0x1) -#define C_0085F0_CB1_DEST_BASE_ENA 0xFFFFFF7F -#define S_0085F0_CB2_DEST_BASE_ENA(x) (((x) & 0x1) << 8) -#define G_0085F0_CB2_DEST_BASE_ENA(x) (((x) >> 8) & 0x1) -#define C_0085F0_CB2_DEST_BASE_ENA 0xFFFFFEFF -#define S_0085F0_CB3_DEST_BASE_ENA(x) (((x) & 0x1) << 9) -#define G_0085F0_CB3_DEST_BASE_ENA(x) (((x) >> 9) & 0x1) -#define C_0085F0_CB3_DEST_BASE_ENA 0xFFFFFDFF -#define S_0085F0_CB4_DEST_BASE_ENA(x) (((x) & 0x1) << 10) -#define G_0085F0_CB4_DEST_BASE_ENA(x) (((x) >> 10) & 0x1) -#define C_0085F0_CB4_DEST_BASE_ENA 0xFFFFFBFF -#define S_0085F0_CB5_DEST_BASE_ENA(x) (((x) & 0x1) << 11) -#define G_0085F0_CB5_DEST_BASE_ENA(x) (((x) >> 11) & 0x1) -#define C_0085F0_CB5_DEST_BASE_ENA 0xFFFFF7FF -#define S_0085F0_CB6_DEST_BASE_ENA(x) (((x) & 0x1) << 12) -#define G_0085F0_CB6_DEST_BASE_ENA(x) (((x) >> 12) & 0x1) -#define C_0085F0_CB6_DEST_BASE_ENA 0xFFFFEFFF -#define S_0085F0_CB7_DEST_BASE_ENA(x) (((x) & 0x1) << 13) -#define G_0085F0_CB7_DEST_BASE_ENA(x) (((x) >> 13) & 0x1) -#define C_0085F0_CB7_DEST_BASE_ENA 0xFFFFDFFF -#define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) -#define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) -#define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF -#define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) -#define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) -#define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF -#define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) -#define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) -#define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF -#define S_0085F0_VC_ACTION_ENA(x) (((x) & 0x1) << 24) -#define G_0085F0_VC_ACTION_ENA(x) (((x) >> 24) & 0x1) -#define C_0085F0_VC_ACTION_ENA 0xFEFFFFFF -#define S_0085F0_CB_ACTION_ENA(x) (((x) & 0x1) << 25) -#define G_0085F0_CB_ACTION_ENA(x) (((x) >> 25) & 0x1) -#define C_0085F0_CB_ACTION_ENA 0xFDFFFFFF -#define S_0085F0_DB_ACTION_ENA(x) (((x) & 0x1) << 26) -#define G_0085F0_DB_ACTION_ENA(x) (((x) >> 26) & 0x1) -#define C_0085F0_DB_ACTION_ENA 0xFBFFFFFF -#define S_0085F0_SH_ACTION_ENA(x) (((x) & 0x1) << 27) -#define G_0085F0_SH_ACTION_ENA(x) (((x) >> 27) & 0x1) -#define C_0085F0_SH_ACTION_ENA 0xF7FFFFFF -#define S_0085F0_SMX_ACTION_ENA(x) (((x) & 0x1) << 28) -#define G_0085F0_SMX_ACTION_ENA(x) (((x) >> 28) & 0x1) -#define C_0085F0_SMX_ACTION_ENA 0xEFFFFFFF -#define S_0085F0_CR0_ACTION_ENA(x) (((x) & 0x1) << 29) -#define G_0085F0_CR0_ACTION_ENA(x) (((x) >> 29) & 0x1) -#define C_0085F0_CR0_ACTION_ENA 0xDFFFFFFF -#define S_0085F0_CR1_ACTION_ENA(x) (((x) & 0x1) << 30) -#define G_0085F0_CR1_ACTION_ENA(x) (((x) >> 30) & 0x1) -#define C_0085F0_CR1_ACTION_ENA 0xBFFFFFFF -#define S_0085F0_CR2_ACTION_ENA(x) (((x) & 0x1) << 31) -#define G_0085F0_CR2_ACTION_ENA(x) (((x) >> 31) & 0x1) -#define C_0085F0_CR2_ACTION_ENA 0x7FFFFFFF - - -#define R_02812C_CB_CLEAR_ALPHA 0x02812C -#define S_02812C_CLEAR_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02812C_CLEAR_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02812C_CLEAR_ALPHA 0x00000000 -#define R_028128_CB_CLEAR_BLUE 0x028128 -#define S_028128_CLEAR_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028128_CLEAR_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028128_CLEAR_BLUE 0x00000000 -#define R_028124_CB_CLEAR_GREEN 0x028124 -#define S_028124_CLEAR_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028124_CLEAR_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028124_CLEAR_GREEN 0x00000000 -#define R_028120_CB_CLEAR_RED 0x028120 -#define S_028120_CLEAR_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028120_CLEAR_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028120_CLEAR_RED 0x00000000 -#define R_02842C_CB_FOG_BLUE 0x02842C -#define S_02842C_FOG_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_02842C_FOG_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_02842C_FOG_BLUE 0x00000000 -#define R_028428_CB_FOG_GREEN 0x028428 -#define S_028428_FOG_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028428_FOG_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028428_FOG_GREEN 0x00000000 -#define R_028424_CB_FOG_RED 0x028424 -#define S_028424_FOG_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_028424_FOG_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_028424_FOG_RED 0x00000000 -#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 -#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) -#define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) -#define C_03C000_CLAMP_X 0xFFFFFFF8 -#define S_03C000_CLAMP_Y(x) (((x) & 0x7) << 3) -#define G_03C000_CLAMP_Y(x) (((x) >> 3) & 0x7) -#define C_03C000_CLAMP_Y 0xFFFFFFC7 -#define S_03C000_CLAMP_Z(x) (((x) & 0x7) << 6) -#define G_03C000_CLAMP_Z(x) (((x) >> 6) & 0x7) -#define C_03C000_CLAMP_Z 0xFFFFFE3F -#define S_03C000_XY_MAG_FILTER(x) (((x) & 0x7) << 9) -#define G_03C000_XY_MAG_FILTER(x) (((x) >> 9) & 0x7) -#define C_03C000_XY_MAG_FILTER 0xFFFFF1FF -#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12) -#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7) -#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF -#define S_03C000_Z_FILTER(x) (((x) & 0x3) << 15) -#define G_03C000_Z_FILTER(x) (((x) >> 15) & 0x3) -#define C_03C000_Z_FILTER 0xFFFE7FFF -#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17) -#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3) -#define C_03C000_MIP_FILTER 0xFFF9FFFF -#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22) -#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3) -#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF -#define S_03C000_POINT_SAMPLING_CLAMP(x) (((x) & 0x1) << 24) -#define G_03C000_POINT_SAMPLING_CLAMP(x) (((x) >> 24) & 0x1) -#define C_03C000_POINT_SAMPLING_CLAMP 0xFEFFFFFF -#define S_03C000_TEX_ARRAY_OVERRIDE(x) (((x) & 0x1) << 25) -#define G_03C000_TEX_ARRAY_OVERRIDE(x) (((x) >> 25) & 0x1) -#define C_03C000_TEX_ARRAY_OVERRIDE 0xFDFFFFFF -#define S_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) & 0x7) << 26) -#define G_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) >> 26) & 0x7) -#define C_03C000_DEPTH_COMPARE_FUNCTION 0xE3FFFFFF -#define S_03C000_CHROMA_KEY(x) (((x) & 0x3) << 29) -#define G_03C000_CHROMA_KEY(x) (((x) >> 29) & 0x3) -#define C_03C000_CHROMA_KEY 0x9FFFFFFF -#define S_03C000_LOD_USES_MINOR_AXIS(x) (((x) & 0x1) << 31) -#define G_03C000_LOD_USES_MINOR_AXIS(x) (((x) >> 31) & 0x1) -#define C_03C000_LOD_USES_MINOR_AXIS 0x7FFFFFFF -#define R_03C004_SQ_TEX_SAMPLER_WORD1_0 0x03C004 -#define S_03C004_MIN_LOD(x) (((x) & 0x3FF) << 0) -#define G_03C004_MIN_LOD(x) (((x) >> 0) & 0x3FF) -#define C_03C004_MIN_LOD 0xFFFFFC00 -#define S_03C004_MAX_LOD(x) (((x) & 0x3FF) << 10) -#define G_03C004_MAX_LOD(x) (((x) >> 10) & 0x3FF) -#define C_03C004_MAX_LOD 0xFFF003FF -#define S_03C004_LOD_BIAS(x) (((x) & 0xFFF) << 20) -#define G_03C004_LOD_BIAS(x) (((x) >> 20) & 0xFFF) -#define C_03C004_LOD_BIAS 0x000FFFFF -#define R_03C008_SQ_TEX_SAMPLER_WORD2_0 0x03C008 -#define S_03C008_LOD_BIAS_SEC(x) (((x) & 0xFFF) << 0) -#define G_03C008_LOD_BIAS_SEC(x) (((x) >> 0) & 0xFFF) -#define C_03C008_LOD_BIAS_SEC 0xFFFFF000 -#define S_03C008_MC_COORD_TRUNCATE(x) (((x) & 0x1) << 12) -#define G_03C008_MC_COORD_TRUNCATE(x) (((x) >> 12) & 0x1) -#define C_03C008_MC_COORD_TRUNCATE 0xFFFFEFFF -#define S_03C008_FORCE_DEGAMMA(x) (((x) & 0x1) << 13) -#define G_03C008_FORCE_DEGAMMA(x) (((x) >> 13) & 0x1) -#define C_03C008_FORCE_DEGAMMA 0xFFFFDFFF -#define S_03C008_HIGH_PRECISION_FILTER(x) (((x) & 0x1) << 14) -#define G_03C008_HIGH_PRECISION_FILTER(x) (((x) >> 14) & 0x1) -#define C_03C008_HIGH_PRECISION_FILTER 0xFFFFBFFF -#define S_03C008_PERF_MIP(x) (((x) & 0x7) << 15) -#define G_03C008_PERF_MIP(x) (((x) >> 15) & 0x7) -#define C_03C008_PERF_MIP 0xFFFC7FFF -#define S_03C008_PERF_Z(x) (((x) & 0x3) << 18) -#define G_03C008_PERF_Z(x) (((x) >> 18) & 0x3) -#define C_03C008_PERF_Z 0xFFF3FFFF -#define S_03C008_FETCH_4(x) (((x) & 0x1) << 26) -#define G_03C008_FETCH_4(x) (((x) >> 26) & 0x1) -#define C_03C008_FETCH_4 0xFBFFFFFF -#define S_03C008_SAMPLE_IS_PCF(x) (((x) & 0x1) << 27) -#define G_03C008_SAMPLE_IS_PCF(x) (((x) >> 27) & 0x1) -#define C_03C008_SAMPLE_IS_PCF 0xF7FFFFFF -#define S_03C008_TYPE(x) (((x) & 0x1) << 31) -#define G_03C008_TYPE(x) (((x) >> 31) & 0x1) -#define C_03C008_TYPE 0x7FFFFFFF -#define R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA 0x00A40C -#define S_00A40C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A40C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A40C_BORDER_ALPHA 0x00000000 -#define R_00A408_TD_PS_SAMPLER0_BORDER_BLUE 0x00A408 -#define S_00A408_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A408_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A408_BORDER_BLUE 0x00000000 -#define R_00A404_TD_PS_SAMPLER0_BORDER_GREEN 0x00A404 -#define S_00A404_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A404_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A404_BORDER_GREEN 0x00000000 -#define R_00A400_TD_PS_SAMPLER0_BORDER_RED 0x00A400 -#define S_00A400_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A400_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A400_BORDER_RED 0x00000000 -#define R_00A60C_TD_VS_SAMPLER0_BORDER_ALPHA 0x00A60C -#define S_00A60C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A60C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A60C_BORDER_ALPHA 0x00000000 -#define R_00A608_TD_VS_SAMPLER0_BORDER_BLUE 0x00A608 -#define S_00A608_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A608_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A608_BORDER_BLUE 0x00000000 -#define R_00A604_TD_VS_SAMPLER0_BORDER_GREEN 0x00A604 -#define S_00A604_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A604_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A604_BORDER_GREEN 0x00000000 -#define R_00A600_TD_VS_SAMPLER0_BORDER_RED 0x00A600 -#define S_00A600_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A600_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A600_BORDER_RED 0x00000000 -#define R_00A80C_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A80C -#define S_00A80C_BORDER_ALPHA(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A80C_BORDER_ALPHA(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A80C_BORDER_ALPHA 0x00000000 -#define R_00A808_TD_GS_SAMPLER0_BORDER_BLUE 0x00A808 -#define S_00A808_BORDER_BLUE(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A808_BORDER_BLUE(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A808_BORDER_BLUE 0x00000000 -#define R_00A804_TD_GS_SAMPLER0_BORDER_GREEN 0x00A804 -#define S_00A804_BORDER_GREEN(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A804_BORDER_GREEN(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A804_BORDER_GREEN 0x00000000 -#define R_00A800_TD_GS_SAMPLER0_BORDER_RED 0x00A800 -#define S_00A800_BORDER_RED(x) (((x) & 0xFFFFFFFF) << 0) -#define G_00A800_BORDER_RED(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_00A800_BORDER_RED 0x00000000 -#define R_030000_SQ_ALU_CONSTANT0_0 0x030000 -#define S_030000_X(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030000_X(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030000_X 0x00000000 -#define R_030004_SQ_ALU_CONSTANT1_0 0x030004 -#define S_030004_Y(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030004_Y(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030004_Y 0x00000000 -#define R_030008_SQ_ALU_CONSTANT2_0 0x030008 -#define S_030008_Z(x) (((x) & 0xFFFFFFFF) << 0) -#define G_030008_Z(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_030008_Z 0x00000000 -#define R_03000C_SQ_ALU_CONSTANT3_0 0x03000C -#define S_03000C_W(x) (((x) & 0xFFFFFFFF) << 0) -#define G_03000C_W(x) (((x) >> 0) & 0xFFFFFFFF) -#define C_03000C_W 0x00000000 -#define R_0287E4_VGT_DMA_BASE_HI 0x0287E4 -#define R_0287E8_VGT_DMA_BASE 0x0287E8 -#define R_028E20_PA_CL_UCP0_X 0x028E20 -#define R_028E24_PA_CL_UCP0_Y 0x028E24 -#define R_028E28_PA_CL_UCP0_Z 0x028E28 -#define R_028E2C_PA_CL_UCP0_W 0x028E2C -#define R_028E30_PA_CL_UCP1_X 0x028E30 -#define R_028E34_PA_CL_UCP1_Y 0x028E34 -#define R_028E38_PA_CL_UCP1_Z 0x028E38 -#define R_028E3C_PA_CL_UCP1_W 0x028E3C -#define R_028E40_PA_CL_UCP2_X 0x028E40 -#define R_028E44_PA_CL_UCP2_Y 0x028E44 -#define R_028E48_PA_CL_UCP2_Z 0x028E48 -#define R_028E4C_PA_CL_UCP2_W 0x028E4C -#define R_028E50_PA_CL_UCP3_X 0x028E50 -#define R_028E54_PA_CL_UCP3_Y 0x028E54 -#define R_028E58_PA_CL_UCP3_Z 0x028E58 -#define R_028E5C_PA_CL_UCP3_W 0x028E5C -#define R_028E60_PA_CL_UCP4_X 0x028E60 -#define R_028E64_PA_CL_UCP4_Y 0x028E64 -#define R_028E68_PA_CL_UCP4_Z 0x028E68 -#define R_028E6C_PA_CL_UCP4_W 0x028E6C -#define R_028E70_PA_CL_UCP5_X 0x028E70 -#define R_028E74_PA_CL_UCP5_Y 0x028E74 -#define R_028E78_PA_CL_UCP5_Z 0x028E78 -#define R_028E7C_PA_CL_UCP5_W 0x028E7C -#define R_038000_RESOURCE0_WORD0 0x038000 -#define R_038004_RESOURCE0_WORD1 0x038004 -#define R_038008_RESOURCE0_WORD2 0x038008 -#define R_03800C_RESOURCE0_WORD3 0x03800C -#define R_038010_RESOURCE0_WORD4 0x038010 -#define R_038014_RESOURCE0_WORD5 0x038014 -#define R_038018_RESOURCE0_WORD6 0x038018 - -#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0 0x00028140 -#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0 0x00028180 -#define R_028940_ALU_CONST_CACHE_PS_0 0x00028940 -#define R_028980_ALU_CONST_CACHE_VS_0 0x00028980 - -#define R_03CFF0_SQ_VTX_BASE_VTX_LOC 0x03CFF0 -#define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 - -#endif diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c deleted file mode 100644 index 45cf6f09671..00000000000 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#define _FILE_OFFSET_BITS 64 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <errno.h> -#include "r600_priv.h" -#include "xf86drm.h" -#include "radeon_drm.h" - -int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_mmap args; - void *ptr; - int r; - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.offset = 0; - args.size = (uint64_t)bo->size; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP, - &args, sizeof(args)); - if (r) { - fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", - bo, bo->handle, r); - return r; - } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr); - if (ptr == MAP_FAILED) { - fprintf(stderr, "%s failed to map bo\n", __func__); - return -errno; - } - bo->data = ptr; - - bo->map_count++; - return 0; -} - -static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo) -{ - if (bo->data) { - munmap(bo->data, bo->size); - bo->data = NULL; - } -} - -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, unsigned initial_domain) -{ - struct radeon_bo *bo; - int r; - - if (handle) { - pipe_mutex_lock(radeon->bo_handles_mutex); - bo = util_hash_table_get(radeon->bo_handles, - (void *)(uintptr_t)handle); - if (bo) { - struct radeon_bo *b = NULL; - radeon_bo_reference(radeon, &b, bo); - goto done; - } - } - bo = calloc(1, sizeof(*bo)); - if (bo == NULL) { - return NULL; - } - bo->size = size; - bo->handle = handle; - pipe_reference_init(&bo->reference, 1); - bo->alignment = alignment; - LIST_INITHEAD(&bo->fencedlist); - - if (handle) { - struct drm_gem_open open_arg; - - memset(&open_arg, 0, sizeof(open_arg)); - open_arg.name = handle; - r = drmIoctl(radeon->fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (r != 0) { - free(bo); - return NULL; - } - bo->name = handle; - bo->handle = open_arg.handle; - bo->size = open_arg.size; - bo->shared = TRUE; - } else { - struct drm_radeon_gem_create args = {}; - - args.size = size; - args.alignment = alignment; - args.initial_domain = initial_domain; - args.flags = 0; - args.handle = 0; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE, - &args, sizeof(args)); - bo->handle = args.handle; - if (r) { - fprintf(stderr, "Failed to allocate :\n"); - fprintf(stderr, " size : %d bytes\n", size); - fprintf(stderr, " alignment : %d bytes\n", alignment); - free(bo); - return NULL; - } - } - - if (handle) - util_hash_table_set(radeon->bo_handles, (void *)(uintptr_t)handle, bo); -done: - if (handle) - pipe_mutex_unlock(radeon->bo_handles_mutex); - - return bo; -} - -static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_gem_close args; - - if (bo->name) { - pipe_mutex_lock(radeon->bo_handles_mutex); - util_hash_table_remove(radeon->bo_handles, - (void *)(uintptr_t)bo->name); - pipe_mutex_unlock(radeon->bo_handles_mutex); - } - LIST_DEL(&bo->fencedlist); - radeon_bo_fixed_unmap(radeon, bo); - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args); - memset(bo, 0, sizeof(struct radeon_bo)); - free(bo); -} - -void radeon_bo_reference(struct radeon *radeon, - struct radeon_bo **dst, - struct radeon_bo *src) -{ - struct radeon_bo *old = *dst; - if (pipe_reference(&(*dst)->reference, &src->reference)) { - radeon_bo_destroy(radeon, old); - } - *dst = src; -} - -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo) -{ - struct drm_radeon_gem_wait_idle args; - int ret; - - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - - /* Zero out args to make valgrind happy */ - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - do { - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)); - } while (ret == -EBUSY); - return ret; -} - -int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain) -{ - struct drm_radeon_gem_busy args; - int ret; - - if (!bo->shared) { - if (!bo->fence) - return 0; - if (bo->fence <= *radeon->cfence) { - LIST_DELINIT(&bo->fencedlist); - bo->fence = 0; - return 0; - } - } - - memset(&args, 0, sizeof(args)); - args.handle = bo->handle; - args.domain = 0; - - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)); - - *domain = args.domain; - return ret; -} - -int radeon_bo_get_tiling_flags(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *tiling_flags, - uint32_t *pitch) -{ - struct drm_radeon_gem_get_tiling args = {}; - int ret; - - args.handle = bo->handle; - ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING, - &args, sizeof(args)); - if (ret) - return ret; - - *tiling_flags = args.tiling_flags; - *pitch = args.pitch; - return ret; -} - -int radeon_bo_get_name(struct radeon *radeon, - struct radeon_bo *bo, - uint32_t *name) -{ - struct drm_gem_flink flink; - int ret; - - flink.handle = bo->handle; - ret = drmIoctl(radeon->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret) - return ret; - - *name = flink.name; - return ret; -} diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index f54a7c8fe72..87572417c80 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -23,7 +23,6 @@ * Authors: * Jerome Glisse */ -#include <stdlib.h> #include "r600_priv.h" struct pci_id { diff --git a/src/gallium/winsys/radeon/drm/Android.mk b/src/gallium/winsys/radeon/drm/Android.mk new file mode 100644 index 00000000000..c1922498225 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/Android.mk @@ -0,0 +1,40 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2011 Chia-I Wu <[email protected]> +# Copyright (C) 2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_C_INCLUDES := \ + $(DRM_TOP) \ + $(DRM_TOP)/include/drm + +LOCAL_MODULE := libmesa_winsys_radeon + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index 913e6ad186a..68b9efebfa4 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -4,10 +4,8 @@ include $(TOP)/configs/current LIBNAME = radeonwinsys -C_SOURCES = \ - radeon_drm_bo.c \ - radeon_drm_cs.c \ - radeon_drm_winsys.c +# get C_SOURCES +include Makefile.sources LIBRARY_INCLUDES = -I$(TOP)/include \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources new file mode 100644 index 00000000000..1d18d6164d5 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/Makefile.sources @@ -0,0 +1,4 @@ +C_SOURCES := \ + radeon_drm_bo.c \ + radeon_drm_cs.c \ + radeon_drm_winsys.c diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 2edb1e94645..e5048d6255d 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -2,11 +2,7 @@ Import('*') env = env.Clone() -radeon_sources = [ - 'radeon_drm_bo.c', - 'radeon_drm_cs.c', - 'radeon_drm_winsys.c', -] +radeon_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES') env.PkgUseModules('DRM') diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index b6f12727e81..b45efe5f49c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -31,11 +31,11 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" #include "os/os_thread.h" +#include "os/os_mman.h" #include "state_tracker/drm_driver.h" #include <sys/ioctl.h> -#include <sys/mman.h> #include <xf86drm.h> #include <errno.h> @@ -43,6 +43,21 @@ #define RADEON_BO_FLAGS_MICRO_TILE 2 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 +#ifndef DRM_RADEON_GEM_WAIT +#define DRM_RADEON_GEM_WAIT 0x2b + +#define RADEON_GEM_NO_WAIT 0x1 +#define RADEON_GEM_USAGE_READ 0x2 +#define RADEON_GEM_USAGE_WRITE 0x4 + +struct drm_radeon_gem_wait { + uint32_t handle; + uint32_t flags; /* one of RADEON_GEM_* */ +}; + +#endif + + extern const struct pb_vtbl radeon_bo_vtbl; @@ -87,39 +102,49 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) return bo; } -static void radeon_bo_wait(struct pb_buffer *_buf) +static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); - struct drm_radeon_gem_wait_idle args = {}; + struct radeon_bo *bo = get_radeon_bo(_buf); while (p_atomic_read(&bo->num_active_ioctls)) { sched_yield(); } - args.handle = bo->handle; - while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, - &args, sizeof(args)) == -EBUSY); - - bo->busy_for_write = FALSE; + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) == -EBUSY); + } else { + struct drm_radeon_gem_wait_idle args = {}; + args.handle = bo->handle; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); + } } -static boolean radeon_bo_is_busy(struct pb_buffer *_buf) +static boolean radeon_bo_is_busy(struct pb_buffer *_buf, + enum radeon_bo_usage usage) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); - struct drm_radeon_gem_busy args = {}; - boolean busy; + struct radeon_bo *bo = get_radeon_bo(_buf); if (p_atomic_read(&bo->num_active_ioctls)) { return TRUE; } - args.handle = bo->handle; - busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, - &args, sizeof(args)) != 0; - - if (!busy) - bo->busy_for_write = FALSE; - return busy; + if (bo->rws->info.drm_minor >= 12) { + struct drm_radeon_gem_wait args = {}; + args.handle = bo->handle; + args.flags = usage | RADEON_GEM_NO_WAIT; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT, + &args, sizeof(args)) != 0; + } else { + struct drm_radeon_gem_busy args = {}; + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; + } } static void radeon_bo_destroy(struct pb_buffer *_buf) @@ -135,7 +160,7 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) } if (bo->ptr) - munmap(bo->ptr, bo->size); + os_munmap(bo->ptr, bo->size); /* Close object. */ args.handle = bo->handle; @@ -172,13 +197,33 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ if (flags & PB_USAGE_DONTBLOCK) { - if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); - return NULL; - } + if (!(flags & PB_USAGE_CPU_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. */ + if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } + + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_WRITE)) { + return NULL; + } + } else { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + return NULL; + } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { - return NULL; + if (radeon_bo_is_busy((struct pb_buffer*)bo, + RADEON_USAGE_READWRITE)) { + return NULL; + } } } else { if (!(flags & PB_USAGE_CPU_WRITE)) { @@ -191,14 +236,9 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, * Only check whether the buffer is being used for write. */ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); - radeon_bo_wait((struct pb_buffer*)bo); - } else if (bo->busy_for_write) { - /* Update the busy_for_write field (done by radeon_bo_is_busy) - * and wait if needed. */ - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { - radeon_bo_wait((struct pb_buffer*)bo); - } } + radeon_bo_wait((struct pb_buffer*)bo, + RADEON_USAGE_WRITE); } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { @@ -209,7 +249,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, radeon_drm_cs_sync_flush(cs); } - radeon_bo_wait((struct pb_buffer*)bo); + radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); } } } @@ -238,7 +278,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, return NULL; } - ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, bo->rws->fd, args.addr_ptr); if (ptr == MAP_FAILED) { pipe_mutex_unlock(bo->map_mutex); @@ -345,7 +385,7 @@ static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr, return TRUE; } - if (radeon_bo_is_busy((struct pb_buffer*)bo)) { + if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { return TRUE; } @@ -395,16 +435,14 @@ static void *radeon_bo_map(struct pb_buffer *buf, struct radeon_winsys_cs *cs, enum pipe_transfer_usage usage) { - struct pb_buffer *_buf = pb_buffer(buf); - - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs); + return pb_map(buf, get_pb_usage_from_transfer_flags(usage), cs); } static void radeon_bo_get_tiling(struct pb_buffer *_buf, enum radeon_bo_layout *microtiled, enum radeon_bo_layout *macrotiled) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct drm_radeon_gem_set_tiling args = {}; args.handle = bo->handle; @@ -429,7 +467,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf, enum radeon_bo_layout macrotiled, uint32_t pitch) { - struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_bo *bo = get_radeon_bo(_buf); struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct drm_radeon_gem_set_tiling args = {}; @@ -464,12 +502,10 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle( struct pb_buffer *_buf) { /* return radeon_bo. */ - return (struct radeon_winsys_cs_handle*) - get_radeon_bo(pb_buffer(_buf)); + return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf); } -static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, - enum radeon_bo_domain domain) +static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain) { unsigned res = 0; @@ -487,7 +523,6 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); @@ -497,10 +532,11 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, memset(&desc, 0, sizeof(desc)); desc.alignment = alignment; - desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); + desc.usage = get_pb_usage_from_create_flags(domain); /* Assign a buffer manager. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER)) provider = ws->cman; else provider = ws->kman; @@ -587,7 +623,7 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, struct winsys_handle *whandle) { struct drm_gem_flink flink = {}; - struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer)); + struct radeon_bo *bo = get_radeon_bo(buffer); if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!bo->flinked) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index b94881bc4ce..047ea6b1cf2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -60,13 +60,6 @@ struct radeon_bo { * thread, is this bo referenced in? */ int num_active_ioctls; - /* Whether the buffer has been relocated for write and is busy since then. - * This field is updated in: - * - radeon_drm_cs_flush (to TRUE if it's relocated for write) - * - radeon_bo_is_busy (to FALSE if it's not busy) - * - radeon_bo_wait (to FALSE) */ - boolean busy_for_write; - boolean flinked; uint32_t flink; }; @@ -80,10 +73,4 @@ void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src) pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); } -static INLINE struct pb_buffer * -pb_buffer(struct pb_buffer *buffer) -{ - return (struct pb_buffer *)buffer; -} - #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0139de1973a..c309354785a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -115,6 +115,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) } csc->crelocs = 0; + csc->validated_crelocs = 0; csc->chunks[0].length_dw = 0; csc->chunks[1].length_dw = 0; csc->used_gart = 0; @@ -218,11 +219,11 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static void radeon_add_reloc(struct radeon_cs_context *csc, - struct radeon_bo *bo, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd, - enum radeon_bo_domain *added_domains) +static unsigned radeon_add_reloc(struct radeon_cs_context *csc, + struct radeon_bo *bo, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd, + enum radeon_bo_domain *added_domains) { struct drm_radeon_cs_reloc *reloc; unsigned i; @@ -232,7 +233,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); - return; + return csc->reloc_indices_hashlist[hash]; } /* Hash collision, look for the BO in the list of relocs linearly. */ @@ -245,7 +246,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->relocs_hashlist[hash] = reloc; csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ - return; + return i; } } } @@ -279,37 +280,64 @@ static void radeon_add_reloc(struct radeon_cs_context *csc, csc->reloc_indices_hashlist[hash] = csc->crelocs; csc->chunks[1].length_dw += RELOC_DWORDS; - csc->crelocs++; *added_domains = rd | wd; + return csc->crelocs++; } -static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd) +static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); - - if (!added_domains) - return; + unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->size; if (added_domains & RADEON_DOMAIN_VRAM) cs->csc->used_vram += bo->size; + + return index; } static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + boolean status = + cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && + cs->csc->used_vram < cs->ws->info.vram_size * 0.8; + + if (status) { + cs->csc->validated_crelocs = cs->csc->crelocs; + } else { + /* Remove lately-added relocations. The validation failed with them + * and the CS is about to be flushed because of that. Keep only + * the already-validated relocations. */ + unsigned i; + + for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { + p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); + radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); + } + cs->csc->crelocs = cs->csc->validated_crelocs; + + /* Flush if there are any relocs. Clean up otherwise. */ + if (cs->csc->crelocs) { + cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); + } else { + radeon_cs_context_cleanup(cs->csc); - return cs->csc->used_gart < cs->ws->gart_size * 0.8 && - cs->csc->used_vram < cs->ws->vram_size * 0.8; + assert(cs->base.cdw == 0); + if (cs->base.cdw != 0) { + fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); + } + } + } + return status; } static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, @@ -351,6 +379,8 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) for (i = 0; i < csc->crelocs; i++) p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + + radeon_cs_context_cleanup(csc); return NULL; } @@ -381,11 +411,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); - - /* Update whether the buffer is busy for write. */ - if (cs->csc->relocs[i].write_domain) { - cs->csc->relocs_bo[i]->busy_for_write = TRUE; - } } if (cs->ws->num_cpus > 1 && debug_get_option_thread() && @@ -395,6 +420,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) } else { radeon_drm_cs_emit_ioctl(cs->csc); } + } else { + radeon_cs_context_cleanup(cs->csc); } /* Flip command streams. */ @@ -403,8 +430,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) cs->cst = tmp; /* Prepare a new CS. */ - radeon_cs_context_cleanup(cs->csc); - cs->base.buf = cs->csc->buf; cs->base.cdw = 0; } @@ -447,6 +472,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; ws->base.cs_flush = radeon_drm_cs_flush; - ws->base.cs_set_flush = radeon_drm_cs_set_flush; + ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 339beedc6ab..fe285326884 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -41,6 +41,7 @@ struct radeon_cs_context { /* Relocs. */ unsigned nrelocs; unsigned crelocs; + unsigned validated_crelocs; struct radeon_bo **relocs_bo; struct drm_radeon_cs_reloc *relocs; @@ -88,8 +89,9 @@ static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, struct radeon_bo *bo) { - return bo->num_cs_references == bo->rws->num_cs || - (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); + int num_refs = bo->num_cs_references; + return num_refs == bo->rws->num_cs || + (num_refs && radeon_get_reloc(cs->csc, bo) != -1); } static INLINE boolean @@ -111,7 +113,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) { - return bo->num_cs_references; + return bo->num_cs_references != 0; } void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 0474b381ade..e234321d934 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -41,13 +41,34 @@ #include <xf86drm.h> #include <stdio.h> +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 6 +#endif + #ifndef RADEON_INFO_WANT_HYPERZ #define RADEON_INFO_WANT_HYPERZ 7 #endif + #ifndef RADEON_INFO_WANT_CMASK #define RADEON_INFO_WANT_CMASK 8 #endif +#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 9 +#endif + +#ifndef RADEON_INFO_NUM_BACKENDS +#define RADEON_INFO_NUM_BACKENDS 0xa +#endif + +#ifndef RADEON_INFO_NUM_TILE_PIPES +#define RADEON_INFO_NUM_TILE_PIPES 0xb +#endif + +#ifndef RADEON_INFO_BACKEND_MAP +#define RADEON_INFO_BACKEND_MAP 0xd +#endif + /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. * Otherwise, return FALSE. @@ -103,17 +124,31 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier, return FALSE; } +static boolean radeon_get_drm_value(int fd, unsigned request, + const char *errname, uint32_t *out) +{ + struct drm_radeon_info info = {0}; + int retval; + + info.value = (unsigned long)out; + info.request = request; + + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval && errname) { + fprintf(stderr, "radeon: Failed to get %s, error number %d\n", + errname, retval); + return FALSE; + } + return TRUE; +} + /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(struct radeon_drm_winsys *winsys) +static boolean do_winsys_init(struct radeon_drm_winsys *ws) { struct drm_radeon_gem_info gem_info = {0}; - struct drm_radeon_info info = {0}; - int target = 0; int retval; drmVersionPtr version; - info.value = (unsigned long)⌖ - /* We do things in a specific order here. * * DRM version first. We need to be sure we're running on a KMS chipset. @@ -123,71 +158,108 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. * + * The GEM info is actually bogus on the kernel side, as well as our side + * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because + * we don't actually use the info for anything yet. + * * The GB and Z pipe requests should always succeed, but they might not * return sensical values for all chipsets, but that's alright because * the pipe drivers already know that. - * - * The GEM info is actually bogus on the kernel side, as well as our side - * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. */ + */ - version = drmGetVersion(winsys->fd); + /* Get DRM version. */ + version = drmGetVersion(ws->fd); if (version->version_major != 2 || version->version_minor < 3) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.3.x (kernel 2.6.34) and later.\n", + "only compatible with 2.3.x (kernel 2.6.34) or later.\n", __FUNCTION__, version->version_major, version->version_minor, version->version_patchlevel); drmFreeVersion(version); - exit(1); + return FALSE; } - winsys->drm_major = version->version_major; - winsys->drm_minor = version->version_minor; - winsys->drm_patchlevel = version->version_patchlevel; + ws->info.drm_major = version->version_major; + ws->info.drm_minor = version->version_minor; + ws->info.drm_patchlevel = version->version_patchlevel; + drmFreeVersion(version); - info.request = RADEON_INFO_DEVICE_ID; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get PCI ID, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->pci_id = target; + /* Get PCI ID. */ + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", + &ws->info.pci_id)) + return FALSE; - info.request = RADEON_INFO_NUM_GB_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); - } - winsys->gb_pipes = target; + /* Check PCI ID. */ + switch (ws->info.pci_id) { +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET + ws->gen = R300; + break; - info.request = RADEON_INFO_NUM_Z_PIPES; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (retval) { - fprintf(stderr, "%s: Failed to get Z pipe count, " - "error number %d\n", __FUNCTION__, retval); - exit(1); +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r600_pci_ids.h" +#undef CHIPSET + ws->gen = R600; + break; + + default: + fprintf(stderr, "radeon: Invalid PCI ID.\n"); + return FALSE; } - winsys->z_pipes = target; - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, + /* Get GEM info. */ + retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { - fprintf(stderr, "%s: Failed to get MM info, error number %d\n", - __FUNCTION__, retval); - exit(1); + fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", + retval); + return FALSE; } - winsys->gart_size = gem_info.gart_size; - winsys->vram_size = gem_info.vram_size; + ws->info.gart_size = gem_info.gart_size; + ws->info.vram_size = gem_info.vram_size; - drmFreeVersion(version); + ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + + /* Generation-specific queries. */ + if (ws->gen == R300) { + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, + "GB pipe count", + &ws->info.r300_num_gb_pipes)) + return FALSE; - winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, + "Z pipe count", + &ws->info.r300_num_z_pipes)) + return FALSE; + } + else if (ws->gen == R600) { + if (ws->info.drm_minor >= 9 && + !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + "num backends", + &ws->info.r600_num_backends)) + return FALSE; + + /* get the GPU counter frequency, failure is not fatal */ + radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, + &ws->info.r600_clock_crystal_freq); + + radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, + &ws->info.r600_tiling_config); + + if (ws->info.drm_minor >= 11) { + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, + &ws->info.r600_num_tile_pipes); + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, + &ws->info.r600_backend_map)) + ws->info.r600_backend_map_valid = TRUE; + } + } + + return TRUE; } static void radeon_winsys_destroy(struct radeon_winsys *rws) @@ -202,34 +274,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) FREE(rws); } -static uint32_t radeon_get_value(struct radeon_winsys *rws, - enum radeon_value_id id) +static void radeon_query_info(struct radeon_winsys *rws, + struct radeon_info *info) { - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; - - switch(id) { - case RADEON_VID_PCI_ID: - return ws->pci_id; - case RADEON_VID_R300_GB_PIPES: - return ws->gb_pipes; - case RADEON_VID_R300_Z_PIPES: - return ws->z_pipes; - case RADEON_VID_GART_SIZE: - return ws->gart_size; - case RADEON_VID_VRAM_SIZE: - return ws->vram_size; - case RADEON_VID_DRM_MAJOR: - return ws->drm_major; - case RADEON_VID_DRM_MINOR: - return ws->drm_minor; - case RADEON_VID_DRM_PATCHLEVEL: - return ws->drm_patchlevel; - case RADEON_VID_DRM_2_6_0: - return ws->drm_major*100 + ws->drm_minor >= 206; - case RADEON_VID_DRM_2_8_0: - return ws->drm_major*100 + ws->drm_minor >= 208; - } - return 0; + *info = ((struct radeon_drm_winsys *)rws)->info; } static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, @@ -239,7 +287,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); switch (fid) { - case RADEON_FID_HYPERZ_RAM_ACCESS: + case RADEON_FID_R300_HYPERZ_ACCESS: if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, &cs->ws->hyperz_owner_mutex, @@ -248,7 +296,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, return FALSE; } - case RADEON_FID_CMASK_RAM_ACCESS: + case RADEON_FID_R300_CMASK_ACCESS: if (debug_get_bool_option("RADEON_CMASK", FALSE)) { return radeon_set_fd_access(cs, &cs->ws->cmask_owner, &cs->ws->cmask_owner_mutex, @@ -268,16 +316,9 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) } ws->fd = fd; - do_ioctls(ws); - switch (ws->pci_id) { -#define CHIPSET(pci_id, name, family) case pci_id: -#include "pci_ids/r300_pci_ids.h" -#undef CHIPSET - break; - default: - goto fail; - } + if (!do_winsys_init(ws)) + goto fail; /* Create managers. */ ws->kman = radeon_bomgr_create(ws); @@ -289,7 +330,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) /* Set functions. */ ws->base.destroy = radeon_winsys_destroy; - ws->base.get_value = radeon_get_value; + ws->base.query_info = radeon_query_info; ws->base.cs_request_feature = radeon_cs_request_feature; radeon_bomgr_init_functions(ws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index d5186bc4d17..69216448496 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -31,29 +31,27 @@ #define RADEON_DRM_WINSYS_H #include "radeon_winsys.h" - #include "os/os_thread.h" +enum radeon_generation { + R300, + R600 +}; + struct radeon_drm_winsys { struct radeon_winsys base; int fd; /* DRM file descriptor */ int num_cs; /* The number of command streams created. */ + enum radeon_generation gen; + struct radeon_info info; + struct pb_manager *kman; struct pb_manager *cman; - uint32_t pci_id; /* PCI ID */ - uint32_t gb_pipes; /* GB pipe count */ - uint32_t z_pipes; /* Z pipe count (rv530 only) */ - uint32_t gart_size; /* GART size. */ - uint32_t vram_size; /* VRAM size. */ uint32_t num_cpus; /* Number of CPUs. */ - unsigned drm_major; - unsigned drm_minor; - unsigned drm_patchlevel; - struct radeon_drm_cs *hyperz_owner; pipe_mutex hyperz_owner_mutex; struct radeon_drm_cs *cmask_owner; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 3a64e4abc35..90583e3ab8c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -26,6 +26,20 @@ /* The public winsys interface header for the radeon driver. */ +/* R300 features in DRM. + * + * 2.6.0: + * - Hyper-Z + * - GB_Z_PEQ_CONFIG on rv350->r4xx + * - R500 FG_ALPHA_VALUE + * + * 2.8.0: + * - R500 US_FORMAT regs + * - R500 ARGB2101010 colorbuffer + * - CMask and AA regs + * - R16F/RG16F + */ + #include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -47,6 +61,12 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_VRAM = 4 }; +enum radeon_bo_usage { /* bitfield */ + RADEON_USAGE_READ = 2, + RADEON_USAGE_WRITE = 4, + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE +}; + struct winsys_handle; struct radeon_winsys_cs_handle; /* for write_reloc etc. */ @@ -55,43 +75,29 @@ struct radeon_winsys_cs { uint32_t *buf; /* The command buffer. */ }; -enum radeon_value_id { - RADEON_VID_PCI_ID, - RADEON_VID_R300_GB_PIPES, - RADEON_VID_R300_Z_PIPES, - RADEON_VID_GART_SIZE, - RADEON_VID_VRAM_SIZE, - RADEON_VID_DRM_MAJOR, - RADEON_VID_DRM_MINOR, - RADEON_VID_DRM_PATCHLEVEL, - - /* These should probably go away: */ - - /* R300 features: - * - Hyper-Z - * - GB_Z_PEQ_CONFIG on rv350->r4xx - * - R500 FG_ALPHA_VALUE - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_6_0, +struct radeon_info { + uint32_t pci_id; + uint32_t gart_size; + uint32_t vram_size; - /* R300 features: - * - R500 US_FORMAT regs - * - R500 ARGB2101010 colorbuffer - * - CMask and AA regs - * - R16F/RG16F - * - * R600 features: - * - TBD - */ - RADEON_VID_DRM_2_8_0, + uint32_t drm_major; /* version */ + uint32_t drm_minor; + uint32_t drm_patchlevel; + + uint32_t r300_num_gb_pipes; + uint32_t r300_num_z_pipes; + + uint32_t r600_num_backends; + uint32_t r600_clock_crystal_freq; + uint32_t r600_tiling_config; + uint32_t r600_num_tile_pipes; + uint32_t r600_backend_map; + boolean r600_backend_map_valid; }; enum radeon_feature_id { - RADEON_FID_HYPERZ_RAM_ACCESS, /* ZMask + HiZ */ - RADEON_FID_CMASK_RAM_ACCESS, + RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ + RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_winsys { @@ -103,13 +109,13 @@ struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); /** - * Query a system value from a winsys. + * Query an info structure from winsys. * * \param ws The winsys this function is called from. - * \param vid One of the RADEON_VID_* enums. + * \param info Return structure */ - uint32_t (*get_value)(struct radeon_winsys *ws, - enum radeon_value_id vid); + void (*query_info)(struct radeon_winsys *ws, + struct radeon_info *info); /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. @@ -126,7 +132,6 @@ struct radeon_winsys { * \param size The size to allocate. * \param alignment An alignment of the buffer in memory. * \param bind A bitmask of the PIPE_BIND_* flags. - * \param usage A bitmask of the PIPE_USAGE_* flags. * \param domain A bitmask of the RADEON_DOMAIN_* flags. * \return The created buffer object. */ @@ -134,7 +139,6 @@ struct radeon_winsys { unsigned size, unsigned alignment, unsigned bind, - unsigned usage, enum radeon_bo_domain domain); struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)( @@ -164,8 +168,10 @@ struct radeon_winsys { * Return TRUE if a buffer object is being used by the GPU. * * \param buf A winsys buffer object. + * \param usage Only check whether the buffer is busy for the given usage. */ - boolean (*buffer_is_busy)(struct pb_buffer *buf); + boolean (*buffer_is_busy)(struct pb_buffer *buf, + enum radeon_bo_usage usage); /** * Wait for a buffer object until it is not used by a GPU. This is @@ -173,8 +179,10 @@ struct radeon_winsys { * and synchronizing to the fence. * * \param buf A winsys buffer object to wait for. + * \param usage Only wait until the buffer is idle for the given usage, + * but may still be busy for some other usage. */ - void (*buffer_wait)(struct pb_buffer *buf); + void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage); /** * Return tiling flags describing a memory layout of a buffer object. @@ -263,15 +271,18 @@ struct radeon_winsys { * \param buf A winsys buffer to validate. * \param rd A read domain containing a bitmask of the RADEON_DOMAIN_* flags. * \param wd A write domain containing a bitmask of the RADEON_DOMAIN_* flags. + * \return Relocation index. */ - void (*cs_add_reloc)(struct radeon_winsys_cs *cs, - struct radeon_winsys_cs_handle *buf, - enum radeon_bo_domain rd, - enum radeon_bo_domain wd); + unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_domain rd, + enum radeon_bo_domain wd); /** * Return TRUE if there is enough memory in VRAM and GTT for the relocs - * added so far. + * added so far. If the validation fails, all the relocations which have + * been added since the last call of cs_validate will be removed and + * the CS will be flushed (provided there are still any relocations). * * \param cs A command stream to validate. */ @@ -304,9 +315,9 @@ struct radeon_winsys { * \param flush A flush callback function associated with the command stream. * \param user A user pointer that will be passed to the flush callback. */ - void (*cs_set_flush)(struct radeon_winsys_cs *cs, - void (*flush)(void *ctx, unsigned flags), - void *user); + void (*cs_set_flush_callback)(struct radeon_winsys_cs *cs, + void (*flush)(void *ctx, unsigned flags), + void *ctx); /** * Return TRUE if a buffer is referenced by a command stream. @@ -321,7 +332,8 @@ struct radeon_winsys { * Request access to a feature for a command stream. * * \param cs A command stream. - * \param fid A winsys buffer. + * \param fid Feature ID, one of RADEON_FID_* + * \param enable Whether to enable or disable the feature. */ boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index d92ba389d35..afdbd44458d 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -42,7 +42,8 @@ #include "xf86drm.h" #include "vmwgfx_drm.h" -#include <sys/mman.h> +#include "os/os_mman.h" + #include <errno.h> #include <unistd.h> @@ -94,7 +95,7 @@ static void vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping) { VMW_FUNC; - (void)munmap(mapping, getpagesize()); + (void)os_munmap(mapping, getpagesize()); } @@ -106,7 +107,7 @@ vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws, VMW_FUNC; - map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, + map = os_mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, vws->ioctl.drm_fd, fifo_offset); if (map == MAP_FAILED) { @@ -362,7 +363,7 @@ vmw_ioctl_region_destroy(struct vmw_region *region) region->ptr.gmrId, region->ptr.offset); if (region->data) { - munmap(region->data, region->size); + os_munmap(region->data, region->size); region->data = NULL; } @@ -388,7 +389,7 @@ vmw_ioctl_region_map(struct vmw_region *region) region->ptr.gmrId, region->ptr.offset); if (region->data == NULL) { - map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED, + map = os_mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED, region->drm_fd, region->map_handle); if (map == MAP_FAILED) { debug_printf("%s: Map failed.\n", __FUNCTION__); diff --git a/src/gallium/winsys/sw/android/Android.mk b/src/gallium/winsys/sw/android/Android.mk new file mode 100644 index 00000000000..4fb2715a56c --- /dev/null +++ b/src/gallium/winsys/sw/android/Android.mk @@ -0,0 +1,34 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + android_sw_winsys.cpp + +LOCAL_MODULE := libmesa_winsys_sw_android + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp new file mode 100644 index 00000000000..02faf1e0cca --- /dev/null +++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp @@ -0,0 +1,255 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "state_tracker/sw_winsys.h" + +#include <utils/Errors.h> +#include <private/ui/sw_gralloc_handle.h> + +#include <hardware/gralloc.h> + +#include "android_sw_winsys.h" + +struct android_sw_winsys +{ + struct sw_winsys base; + + const gralloc_module_t *grmod; +}; + +struct android_sw_displaytarget +{ + buffer_handle_t handle; + int stride; + int width, height; + int usage; /* gralloc usage */ + + void *mapped; +}; + +static INLINE struct android_sw_winsys * +android_sw_winsys(struct sw_winsys *ws) +{ + return (struct android_sw_winsys *) ws; +} + +static INLINE struct android_sw_displaytarget * +android_sw_displaytarget(struct sw_displaytarget *dt) +{ + return (struct android_sw_displaytarget *) dt; +} + +namespace android { + +static void +android_displaytarget_display(struct sw_winsys *ws, + struct sw_displaytarget *dt, + void *context_private) +{ +} + +static struct sw_displaytarget * +android_displaytarget_create(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + return NULL; +} + +static void +android_displaytarget_destroy(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + assert(!adt->mapped); + FREE(adt); +} + +static void +android_displaytarget_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + if (adt->mapped) { + if (sw_gralloc_handle_t::validate(adt->handle) >= 0) { + adt->mapped = NULL; + } + else { + droid->grmod->unlock(droid->grmod, adt->handle); + adt->mapped = NULL; + } + } +} + +static void * +android_displaytarget_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + struct android_sw_displaytarget *adt = android_sw_displaytarget(dt); + + if (!adt->mapped) { + if (sw_gralloc_handle_t::validate(adt->handle) >= 0) { + const sw_gralloc_handle_t *swhandle = + reinterpret_cast<const sw_gralloc_handle_t *>(adt->handle); + adt->mapped = reinterpret_cast<void *>(swhandle->base); + } + else { + /* lock the buffer for CPU access */ + droid->grmod->lock(droid->grmod, adt->handle, + adt->usage, 0, 0, adt->width, adt->height, &adt->mapped); + } + } + + return adt->mapped; +} + +static struct sw_displaytarget * +android_displaytarget_from_handle(struct sw_winsys *ws, + const struct pipe_resource *templ, + struct winsys_handle *whandle, + unsigned *stride) +{ + struct android_winsys_handle *ahandle = + (struct android_winsys_handle *) whandle; + struct android_sw_displaytarget *adt; + + adt = CALLOC_STRUCT(android_sw_displaytarget); + if (!adt) + return NULL; + + adt->handle = ahandle->handle; + adt->stride = ahandle->stride; + adt->width = templ->width0; + adt->height = templ->height0; + + if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE)) + adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN; + if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ)) + adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN; + + if (stride) + *stride = adt->stride; + + return reinterpret_cast<struct sw_displaytarget *>(adt); +} + +static boolean +android_displaytarget_get_handle(struct sw_winsys *ws, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + return FALSE; +} + +static boolean +android_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + int fmt = -1; + + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGBA_8888; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGBX_8888; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + fmt = HAL_PIXEL_FORMAT_RGB_888; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + fmt = HAL_PIXEL_FORMAT_RGB_565; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + fmt = HAL_PIXEL_FORMAT_BGRA_8888; + break; + default: + break; + } + + return (fmt != -1); +} + +static void +android_destroy(struct sw_winsys *ws) +{ + struct android_sw_winsys *droid = android_sw_winsys(ws); + + FREE(droid); +} + +}; /* namespace android */ + +using namespace android; + +struct sw_winsys * +android_create_sw_winsys(void) +{ + struct android_sw_winsys *droid; + const hw_module_t *mod; + + droid = CALLOC_STRUCT(android_sw_winsys); + if (!droid) + return NULL; + + if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) { + FREE(droid); + return NULL; + } + + droid->grmod = (const gralloc_module_t *) mod; + + droid->base.destroy = android_destroy; + droid->base.is_displaytarget_format_supported = + android_is_displaytarget_format_supported; + + droid->base.displaytarget_create = android_displaytarget_create; + droid->base.displaytarget_destroy = android_displaytarget_destroy; + droid->base.displaytarget_from_handle = android_displaytarget_from_handle; + droid->base.displaytarget_get_handle = android_displaytarget_get_handle; + + droid->base.displaytarget_map = android_displaytarget_map; + droid->base.displaytarget_unmap = android_displaytarget_unmap; + droid->base.displaytarget_display = android_displaytarget_display; + + return &droid->base; +} diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.h b/src/gallium/winsys/sw/android/android_sw_winsys.h new file mode 100644 index 00000000000..79392dc0f97 --- /dev/null +++ b/src/gallium/winsys/sw/android/android_sw_winsys.h @@ -0,0 +1,49 @@ +/* + * Mesa 3-D graphics library + * Version: 7.12 + * + * Copyright (C) 2010-2011 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#ifndef ANDROID_SW_WINSYS +#define ANDROID_SW_WINSYS + +#include <sys/cdefs.h> +#include <hardware/gralloc.h> + +__BEGIN_DECLS + +struct sw_winsys; + +struct android_winsys_handle { + buffer_handle_t handle; + int stride; +}; + +struct sw_winsys * +android_create_sw_winsys(void); + +__END_DECLS + +#endif /* ANDROID_SW_WINSYS */ diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 6bb7848d830..9de8cb61162 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -194,6 +194,8 @@ dri_screen_create(struct gbm_dri_device *dri) dri->screen = dri->dri2->createNewScreen(0, dri->base.base.fd, dri->extensions, &dri->driver_configs, dri); + if (dri->screen == NULL) + return -1; extensions = dri->core->getExtensions(dri->screen); if (dri_bind_extensions(dri, dri_core_extensions, extensions) < 0) { diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index d79a03e4b3f..05d2292dc75 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -28,6 +28,11 @@ #ifndef _GBM_H_ #define _GBM_H_ +#ifdef __cplusplus +extern "C" { +#endif + + #define __GBM__ 1 #include <stdint.h> @@ -97,4 +102,8 @@ gbm_bo_get_handle(struct gbm_bo *bo); void gbm_bo_destroy(struct gbm_bo *bo); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore index dfbd572d894..d26839a3e3e 100644 --- a/src/glsl/.gitignore +++ b/src/glsl/.gitignore @@ -5,3 +5,4 @@ glsl_parser.h glsl_parser.output builtin_function.cpp builtin_compiler +glsl_test diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk new file mode 100644 index 00000000000..e4ccb7291ef --- /dev/null +++ b/src/glsl/Android.gen.mk @@ -0,0 +1,98 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# included by glsl Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-intermediates-dir) + +sources := \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c + +ifneq ($(LOCAL_IS_HOST_MODULE),true) +sources += builtin_function.cpp +endif + +LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES)) + +LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp $(MESA_TOP)/src/glsl/glcpp + +sources := $(addprefix $(intermediates)/, $(sources)) +LOCAL_GENERATED_SOURCES += $(sources) + +define local-l-or-ll-to-c-or-cpp + @mkdir -p $(dir $@) + @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" + $(hide) $(LEX) --nounistd -o$@ $< +endef + +define local-y-to-c-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -o $@ $< +endef + +define local-yy-to-cpp-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -p "_mesa_glsl_" -o $@ $< + touch $(@:$1=$(YACC_HEADER_SUFFIX)) + echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h) + echo '#define '$(@F:$1=_h) >> $(@:$1=.h) + cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h) + echo '#endif' >> $(@:$1=.h) + rm -f $(@:$1=$(YACC_HEADER_SUFFIX)) +endef + +$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy + $(call local-yy-to-cpp-and-h,.cpp) + +$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y + $(call local-y-to-c-and-h) + +BUILTIN_COMPILER := $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX) + +builtin_function_deps := \ + $(LOCAL_PATH)/builtins/tools/generate_builtins.py \ + $(LOCAL_PATH)/builtins/tools/texture_builtins.py \ + $(BUILTIN_COMPILER) \ + $(wildcard $(LOCAL_PATH)/builtins/profiles/*) \ + $(wildcard $(LOCAL_PATH)/builtins/ir/*) + +$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/builtins/tools/generate_builtins.py +$(intermediates)/builtin_function.cpp: $(builtin_function_deps) + @mkdir -p $(dir $@) + @echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)" + $(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@ diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk new file mode 100644 index 00000000000..d0b3ff3becf --- /dev/null +++ b/src/glsl/Android.mk @@ -0,0 +1,171 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for glsl + +LOCAL_PATH := $(call my-dir) + +# from Makefile +LIBGLCPP_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/pp.c + +C_SOURCES = \ + strtod.c \ + ralloc.c \ + $(LIBGLCPP_SOURCES) + +CXX_SOURCES = \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser_extras.cpp \ + glsl_types.cpp \ + glsl_symbol_table.cpp \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir.cpp \ + ir_expression_flattening.cpp \ + ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ + ir_function.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hv_accept.cpp \ + ir_import_prototypes.cpp \ + ir_print_visitor.cpp \ + ir_reader.cpp \ + ir_rvalue_visitor.cpp \ + ir_set_program_inouts.cpp \ + ir_validate.cpp \ + ir_variable.cpp \ + ir_variable_refcount.cpp \ + linker.cpp \ + link_functions.cpp \ + loop_analysis.cpp \ + loop_controls.cpp \ + loop_unroll.cpp \ + lower_discard.cpp \ + lower_if_to_cond_assign.cpp \ + lower_instructions.cpp \ + lower_jumps.cpp \ + lower_mat_op_to_vec.cpp \ + lower_noise.cpp \ + lower_texture_projection.cpp \ + lower_variable_index_to_cond_assign.cpp \ + lower_vec_index_to_cond_assign.cpp \ + lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ + opt_algebraic.cpp \ + opt_constant_folding.cpp \ + opt_constant_propagation.cpp \ + opt_constant_variable.cpp \ + opt_copy_propagation.cpp \ + opt_copy_propagation_elements.cpp \ + opt_dead_code.cpp \ + opt_dead_code_local.cpp \ + opt_dead_functions.cpp \ + opt_discard_simplification.cpp \ + opt_function_inlining.cpp \ + opt_if_simplification.cpp \ + opt_noop_swizzle.cpp \ + opt_redundant_jumps.cpp \ + opt_structure_splitting.cpp \ + opt_swizzle_swizzle.cpp \ + opt_tree_grafting.cpp \ + s_expression.cpp + +# --------------------------------------- +# Build libmesa_glsl +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) \ + $(CXX_SOURCES) \ + builtin_function.cpp + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_MODULE := libmesa_glsl + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build mesa_builtin_compiler for host +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) \ + $(CXX_SOURCES) \ + builtin_stubs.cpp \ + main.cpp \ + standalone_scaffolding.cpp + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_STATIC_LIBRARIES := libmesa_glsl_utils + +LOCAL_MODULE := mesa_builtin_compiler + +LOCAL_MODULE_CLASS := EXECUTABLES +LOCAL_IS_HOST_MODULE := true +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_HOST_EXECUTABLE) + +# --------------------------------------- +# Build glsl_compiler +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + main.cpp \ + standalone_scaffolding.cpp + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils + +LOCAL_MODULE_TAGS := eng +LOCAL_MODULE := glsl_compiler + +include $(MESA_COMMON_MK) +include $(BUILD_EXECUTABLE) diff --git a/src/glsl/Makefile b/src/glsl/Makefile index e0776c1b55d..c20a6c9edd9 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -39,6 +39,7 @@ CXX_SOURCES = \ ir.cpp \ ir_expression_flattening.cpp \ ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ ir_function.cpp \ ir_hierarchical_visitor.cpp \ ir_hv_accept.cpp \ @@ -88,18 +89,32 @@ CXX_SOURCES = \ LIBS = \ $(TOP)/src/glsl/libglsl.a -APPS = glsl_compiler glcpp/glcpp +APPS = glsl_compiler glsl_test glcpp/glcpp GLSL2_C_SOURCES = \ ../mesa/program/hash_table.c \ ../mesa/program/symbol_table.c GLSL2_CXX_SOURCES = \ - main.cpp + main.cpp \ + standalone_scaffolding.cpp GLSL2_OBJECTS = \ $(GLSL2_C_SOURCES:.c=.o) \ $(GLSL2_CXX_SOURCES:.cpp=.o) +TEST_C_SOURCES = \ + ../mesa/program/hash_table.c \ + ../mesa/program/symbol_table.c + +TEST_CXX_SOURCES = \ + standalone_scaffolding.cpp \ + test.cpp \ + test_optpass.cpp + +TEST_OBJECTS = \ + $(TEST_C_SOURCES:.c=.o) \ + $(TEST_CXX_SOURCES:.cpp=.o) + ### Basic defines ### DEFINES += \ @@ -128,7 +143,9 @@ ALL_SOURCES = \ $(C_SOURCES) \ $(CXX_SOURCES) \ $(GLSL2_CXX_SOURCES) \ - $(GLSL2_C_SOURCES) + $(GLSL2_C_SOURCES) \ + $(TEST_CXX_SOURCES) \ + $(TEST_C_SOURCES) ##### TARGETS ##### @@ -147,10 +164,11 @@ depend: $(ALL_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + $(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null # Remove .o and backup files clean: clean-dricore - rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler + rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(TEST_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler -rm -f $(APPS) clean-dricore: @@ -173,6 +191,9 @@ install-dricore: default glsl_compiler: $(GLSL2_OBJECTS) libglsl.a builtin_stubs.o $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) builtin_stubs.o $(LIBS) -o $@ +glsl_test: $(TEST_OBJECTS) libglsl.a builtin_stubs.o + $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(TEST_OBJECTS) builtin_stubs.o $(LIBS) -o $@ + glcpp: glcpp/glcpp glcpp/glcpp: $(GLCPP_OBJECTS) $(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) -o $@ diff --git a/src/glsl/SConscript b/src/glsl/SConscript index 1441cc74bd8..1da58a91f91 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -50,6 +50,7 @@ glsl_sources = [ 'ir.cpp', 'ir_expression_flattening.cpp', 'ir_function_can_inline.cpp', + 'ir_function_detect_recursion.cpp', 'ir_function.cpp', 'ir_hierarchical_visitor.cpp', 'ir_hv_accept.cpp', @@ -95,6 +96,7 @@ glsl_sources = [ 'opt_tree_grafting.cpp', 'ralloc.c', 's_expression.cpp', + 'standalone_scaffolding.cpp', 'strtod.c', ] diff --git a/src/glsl/TODO b/src/glsl/TODO index a3762384ff2..c99d7e152d6 100644 --- a/src/glsl/TODO +++ b/src/glsl/TODO @@ -9,11 +9,12 @@ - Implement support for ir_binop_dot in ir_algebraic.cpp. Perform transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y. -1.30 features: - -- Implement AST-to-HIR conversion of bit-shift operators. +- Track source locations throughout the IR. There are currently several + places where we cannot emit line numbers for errors (and currently emit 0:0) + because we've "lost" the line number information. This is particularly + noticeable at link time. -- Implement AST-to-HIR conversion of bit-wise {&,|,^,!} operators. +1.30 features: - Implement AST-to-HIR conversion of switch-statements - switch @@ -21,18 +22,6 @@ - Update break to correcly handle mixed nexting of switch-statements and loops. -- Handle currently unsupported constant expression types - - ir_unop_bit_not - - ir_binop_mod - - ir_binop_lshift - - ir_binop_rshift - - ir_binop_bit_and - - ir_binop_bit_xor - - ir_binop_bit_or - -- Implement support for 1.30 style shadow compares which only return a float - instead of a vec4. - - Implement support for gl_ClipDistance. This is non-trivial because gl_ClipDistance is exposed as a float[8], but all hardware actually implements it as vec4[2].
\ No newline at end of file diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 878f48b2070..d1de2271873 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -730,7 +730,6 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr, struct _mesa_glsl_parse_state *state); void -emit_function(_mesa_glsl_parse_state *state, exec_list *instructions, - ir_function *f); +emit_function(_mesa_glsl_parse_state *state, ir_function *f); #endif /* AST_H */ diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 60a2c617f70..ca45934a478 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -62,8 +62,10 @@ process_parameters(exec_list *instructions, exec_list *actual_parameters, * * \param return_type Return type of the function. May be \c NULL. * \param name Name of the function. - * \param parameters Parameter list for the function. This may be either a - * formal or actual parameter list. Only the type is used. + * \param parameters List of \c ir_instruction nodes representing the + * parameter list for the function. This may be either a + * formal (\c ir_variable) or actual (\c ir_rvalue) + * parameter list. Only the type is used. * * \return * A ralloced string representing the prototype of the function. @@ -123,7 +125,7 @@ match_function_by_name(exec_list *instructions, const char *name, if (f == NULL) { f = new(ctx) ir_function(name); state->symbols->add_global_function(f); - emit_function(state, instructions, f); + emit_function(state, f); } f->add_signature(sig->clone_prototype(f, NULL)); @@ -132,6 +134,8 @@ match_function_by_name(exec_list *instructions, const char *name, } } + exec_list post_call_conversions; + if (sig != NULL) { /* Verify that 'out' and 'inout' actual parameters are lvalues. This * isn't done in ir_function::matching_signature because that function @@ -139,6 +143,12 @@ match_function_by_name(exec_list *instructions, const char *name, * * Also, validate that 'const_in' formal parameters (an extension of our * IR) correspond to ir_constant actual parameters. + * + * Also, perform implicit conversion of arguments. Note: to implicitly + * convert out parameters, we need to place them in a temporary + * variable, and do the conversion after the call takes place. Since we + * haven't emitted the call yet, we'll place the post-call conversions + * in a temporary exec_list, and emit them later. */ exec_list_iterator actual_iter = actual_parameters->iterator(); exec_list_iterator formal_iter = sig->parameters.iterator(); @@ -154,6 +164,7 @@ match_function_by_name(exec_list *instructions, const char *name, _mesa_glsl_error(loc, state, "parameter `%s' must be a constant expression", formal->name); + return ir_call::get_error_instruction(ctx); } if ((formal->mode == ir_var_out) @@ -183,8 +194,64 @@ match_function_by_name(exec_list *instructions, const char *name, } if (formal->type->is_numeric() || formal->type->is_boolean()) { - ir_rvalue *converted = convert_component(actual, formal->type); - actual->replace_with(converted); + switch (formal->mode) { + case ir_var_const_in: + case ir_var_in: { + ir_rvalue *converted + = convert_component(actual, formal->type); + actual->replace_with(converted); + break; + } + case ir_var_out: + if (actual->type != formal->type) { + /* To convert an out parameter, we need to create a + * temporary variable to hold the value before conversion, + * and then perform the conversion after the function call + * returns. + * + * This has the effect of transforming code like this: + * + * void f(out int x); + * float value; + * f(value); + * + * Into IR that's equivalent to this: + * + * void f(out int x); + * float value; + * int out_parameter_conversion; + * f(out_parameter_conversion); + * value = float(out_parameter_conversion); + */ + ir_variable *tmp = + new(ctx) ir_variable(formal->type, + "out_parameter_conversion", + ir_var_temporary); + instructions->push_tail(tmp); + ir_dereference_variable *deref_tmp_1 + = new(ctx) ir_dereference_variable(tmp); + ir_dereference_variable *deref_tmp_2 + = new(ctx) ir_dereference_variable(tmp); + ir_rvalue *converted_tmp + = convert_component(deref_tmp_1, actual->type); + ir_assignment *assignment + = new(ctx) ir_assignment(actual, converted_tmp); + post_call_conversions.push_tail(assignment); + actual->replace_with(deref_tmp_2); + } + break; + case ir_var_inout: + /* Inout parameters should never require conversion, since that + * would require an implicit conversion to exist both to and + * from the formal parameter type, and there are no + * bidirectional implicit conversions. + */ + assert (actual->type == formal->type); + break; + default: + assert (!"Illegal formal parameter mode"); + break; + } } actual_iter.next(); @@ -194,11 +261,27 @@ match_function_by_name(exec_list *instructions, const char *name, /* Always insert the call in the instruction stream, and return a deref * of its return val if it returns a value, since we don't know if * the rvalue is going to be assigned to anything or not. + * + * Also insert any out parameter conversions after the call. */ ir_call *call = new(ctx) ir_call(sig, actual_parameters); + ir_dereference_variable *deref; if (!sig->return_type->is_void()) { + /* If the function call is a constant expression, don't + * generate the instructions to call it; just generate an + * ir_constant representing the constant value. + * + * Function calls can only be constant expressions starting + * in GLSL 1.20. + */ + if (state->language_version >= 120) { + ir_constant *const_val = call->constant_expression_value(); + if (const_val) { + return const_val; + } + } + ir_variable *var; - ir_dereference_variable *deref; var = new(ctx) ir_variable(sig->return_type, ralloc_asprintf(ctx, "%s_retval", @@ -209,15 +292,14 @@ match_function_by_name(exec_list *instructions, const char *name, deref = new(ctx) ir_dereference_variable(var); ir_assignment *assign = new(ctx) ir_assignment(deref, call, NULL); instructions->push_tail(assign); - if (state->language_version >= 120) - var->constant_value = call->constant_expression_value(); deref = new(ctx) ir_dereference_variable(var); - return deref; } else { instructions->push_tail(call); - return NULL; + deref = NULL; } + instructions->append_list(&post_call_conversions); + return deref; } else { char *str = prototype_string(NULL, name, actual_parameters); @@ -440,13 +522,21 @@ process_array_constructor(exec_list *instructions, ir_rvalue *ir = (ir_rvalue *) n; ir_rvalue *result = ir; - /* Apply implicit conversions (not the scalar constructor rules!) */ + /* Apply implicit conversions (not the scalar constructor rules!). See + * the spec quote above. */ if (constructor_type->element_type()->is_float()) { const glsl_type *desired_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, ir->type->vector_elements, ir->type->matrix_columns); - result = convert_component(ir, desired_type); + if (result->type->can_implicitly_convert_to(desired_type)) { + /* Even though convert_component() implements the constructor + * conversion rules (not the implicit conversion rules), its safe + * to use it here because we already checked that the implicit + * conversion is legal. + */ + result = convert_component(ir, desired_type); + } } if (result->type != constructor_type->element_type()) { diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2312c297c40..9e7496b4b43 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -66,6 +66,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) state->current_function = NULL; + state->toplevel_ir = instructions; + /* Section 4.2 of the GLSL 1.20 specification states: * "The built-in functions are scoped in a scope outside the global scope * users declare global variables in. That is, a shader's global scope, @@ -83,6 +85,10 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) foreach_list_typed (ast_node, ast, link, & state->translation_unit) ast->hir(instructions, state); + + detect_recursion_unlinked(state, instructions); + + state->toplevel_ir = NULL; } @@ -647,6 +653,16 @@ validate_assignment(struct _mesa_glsl_parse_state *state, return NULL; } +static void +mark_whole_array_access(ir_rvalue *access) +{ + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref && deref->var) { + deref->var->max_array_access = deref->type->length - 1; + } +} + ir_rvalue * do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, ir_rvalue *lhs, ir_rvalue *rhs, bool is_initializer, @@ -707,6 +723,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, rhs->type->array_size()); d->type = var->type; } + mark_whole_array_access(lhs); } /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, @@ -767,16 +784,6 @@ ast_node::hir(exec_list *instructions, return NULL; } -static void -mark_whole_array_access(ir_rvalue *access) -{ - ir_dereference_variable *deref = access->as_dereference_variable(); - - if (deref) { - deref->var->max_array_access = deref->type->length - 1; - } -} - static ir_rvalue * do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) { @@ -1763,11 +1770,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size, ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); YYLTYPE loc = array_size->get_location(); - /* FINISHME: Verify that the grammar forbids side-effects in array - * FINISHME: sizes. i.e., 'vec4 [x = 12] data' - */ - assert(dummy_instructions.is_empty()); - if (ir != NULL) { if (!ir->type->is_integer()) { _mesa_glsl_error(& loc, state, "array size must be integer type"); @@ -1784,6 +1786,14 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size, } else { assert(size->type == ir->type); length = size->value.u[0]; + + /* If the array size is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the array size isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); } } } @@ -2397,12 +2407,12 @@ ast_declarator_list::hir(exec_list *instructions, decl_type = this->type->specifier->glsl_type(& type_name, state); if (this->declarations.is_empty()) { - /* The only valid case where the declaration list can be empty is when - * the declaration is setting the default precision of a built-in type - * (e.g., 'precision highp vec4;'). - */ - if (decl_type != NULL) { + /* Warn if this empty declaration is not for declaring a structure. + */ + if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); + } } else { _mesa_glsl_error(& loc, state, "incomplete declaration"); } @@ -2924,23 +2934,16 @@ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, void -emit_function(_mesa_glsl_parse_state *state, exec_list *instructions, - ir_function *f) +emit_function(_mesa_glsl_parse_state *state, ir_function *f) { - /* Emit the new function header */ - if (state->current_function == NULL) { - instructions->push_tail(f); - } else { - /* IR invariants disallow function declarations or definitions nested - * within other function definitions. Insert the new ir_function - * block in the instruction sequence before the ir_function block - * containing the current ir_function_signature. - */ - ir_function *const curr = - const_cast<ir_function *>(state->current_function->function()); - - curr->insert_before(f); - } + /* IR invariants disallow function declarations or definitions + * nested within other function definitions. But there is no + * requirement about the relative order of function declarations + * and definitions with respect to one another. So simply insert + * the new ir_function block at the end of the toplevel instruction + * list. + */ + state->toplevel_ir->push_tail(f); } @@ -3067,7 +3070,7 @@ ast_function::hir(exec_list *instructions, return NULL; } - emit_function(state, instructions, f); + emit_function(state, f); } /* Verify the return type of main() */ diff --git a/src/glsl/builtins/ir/asin b/src/glsl/builtins/ir/asin index e230ad614ee..45d9e672958 100644 --- a/src/glsl/builtins/ir/asin +++ b/src/glsl/builtins/ir/asin @@ -5,23 +5,26 @@ ((return (expression float * (expression float sign (var_ref x)) (expression float - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression float * (expression float sqrt (expression float - (constant float (1.0)) (expression float abs (var_ref x)))) (expression float + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression float * (expression float abs (var_ref x)) (expression float + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression float * - (constant float (0.0742610)) - (expression float abs (var_ref x)))))))))))) + (expression float abs (var_ref x)) + (expression float + + (constant float (0.086566724)) + (expression float * + (expression float abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec2 (parameters @@ -29,23 +32,26 @@ ((return (expression vec2 * (expression vec2 sign (var_ref x)) (expression vec2 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression vec2 * (expression vec2 sqrt (expression vec2 - (constant float (1.0)) (expression vec2 abs (var_ref x)))) (expression vec2 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec2 * (expression vec2 abs (var_ref x)) (expression vec2 + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec2 * - (constant float (0.0742610)) - (expression vec2 abs (var_ref x)))))))))))) + (expression vec2 abs (var_ref x)) + (expression vec2 + + (constant float (0.086566724)) + (expression vec2 * + (expression vec2 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec3 (parameters @@ -53,23 +59,26 @@ ((return (expression vec3 * (expression vec3 sign (var_ref x)) (expression vec3 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression vec3 * (expression vec3 sqrt (expression vec3 - (constant float (1.0)) (expression vec3 abs (var_ref x)))) (expression vec3 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec3 * (expression vec3 abs (var_ref x)) (expression vec3 + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec3 * - (constant float (0.0742610)) - (expression vec3 abs (var_ref x)))))))))))) + (expression vec3 abs (var_ref x)) + (expression vec3 + + (constant float (0.086566724)) + (expression vec3 * + (expression vec3 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) (signature vec4 (parameters @@ -77,21 +86,24 @@ ((return (expression vec4 * (expression vec4 sign (var_ref x)) (expression vec4 - - (expression float * - (constant float (3.1415926)) - (constant float (0.5))) + (constant float (1.5707964)) (expression vec4 * (expression vec4 sqrt (expression vec4 - (constant float (1.0)) (expression vec4 abs (var_ref x)))) (expression vec4 + - (constant float (1.5707288)) + (constant float (1.5707964)) (expression vec4 * (expression vec4 abs (var_ref x)) (expression vec4 + - (constant float (-0.2121144)) + (constant float (-0.21460183)) (expression vec4 * - (constant float (0.0742610)) - (expression vec4 abs (var_ref x)))))))))))) + (expression vec4 abs (var_ref x)) + (expression vec4 + + (constant float (0.086566724)) + (expression vec4 * + (expression vec4 abs (var_ref x)) + (constant float (-0.03102955)) + )))))))))))) )) diff --git a/src/glsl/builtins/ir/atan b/src/glsl/builtins/ir/atan index cfecc1f1749..7b5ea13c6ba 100644 --- a/src/glsl/builtins/ir/atan +++ b/src/glsl/builtins/ir/atan @@ -54,7 +54,9 @@ ) ( (declare () float r) - (if (expression bool > (expression float abs (var_ref x)) (constant float (0.000100))) ( + (if (expression bool > + (expression float abs (var_ref x)) + (expression float * (constant float (1.0e-8)) (expression float abs (var_ref y)))) ( (assign (x) (var_ref r) (call atan ((expression float / (var_ref y) (var_ref x))))) (if (expression bool < (var_ref x) (constant float (0.000000)) ) ( (if (expression bool >= (var_ref y) (constant float (0.000000)) ) diff --git a/src/glsl/builtins/ir/radians b/src/glsl/builtins/ir/radians index 6a0f5d2e219..a419101cf16 100644 --- a/src/glsl/builtins/ir/radians +++ b/src/glsl/builtins/ir/radians @@ -2,20 +2,20 @@ (signature float (parameters (declare (in) float arg0)) - ((return (expression float * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression float * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec2 (parameters (declare (in) vec2 arg0)) - ((return (expression vec2 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec2 * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec3 (parameters (declare (in) vec3 arg0)) - ((return (expression vec3 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec3 * (var_ref arg0) (constant float (0.0174532925)))))) (signature vec4 (parameters (declare (in) vec4 arg0)) - ((return (expression vec4 * (var_ref arg0) (constant float (0.017453)))))) + ((return (expression vec4 * (var_ref arg0) (constant float (0.0174532925)))))) )) diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag index 0e3c7ac4199..c121859f14c 100644 --- a/src/glsl/builtins/profiles/130.frag +++ b/src/glsl/builtins/profiles/130.frag @@ -465,7 +465,6 @@ bvec4 not(bvec4 x); * 8.7 - Texture Lookup Functions */ -#if 0 /* textureSize */ int textureSize( sampler1D sampler, int lod); int textureSize(isampler1D sampler, int lod); @@ -496,7 +495,6 @@ ivec3 textureSize(usampler2DArray sampler, int lod); ivec2 textureSize(sampler1DArrayShadow sampler, int lod); ivec3 textureSize(sampler2DArrayShadow sampler, int lod); -#endif /* texture - no bias */ vec4 texture( sampler1D sampler, float P); diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert index f85b27f8f8c..ebd9a508851 100644 --- a/src/glsl/builtins/profiles/130.vert +++ b/src/glsl/builtins/profiles/130.vert @@ -467,7 +467,6 @@ bvec4 not(bvec4 x); * 8.7 - Texture Lookup Functions */ -#if 0 /* textureSize */ int textureSize( sampler1D sampler, int lod); int textureSize(isampler1D sampler, int lod); @@ -498,7 +497,6 @@ ivec3 textureSize(usampler2DArray sampler, int lod); ivec2 textureSize(sampler1DArrayShadow sampler, int lod); ivec3 textureSize(sampler2DArrayShadow sampler, int lod); -#endif /* texture - no bias */ vec4 texture( sampler1D sampler, float P); diff --git a/src/glsl/builtins/tools/texture_builtins.py b/src/glsl/builtins/tools/texture_builtins.py index a4054caac98..7e569bf562b 100755 --- a/src/glsl/builtins/tools/texture_builtins.py +++ b/src/glsl/builtins/tools/texture_builtins.py @@ -44,6 +44,11 @@ def get_extra_dim(sampler_type, use_proj, unused_fields): extra_dim += 1 return extra_dim +def get_txs_dim(sampler_type): + if sampler_type.startswith("Cube"): + return 2 + return get_coord_dim(sampler_type) + def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0): coord_dim = get_coord_dim(sampler_type) extra_dim = get_extra_dim(sampler_type, variant & Proj, unused_fields) @@ -51,17 +56,20 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0): if variant & Single: return_type = "float" + elif tex_inst == "txs": + return_type = vec_type("i", get_txs_dim(sampler_type)) else: return_type = g + "vec4" # Print parameters print " (signature", return_type print " (parameters" - print " (declare (in) " + g + "sampler" + sampler_type + " sampler)" - print " (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)", + print " (declare (in) " + g + "sampler" + sampler_type + " sampler)", + if tex_inst != "txs": + print "\n (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)", if tex_inst == "txl": print "\n (declare (in) float lod)", - elif tex_inst == "txf": + elif tex_inst == "txf" or tex_inst == "txs": print "\n (declare (in) int lod)", elif tex_inst == "txd": grad_type = vec_type("", coord_dim) @@ -75,18 +83,19 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0): print ")\n ((return (" + tex_inst, return_type, "(var_ref sampler)", - # Coordinate - if extra_dim > 0: - print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))", - else: - print "(var_ref P)", + if tex_inst != "txs": + # Coordinate + if extra_dim > 0: + print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))", + else: + print "(var_ref P)", - if variant & Offset: - print "(var_ref offset)", - else: - print "0", + if variant & Offset: + print "(var_ref offset)", + else: + print "0", - if tex_inst != "txf": + if tex_inst != "txf" and tex_inst != "txs": # Projective divisor if variant & Proj: print "(swiz " + "xyzw"[coord_dim + extra_dim-1] + " (var_ref P))", @@ -104,7 +113,7 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0): # Bias/explicit LOD/gradient: if tex_inst == "txb": print "(var_ref bias)", - elif tex_inst == "txl" or tex_inst == "txf": + elif tex_inst == "txl" or tex_inst == "txf" or tex_inst == "txs": print "(var_ref lod)", elif tex_inst == "txd": print "((var_ref dPdx) (var_ref dPdy))", @@ -130,6 +139,19 @@ def end_function(fs, name): # # Takes a dictionary as an argument. def generate_texture_functions(fs): + start_function("textureSize") + generate_fiu_sigs("txs", "1D") + generate_fiu_sigs("txs", "2D") + generate_fiu_sigs("txs", "3D") + generate_fiu_sigs("txs", "Cube") + generate_fiu_sigs("txs", "1DArray") + generate_fiu_sigs("txs", "2DArray") + generate_sigs("", "txs", "1DShadow") + generate_sigs("", "txs", "2DShadow") + generate_sigs("", "txs", "1DArrayShadow") + generate_sigs("", "txs", "2DArrayShadow") + end_function(fs, "textureSize") + start_function("texture") generate_fiu_sigs("tex", "1D") generate_fiu_sigs("tex", "2D") diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 0a35e88cec7..940830416c0 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -1132,8 +1132,10 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api) if (extensions->ARB_shader_texture_lod) add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1); - if (extensions->AMD_conservative_depth) + if (extensions->AMD_conservative_depth) { add_builtin_define(parser, "GL_AMD_conservative_depth", 1); + add_builtin_define(parser, "GL_ARB_conservative_depth", 1); + } } language_version = 110; diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 2c0498ece7a..25d02fb1eaf 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -971,13 +971,9 @@ single_declaration: fully_specified_type { void *ctx = state; - if ($1->specifier->type_specifier != ast_struct) { - _mesa_glsl_error(& @1, state, "empty declaration list\n"); - YYERROR; - } else { - $$ = new(ctx) ast_declarator_list($1); - $$->set_location(yylloc); - } + /* Empty declaration list is valid. */ + $$ = new(ctx) ast_declarator_list($1); + $$->set_location(yylloc); } | fully_specified_type any_identifier { @@ -1115,7 +1111,7 @@ layout_qualifier_id: } } - /* Layout qualifiers for AMD_conservative_depth. */ + /* Layout qualifiers for AMD/ARB_conservative_depth. */ if (!got_one && state->AMD_conservative_depth_enable) { if (strcmp($1, "depth_any") == 0) { got_one = true; @@ -1133,7 +1129,7 @@ layout_qualifier_id: if (got_one && state->AMD_conservative_depth_warn) { _mesa_glsl_warning(& @1, state, - "GL_AMD_conservative_depth " + "GL_ARB_conservative_depth " "layout qualifier `%s' is used\n", $1); } } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index cc781378d76..8f740e6a8e9 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -253,6 +253,7 @@ struct _mesa_glsl_extension { static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { /* target availability API availability */ /* name VS GS FS GL ES supported flag */ + EXT(ARB_conservative_depth, true, false, true, true, false, AMD_conservative_depth), EXT(ARB_draw_buffers, false, false, true, true, false, dummy_true), EXT(ARB_draw_instanced, true, false, false, true, false, ARB_draw_instanced), EXT(ARB_explicit_attrib_location, true, false, true, true, false, ARB_explicit_attrib_location), diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 2f4d3cba77f..dc6911d1c9a 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -129,6 +129,12 @@ struct _mesa_glsl_parse_state { */ class ir_function_signature *current_function; + /** + * During AST to IR conversion, pointer to the toplevel IR + * instruction list being generated. + */ + exec_list *toplevel_ir; + /** Have we found a return statement in this function? */ bool found_return; @@ -174,6 +180,8 @@ struct _mesa_glsl_parse_state { bool ARB_shader_stencil_export_warn; bool AMD_conservative_depth_enable; bool AMD_conservative_depth_warn; + bool ARB_conservative_depth_enable; + bool ARB_conservative_depth_warn; bool AMD_shader_stencil_export_enable; bool AMD_shader_stencil_export_warn; bool OES_texture_3D_enable; diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index a5e21bbb96c..c94aec0d2da 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -523,3 +523,19 @@ glsl_type::component_slots() const return 0; } } + +bool +glsl_type::can_implicitly_convert_to(const glsl_type *desired) const +{ + if (this == desired) + return true; + + /* There is no conversion among matrix types. */ + if (this->matrix_columns > 1 || desired->matrix_columns > 1) + return false; + + /* int and uint can be converted to float. */ + return desired->is_float() + && this->is_integer() + && this->vector_elements == desired->vector_elements; +} diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 87f57e7c756..048696693be 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -224,6 +224,41 @@ struct glsl_type { */ unsigned component_slots() const; + /** + * \brief Can this type be implicitly converted to another? + * + * \return True if the types are identical or if this type can be converted + * to \c desired according to Section 4.1.10 of the GLSL spec. + * + * \verbatim + * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10 + * Implicit Conversions: + * + * In some situations, an expression and its type will be implicitly + * converted to a different type. The following table shows all allowed + * implicit conversions: + * + * Type of expression | Can be implicitly converted to + * -------------------------------------------------- + * int float + * uint + * + * ivec2 vec2 + * uvec2 + * + * ivec3 vec3 + * uvec3 + * + * ivec4 vec4 + * uvec4 + * + * There are no implicit array or structure conversions. For example, + * an array of int cannot be implicitly converted to an array of float. + * There are no implicit conversions between signed and unsigned + * integers. + * \endverbatim + */ + bool can_implicitly_convert_to(const glsl_type *desired) const; /** * Query whether or not a type is a scalar (non-vector and non-matrix). diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 827fe8e17a7..41ed4f114ca 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1096,7 +1096,7 @@ ir_dereference_record::ir_dereference_record(ir_variable *var, } bool -ir_dereference::is_lvalue() +ir_dereference::is_lvalue() const { ir_variable *var = this->variable_referenced(); @@ -1121,7 +1121,7 @@ ir_dereference::is_lvalue() } -const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf" }; +const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txs" }; const char *ir_texture::opcode_string() { @@ -1150,11 +1150,15 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) this->sampler = sampler; this->type = type; - assert(sampler->type->sampler_type == (int) type->base_type); - if (sampler->type->sampler_shadow) - assert(type->vector_elements == 4 || type->vector_elements == 1); - else - assert(type->vector_elements == 4); + if (this->op == ir_txs) { + assert(type->base_type == GLSL_TYPE_INT); + } else { + assert(sampler->type->sampler_type == (int) type->base_type); + if (sampler->type->sampler_shadow) + assert(type->vector_elements == 4 || type->vector_elements == 1); + else + assert(type->vector_elements == 4); + } } @@ -1310,7 +1314,7 @@ ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) #undef I ir_variable * -ir_swizzle::variable_referenced() +ir_swizzle::variable_referenced() const { return this->val->variable_referenced(); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 80ad3dd295e..2e899f3ed6f 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -144,7 +144,7 @@ public: ir_rvalue *as_rvalue_to_saturate(); - virtual bool is_lvalue() + virtual bool is_lvalue() const { return false; } @@ -152,7 +152,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return NULL; } @@ -236,7 +236,7 @@ enum ir_variable_interpolation { /** * \brief Layout qualifiers for gl_FragDepth. * - * The AMD_conservative_depth extension allows gl_FragDepth to be redeclared + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared * with a layout qualifier. */ enum ir_depth_layout { @@ -1212,7 +1212,8 @@ enum ir_texture_opcode { ir_txb, /**< Texture look-up with LOD bias */ ir_txl, /**< Texture look-up with explicit LOD */ ir_txd, /**< Texture look-up with partial derivatvies */ - ir_txf /**< Texel fetch with explicit LOD */ + ir_txf, /**< Texel fetch with explicit LOD */ + ir_txs /**< Texture size */ }; @@ -1233,6 +1234,7 @@ enum ir_texture_opcode { * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>) * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy)) * (txf <type> <sampler> <coordinate> 0 <lod>) + * (txs <type> <sampler> <lod>) */ class ir_texture : public ir_rvalue { public: @@ -1355,7 +1357,7 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); - bool is_lvalue() + bool is_lvalue() const { return val->is_lvalue() && !mask.has_duplicates; } @@ -1363,7 +1365,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced(); + virtual ir_variable *variable_referenced() const; ir_rvalue *val; ir_swizzle_mask mask; @@ -1387,12 +1389,12 @@ public: return this; } - bool is_lvalue(); + bool is_lvalue() const; /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() = 0; + virtual ir_variable *variable_referenced() const = 0; }; @@ -1413,7 +1415,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->var; } @@ -1462,7 +1464,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->array->variable_referenced(); } @@ -1496,7 +1498,7 @@ public: /** * Get the variable that is ultimately referenced by an r-value */ - virtual ir_variable *variable_referenced() + virtual ir_variable *variable_referenced() const { return this->record->variable_referenced(); } @@ -1635,6 +1637,32 @@ visit_exec_list(exec_list *list, ir_visitor *visitor); */ void validate_ir_tree(exec_list *instructions); +struct _mesa_glsl_parse_state; +struct gl_shader_program; + +/** + * Detect whether an unlinked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c _mesa_glsl_error will be called to emit error messages for each function + * that is in the recursion cycle. + */ +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + +/** + * Detect whether a linked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c link_error_printf will be called to emit error messages for each function + * that is in the recursion cycle. In addition, + * \c gl_shader_program::LinkStatus will be set to false. + */ +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions); + /** * Make a clone of each IR instruction in a list * @@ -1669,4 +1697,8 @@ ir_has_call(ir_instruction *ir); extern void do_set_program_inouts(exec_list *instructions, struct gl_program *prog); +extern char * +prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters); + #endif /* IR_H */ diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index 069bb85e8de..f0757365dd2 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -222,7 +222,8 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const new_tex->type = this->type; new_tex->sampler = this->sampler->clone(mem_ctx, ht); - new_tex->coordinate = this->coordinate->clone(mem_ctx, ht); + if (this->coordinate) + new_tex->coordinate = this->coordinate->clone(mem_ctx, ht); if (this->projector) new_tex->projector = this->projector->clone(mem_ctx, ht); if (this->shadow_comparitor) { @@ -240,6 +241,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const break; case ir_txl: case ir_txf: + case ir_txs: new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht); break; case ir_txd: diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 0f2f1a0eea4..51d32b46f98 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -24,73 +24,28 @@ #include "glsl_types.h" #include "ir.h" -int -type_compare(const glsl_type *a, const glsl_type *b) -{ - /* If the types are the same, they trivially match. - */ - if (a == b) - return 0; - - switch (a->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - /* There is no implicit conversion to or from integer types or bool. - */ - if ((a->is_integer() != b->is_integer()) - || (a->is_boolean() != b->is_boolean())) - return -1; - - /* FALLTHROUGH */ - - case GLSL_TYPE_FLOAT: - if ((a->vector_elements != b->vector_elements) - || (a->matrix_columns != b->matrix_columns)) - return -1; - - return 1; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_STRUCT: - /* Samplers and structures must match exactly. - */ - return -1; - - case GLSL_TYPE_ARRAY: - if ((b->base_type != GLSL_TYPE_ARRAY) - || (a->length != b->length)) - return -1; - - /* From GLSL 1.50 spec, page 27 (page 33 of the PDF): - * "There are no implicit array or structure conversions." - * - * If the comparison of the array element types detects that a conversion - * would be required, the array types do not match. - */ - return (type_compare(a->fields.array, b->fields.array) == 0) ? 0 : -1; - - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - default: - /* These are all error conditions. It is invalid for a parameter to - * a function to be declared as error, void, or a function. - */ - return -1; - } - - /* This point should be unreachable. - */ - assert(0); -} - - -static int +typedef enum { + PARAMETER_LIST_NO_MATCH, + PARAMETER_LIST_EXACT_MATCH, + PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. */ +} parameter_list_match_t; + +/** + * \brief Check if two parameter lists match. + * + * \param list_a Parameters of the function definition. + * \param list_b Actual parameters passed to the function. + * \see matching_signature() + */ +static parameter_list_match_t parameter_lists_match(const exec_list *list_a, const exec_list *list_b) { const exec_node *node_a = list_a->head; const exec_node *node_b = list_b->head; - int total_score = 0; + + /* This is set to true if there is an inexact match requiring an implicit + * conversion. */ + bool inexact_match = false; for (/* empty */ ; !node_a->is_tail_sentinel() @@ -100,18 +55,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * do not match. */ if (node_b->is_tail_sentinel()) - return -1; + return PARAMETER_LIST_NO_MATCH; const ir_variable *const param = (ir_variable *) node_a; const ir_instruction *const actual = (ir_instruction *) node_b; - /* Determine whether or not the types match. If the types are an - * exact match, the match score is zero. If the types don't match - * but the actual parameter can be coerced to the type of the declared - * parameter, the match score is one. - */ - int score; + if (param->type == actual->type) + continue; + + /* Try to find an implicit conversion from actual to param. */ + inexact_match = true; switch ((enum ir_variable_mode)(param->mode)) { case ir_var_auto: case ir_var_uniform: @@ -121,15 +75,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * as uniform. */ assert(0); - return -1; + return PARAMETER_LIST_NO_MATCH; case ir_var_const_in: case ir_var_in: - score = type_compare(param->type, actual->type); + if (!actual->type->can_implicitly_convert_to(param->type)) + return PARAMETER_LIST_NO_MATCH; break; case ir_var_out: - score = type_compare(actual->type, param->type); + if (!param->type->can_implicitly_convert_to(actual->type)) + return PARAMETER_LIST_NO_MATCH; break; case ir_var_inout: @@ -137,17 +93,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * there is int -> float but no float -> int), inout parameters must * be exact matches. */ - score = (type_compare(actual->type, param->type) == 0) ? 0 : -1; - break; + return PARAMETER_LIST_NO_MATCH; default: assert(false); + return PARAMETER_LIST_NO_MATCH; } - - if (score < 0) - return -1; - - total_score += score; } /* If all of the parameters from the other parameter list have been @@ -155,9 +106,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) * match. */ if (!node_b->is_tail_sentinel()) - return -1; + return PARAMETER_LIST_NO_MATCH; - return total_score; + if (inexact_match) + return PARAMETER_LIST_INEXACT_MATCH; + else + return PARAMETER_LIST_EXACT_MATCH; } @@ -181,18 +135,20 @@ ir_function::matching_signature(const exec_list *actual_parameters) ir_function_signature *const sig = (ir_function_signature *) iter.get(); - const int score = parameter_lists_match(& sig->parameters, - actual_parameters); - - /* If we found an exact match, simply return it */ - if (score == 0) + switch (parameter_lists_match(& sig->parameters, actual_parameters)) { + case PARAMETER_LIST_EXACT_MATCH: return sig; - - if (score > 0) { + case PARAMETER_LIST_INEXACT_MATCH: if (match == NULL) match = sig; else multiple_inexact_matches = true; + continue; + case PARAMETER_LIST_NO_MATCH: + continue; + default: + assert(false); + return NULL; } } diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp new file mode 100644 index 00000000000..8f805bf1ba9 --- /dev/null +++ b/src/glsl/ir_function_detect_recursion.cpp @@ -0,0 +1,370 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_function_detect_recursion.cpp + * Determine whether a shader contains static recursion. + * + * Consider the (possibly disjoint) graph of function calls in a shader. If a + * program contains recursion, this graph will contain a cycle. If a function + * is part of a cycle, it will have a caller and it will have a callee (it + * calls another function). + * + * To detect recursion, the function call graph is constructed. The graph is + * repeatedly reduced by removing any function that either has no callees + * (leaf functions) or has no caller. Eventually the only functions that + * remain will be the functions in the cycles. + * + * The GLSL spec is a bit wishy-washy about recursion. + * + * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "Behavior is undefined if recursion is used. Recursion means having any + * function appearing more than once at any one time in the run-time stack + * of function calls. That is, a function may not call itself either + * directly or indirectly. Compilers may give diagnostic messages when + * this is detectable at compile time, but not all such cases can be + * detected at compile time." + * + * From page 79 (page 85 of the PDF): + * + * "22) Should recursion be supported? + * + * DISCUSSION: Probably not necessary, but another example of limiting + * the language based on how it would directly map to hardware. One + * thought is that recursion would benefit ray tracing shaders. On the + * other hand, many recursion operations can also be implemented with the + * user managing the recursion through arrays. RenderMan doesn't support + * recursion. This could be added at a later date, if it proved to be + * necessary. + * + * RESOLVED on September 10, 2002: Implementations are not required to + * support recursion. + * + * CLOSED on September 10, 2002." + * + * From page 79 (page 85 of the PDF): + * + * "56) Is it an error for an implementation to support recursion if the + * specification says recursion is not supported? + * + * ADDED on September 10, 2002. + * + * DISCUSSION: This issues is related to Issue (22). If we say that + * recursion (or some other piece of functionality) is not supported, is + * it an error for an implementation to support it? Perhaps the + * specification should remain silent on these kind of things so that they + * could be gracefully added later as an extension or as part of the + * standard. + * + * RESOLUTION: Languages, in general, have programs that are not + * well-formed in ways a compiler cannot detect. Portability is only + * ensured for well-formed programs. Detecting recursion is an example of + * this. The language will say a well-formed program may not recurse, but + * compilers are not forced to detect that recursion may happen. + * + * CLOSED: November 29, 2002." + * + * In GLSL 1.10 the behavior of recursion is undefined. Compilers don't have + * to reject shaders (at compile-time or link-time) that contain recursion. + * Instead they could work, or crash, or kill a kitten. + * + * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec: + * + * "Recursion is not allowed, not even statically. Static recursion is + * present if the static function call graph of the program contains + * cycles." + * + * This langauge clears things up a bit, but it still leaves a lot of + * questions unanswered. + * + * - Is the error generated at compile-time or link-time? + * + * - Is it an error to have a recursive function that is never statically + * called by main or any function called directly or indirectly by main? + * Technically speaking, such a function is not in the "static function + * call graph of the program" at all. + * + * \bug + * If a shader has multiple cycles, this algorithm may erroneously complain + * about functions that aren't in any cycle, but are in the part of the call + * tree that connects them. For example, if the call graph consists of a + * cycle between A and B, and a cycle between D and E, and B also calls C + * which calls D, then this algorithm will report C as a function which "has + * static recursion" even though it is not part of any cycle. + * + * A better algorithm for cycle detection that doesn't have this drawback can + * be found here: + * + * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm + * + * \author Ian Romanick <[email protected]> + */ +#include "main/core.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "linker.h" +#include "program/hash_table.h" +#include "program.h" + +struct call_node : public exec_node { + class function *func; +}; + +class function { +public: + function(ir_function_signature *sig) + : sig(sig) + { + /* empty */ + } + + + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + /* If the user *does* call delete, that's OK, we will just + * ralloc_free in that case. */ + static void operator delete(void *node) + { + ralloc_free(node); + } + + ir_function_signature *sig; + + /** List of functions called by this function. */ + exec_list callees; + + /** List of functions that call this function. */ + exec_list callers; +}; + +class has_recursion_visitor : public ir_hierarchical_visitor { +public: + has_recursion_visitor() + : current(NULL) + { + this->mem_ctx = ralloc_context(NULL); + this->function_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~has_recursion_visitor() + { + hash_table_dtor(this->function_hash); + ralloc_free(this->mem_ctx); + } + + function *get_function(ir_function_signature *sig) + { + function *f = (function *) hash_table_find(this->function_hash, sig); + if (f == NULL) { + f = new(mem_ctx) function(sig); + hash_table_insert(this->function_hash, f, sig); + } + + return f; + } + + virtual ir_visitor_status visit_enter(ir_function_signature *sig) + { + this->current = this->get_function(sig); + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function_signature *sig) + { + (void) sig; + this->current = NULL; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *call) + { + /* At global scope this->current will be NULL. Since there is no way to + * call global scope, it can never be part of a cycle. Don't bother + * adding calls from global scope to the graph. + */ + if (this->current == NULL) + return visit_continue; + + function *const target = this->get_function(call->get_callee()); + + /* Create a link from the caller to the callee. + */ + call_node *node = new(mem_ctx) call_node; + node->func = target; + this->current->callees.push_tail(node); + + /* Create a link from the callee to the caller. + */ + node = new(mem_ctx) call_node; + node->func = this->current; + target->callers.push_tail(node); + return visit_continue; + } + + function *current; + struct hash_table *function_hash; + void *mem_ctx; + bool progress; +}; + +static void +destroy_links(exec_list *list, function *f) +{ + foreach_list_safe(node, list) { + struct call_node *n = (struct call_node *) node; + + /* If this is the right function, remove it. Note that the loop cannot + * terminate now. There can be multiple links to a function if it is + * either called multiple times or calls multiple times. + */ + if (n->func == f) + n->remove(); + } +} + + +/** + * Remove a function if it has either no in or no out links + */ +static void +remove_unlinked_functions(const void *key, void *data, void *closure) +{ + has_recursion_visitor *visitor = (has_recursion_visitor *) closure; + function *f = (function *) data; + + if (f->callers.is_empty() || f->callees.is_empty()) { + while (!f->callers.is_empty()) { + struct call_node *n = (struct call_node *) f->callers.pop_head(); + destroy_links(& n->func->callees, f); + } + + while (!f->callees.is_empty()) { + struct call_node *n = (struct call_node *) f->callees.pop_head(); + destroy_links(& n->func->callers, f); + } + + hash_table_remove(visitor->function_hash, key); + visitor->progress = true; + } +} + + +static void +emit_errors_unlinked(const void *key, void *data, void *closure) +{ + struct _mesa_glsl_parse_state *state = + (struct _mesa_glsl_parse_state *) closure; + function *f = (function *) data; + YYLTYPE loc; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, + "function `%s' has static recursion.", + proto); + ralloc_free(proto); +} + + +static void +emit_errors_linked(const void *key, void *data, void *closure) +{ + struct gl_shader_program *prog = + (struct gl_shader_program *) closure; + function *f = (function *) data; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + linker_error(prog, "function `%s' has static recursion.\n", proto); + ralloc_free(proto); + prog->LinkStatus = false; +} + + +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state); +} + + +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_linked, prog); +} diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp index 4a607dc8749..d33fc85bf0a 100644 --- a/src/glsl/ir_hv_accept.cpp +++ b/src/glsl/ir_hv_accept.cpp @@ -171,9 +171,11 @@ ir_texture::accept(ir_hierarchical_visitor *v) if (s != visit_continue) return (s == visit_continue_with_parent) ? visit_continue : s; - s = this->coordinate->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; + if (this->coordinate) { + s = this->coordinate->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } if (this->projector) { s = this->projector->accept(v); @@ -203,6 +205,7 @@ ir_texture::accept(ir_hierarchical_visitor *v) break; case ir_txl: case ir_txf: + case ir_txs: s = this->lod_info.lod->accept(v); if (s != visit_continue) return (s == visit_continue_with_parent) ? visit_continue : s; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 59a040751d9..f7808bdda9a 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -69,3 +69,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform); bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool optimize_redundant_jumps(exec_list *instructions); + +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx); diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 518910bd129..ea7858224bb 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -244,19 +244,21 @@ void ir_print_visitor::visit(ir_texture *ir) ir->sampler->accept(this); printf(" "); - ir->coordinate->accept(this); + if (ir->op != ir_txs) { + ir->coordinate->accept(this); - printf(" "); + printf(" "); - if (ir->offset != NULL) { - ir->offset->accept(this); - } else { - printf("0"); - } + if (ir->offset != NULL) { + ir->offset->accept(this); + } else { + printf("0"); + } - printf(" "); + printf(" "); + } - if (ir->op != ir_txf) { + if (ir->op != ir_txf && ir->op != ir_txs) { if (ir->projector) ir->projector->accept(this); else @@ -280,6 +282,7 @@ void ir_print_visitor::visit(ir_texture *ir) break; case ir_txl: case ir_txf: + case ir_txs: ir->lod_info.lod->accept(this); break; case ir_txd: diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index f3a621734ba..22009eebcb9 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -885,6 +885,8 @@ ir_reader::read_texture(s_expression *expr) { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow }; s_pattern txf_pattern[] = { "txf", s_type, s_sampler, s_coord, s_offset, s_lod }; + s_pattern txs_pattern[] = + { "txs", s_type, s_sampler, s_lod }; s_pattern other_pattern[] = { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod }; @@ -892,6 +894,8 @@ ir_reader::read_texture(s_expression *expr) op = ir_tex; } else if (MATCH(expr, txf_pattern)) { op = ir_txf; + } else if (MATCH(expr, txs_pattern)) { + op = ir_txs; } else if (MATCH(expr, other_pattern)) { op = ir_texture::get_opcode(tag->value()); if (op == -1) @@ -920,25 +924,27 @@ ir_reader::read_texture(s_expression *expr) } tex->set_sampler(sampler, type); - // Read coordinate (any rvalue) - tex->coordinate = read_rvalue(s_coord); - if (tex->coordinate == NULL) { - ir_read_error(NULL, "when reading coordinate in (%s ...)", - tex->opcode_string()); - return NULL; - } - - // Read texel offset - either 0 or an rvalue. - s_int *si_offset = SX_AS_INT(s_offset); - if (si_offset == NULL || si_offset->value() != 0) { - tex->offset = read_rvalue(s_offset); - if (tex->offset == NULL) { - ir_read_error(s_offset, "expected 0 or an expression"); + if (op != ir_txs) { + // Read coordinate (any rvalue) + tex->coordinate = read_rvalue(s_coord); + if (tex->coordinate == NULL) { + ir_read_error(NULL, "when reading coordinate in (%s ...)", + tex->opcode_string()); return NULL; } + + // Read texel offset - either 0 or an rvalue. + s_int *si_offset = SX_AS_INT(s_offset); + if (si_offset == NULL || si_offset->value() != 0) { + tex->offset = read_rvalue(s_offset); + if (tex->offset == NULL) { + ir_read_error(s_offset, "expected 0 or an expression"); + return NULL; + } + } } - if (op != ir_txf) { + if (op != ir_txf && op != ir_txs) { s_int *proj_as_int = SX_AS_INT(s_proj); if (proj_as_int && proj_as_int->value() == 1) { tex->projector = NULL; @@ -973,6 +979,7 @@ ir_reader::read_texture(s_expression *expr) break; case ir_txl: case ir_txf: + case ir_txs: tex->lod_info.lod = read_rvalue(s_lod); if (tex->lod_info.lod == NULL) { ir_read_error(NULL, "when reading LOD in (%s ...)", diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp index ed6c7cb6a1a..193bcd2d7bd 100644 --- a/src/glsl/ir_rvalue_visitor.cpp +++ b/src/glsl/ir_rvalue_visitor.cpp @@ -63,6 +63,7 @@ ir_rvalue_visitor::visit_leave(ir_texture *ir) break; case ir_txf: case ir_txl: + case ir_txs: handle_rvalue(&ir->lod_info.lod); break; case ir_txd: diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index f3fceb2a57d..2d1c6097c57 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -59,7 +59,8 @@ public: virtual ir_visitor_status visit(ir_variable *v); virtual ir_visitor_status visit(ir_dereference_variable *ir); - virtual ir_visitor_status visit(ir_if *ir); + + virtual ir_visitor_status visit_enter(ir_if *ir); virtual ir_visitor_status visit_leave(ir_loop *ir); virtual ir_visitor_status visit_enter(ir_function *ir); @@ -102,7 +103,7 @@ ir_validate::visit(ir_dereference_variable *ir) } ir_visitor_status -ir_validate::visit(ir_if *ir) +ir_validate::visit_enter(ir_if *ir) { if (ir->condition->type != glsl_type::bool_type) { printf("ir_if condition %s type instead of bool.\n", @@ -541,7 +542,43 @@ ir_validate::visit_enter(ir_call *ir) abort(); } + const exec_node *formal_param_node = callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (true) { + if (formal_param_node->is_tail_sentinel() + != actual_param_node->is_tail_sentinel()) { + printf("ir_call has the wrong number of parameters:\n"); + goto dump_ir; + } + if (formal_param_node->is_tail_sentinel()) { + break; + } + const ir_variable *formal_param + = (const ir_variable *) formal_param_node; + const ir_rvalue *actual_param + = (const ir_rvalue *) actual_param_node; + if (formal_param->type != actual_param->type) { + printf("ir_call parameter type mismatch:\n"); + goto dump_ir; + } + if (formal_param->mode == ir_var_out + || formal_param->mode == ir_var_inout) { + if (!actual_param->is_lvalue()) { + printf("ir_call out/inout parameters must be lvalues:\n"); + goto dump_ir; + } + } + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + } + return visit_continue; + +dump_ir: + ir->print(); + printf("callee:\n"); + callee->print(); + abort(); } void diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp index 7ba760daa1a..acee3271249 100644 --- a/src/glsl/link_functions.cpp +++ b/src/glsl/link_functions.cpp @@ -91,8 +91,8 @@ public: if (sig == NULL) { /* FINISHME: Log the full signature of unresolved function. */ - linker_error_printf(this->prog, "unresolved reference to function " - "`%s'\n", name); + linker_error(this->prog, "unresolved reference to function `%s'\n", + name); this->success = false; return visit_stop; } @@ -104,10 +104,12 @@ public: if (f == NULL) { f = new(linked) ir_function(name); - /* Add the new function to the linked IR. + /* Add the new function to the linked IR. Put it at the end + * so that it comes after any global variable declarations + * that it refers to. */ linked->symbols->add_function(f); - linked->ir->push_head(f); + linked->ir->push_tail(f); } ir_function_signature *linked_sig = diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 34b64837a46..ba81c59ff2c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -164,7 +164,7 @@ private: void -linker_error_printf(gl_shader_program *prog, const char *fmt, ...) +linker_error(gl_shader_program *prog, const char *fmt, ...) { va_list ap; @@ -172,6 +172,21 @@ linker_error_printf(gl_shader_program *prog, const char *fmt, ...) va_start(ap, fmt); ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); va_end(ap); + + prog->LinkStatus = false; +} + + +void +linker_warning(gl_shader_program *prog, const char *fmt, ...) +{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "error: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + } @@ -243,8 +258,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog, find_assignment_visitor find("gl_Position"); find.run(shader->ir); if (!find.variable_found()) { - linker_error_printf(prog, - "vertex shader does not write to `gl_Position'\n"); + linker_error(prog, "vertex shader does not write to `gl_Position'\n"); return false; } @@ -271,8 +285,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog, frag_data.run(shader->ir); if (frag_color.variable_found() && frag_data.variable_found()) { - linker_error_printf(prog, "fragment shader writes to both " - "`gl_FragColor' and `gl_FragData'\n"); + linker_error(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); return false; } @@ -357,11 +371,11 @@ cross_validate_globals(struct gl_shader_program *prog, existing->type = var->type; } } else { - linker_error_printf(prog, "%s `%s' declared as type " - "`%s' and type `%s'\n", - mode_string(var), - var->name, var->type->name, - existing->type->name); + linker_error(prog, "%s `%s' declared as type " + "`%s' and type `%s'\n", + mode_string(var), + var->name, var->type->name, + existing->type->name); return false; } } @@ -369,9 +383,9 @@ cross_validate_globals(struct gl_shader_program *prog, if (var->explicit_location) { if (existing->explicit_location && (var->location != existing->location)) { - linker_error_printf(prog, "explicit locations for %s " - "`%s' have differing values\n", - mode_string(var), var->name); + linker_error(prog, "explicit locations for %s " + "`%s' have differing values\n", + mode_string(var), var->name); return false; } @@ -381,7 +395,7 @@ cross_validate_globals(struct gl_shader_program *prog, /* Validate layout qualifiers for gl_FragDepth. * - * From the AMD_conservative_depth spec: + * From the AMD/ARB_conservative_depth specs: * "If gl_FragDepth is redeclared in any fragment shader in * a program, it must be redeclared in all fragment shaders in that * program that have static assignments to gl_FragDepth. All @@ -392,12 +406,12 @@ cross_validate_globals(struct gl_shader_program *prog, bool layout_declared = var->depth_layout != ir_depth_layout_none; bool layout_differs = var->depth_layout != existing->depth_layout; if (layout_declared && layout_differs) { - linker_error_printf(prog, + linker_error(prog, "All redeclarations of gl_FragDepth in all fragment shaders " "in a single program must have the same set of qualifiers."); } if (var->used && layout_differs) { - linker_error_printf(prog, + linker_error(prog, "If gl_FragDepth is redeclared with a layout qualifier in" "any fragment shader, it must be redeclared with the same" "layout qualifier in all fragment shaders that have" @@ -410,9 +424,9 @@ cross_validate_globals(struct gl_shader_program *prog, if (var->constant_value != NULL) { if (existing->constant_value != NULL) { if (!var->constant_value->has_value(existing->constant_value)) { - linker_error_printf(prog, "initializers for %s " - "`%s' have differing values\n", - mode_string(var), var->name); + linker_error(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); return false; } } else @@ -433,15 +447,15 @@ cross_validate_globals(struct gl_shader_program *prog, } if (existing->invariant != var->invariant) { - linker_error_printf(prog, "declarations for %s `%s' have " - "mismatching invariant qualifiers\n", - mode_string(var), var->name); + linker_error(prog, "declarations for %s `%s' have " + "mismatching invariant qualifiers\n", + mode_string(var), var->name); return false; } if (existing->centroid != var->centroid) { - linker_error_printf(prog, "declarations for %s `%s' have " - "mismatching centroid qualifiers\n", - mode_string(var), var->name); + linker_error(prog, "declarations for %s `%s' have " + "mismatching centroid qualifiers\n", + mode_string(var), var->name); return false; } } else @@ -529,13 +543,12 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ if (!output->type->is_array() || (strncmp("gl_", output->name, 3) != 0)) { - linker_error_printf(prog, - "%s shader output `%s' declared as " - "type `%s', but %s shader input declared " - "as type `%s'\n", - producer_stage, output->name, - output->type->name, - consumer_stage, input->type->name); + linker_error(prog, + "%s shader output `%s' declared as type `%s', " + "but %s shader input declared as type `%s'\n", + producer_stage, output->name, + output->type->name, + consumer_stage, input->type->name); return false; } } @@ -543,40 +556,40 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, /* Check that all of the qualifiers match between stages. */ if (input->centroid != output->centroid) { - linker_error_printf(prog, - "%s shader output `%s' %s centroid qualifier, " - "but %s shader input %s centroid qualifier\n", - producer_stage, - output->name, - (output->centroid) ? "has" : "lacks", - consumer_stage, - (input->centroid) ? "has" : "lacks"); + linker_error(prog, + "%s shader output `%s' %s centroid qualifier, " + "but %s shader input %s centroid qualifier\n", + producer_stage, + output->name, + (output->centroid) ? "has" : "lacks", + consumer_stage, + (input->centroid) ? "has" : "lacks"); return false; } if (input->invariant != output->invariant) { - linker_error_printf(prog, - "%s shader output `%s' %s invariant qualifier, " - "but %s shader input %s invariant qualifier\n", - producer_stage, - output->name, - (output->invariant) ? "has" : "lacks", - consumer_stage, - (input->invariant) ? "has" : "lacks"); + linker_error(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + producer_stage, + output->name, + (output->invariant) ? "has" : "lacks", + consumer_stage, + (input->invariant) ? "has" : "lacks"); return false; } if (input->interpolation != output->interpolation) { - linker_error_printf(prog, - "%s shader output `%s' specifies %s " - "interpolation qualifier, " - "but %s shader input specifies %s " - "interpolation qualifier\n", - producer_stage, - output->name, - output->interpolation_string(), - consumer_stage, - input->interpolation_string()); + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + producer_stage, + output->name, + output->interpolation_string(), + consumer_stage, + input->interpolation_string()); return false; } } @@ -823,9 +836,8 @@ link_intrastage_shaders(void *mem_ctx, if ((other_sig != NULL) && other_sig->is_defined && !other_sig->is_builtin) { - linker_error_printf(prog, - "function `%s' is multiply defined", - f->name); + linker_error(prog, "function `%s' is multiply defined", + f->name); return NULL; } } @@ -849,9 +861,9 @@ link_intrastage_shaders(void *mem_ctx, } if (main == NULL) { - linker_error_printf(prog, "%s shader lacks `main'\n", - (shader_list[0]->Type == GL_VERTEX_SHADER) - ? "vertex" : "fragment"); + linker_error(prog, "%s shader lacks `main'\n", + (shader_list[0]->Type == GL_VERTEX_SHADER) + ? "vertex" : "fragment"); return NULL; } @@ -910,6 +922,14 @@ link_intrastage_shaders(void *mem_ctx, free(linking_shaders); +#ifdef DEBUG + /* At this point linked should contain all of the linked IR, so + * validate it to make sure nothing went wrong. + */ + if (linked) + validate_ir_tree(linked->ir); +#endif + /* Make a pass over all variable declarations to ensure that arrays with * unspecified sizes have a size specified. The size is inferred from the * max_array_access field. @@ -1309,10 +1329,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, * attribute overlaps any previously allocated bits. */ if ((~(use_mask << attr) & used_locations) != used_locations) { - linker_error_printf(prog, - "insufficient contiguous attribute locations " - "available for vertex shader input `%s'", - var->name); + linker_error(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + var->name); return false; } @@ -1343,7 +1363,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, foreach_list(node, sh->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if ((var == NULL) || (var->mode != direction)) + if ((var == NULL) || (var->mode != (unsigned) direction)) continue; if (var->explicit_location) { @@ -1353,11 +1373,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, if ((var->location >= (int)(max_index + generic_base)) || (var->location < 0)) { - linker_error_printf(prog, - "invalid explicit location %d specified for " - "`%s'\n", - (var->location < 0) ? var->location : attr, - var->name); + linker_error(prog, + "invalid explicit location %d specified for `%s'\n", + (var->location < 0) ? var->location : attr, + var->name); return false; } else if (var->location >= generic_base) { used_locations |= (use_mask << attr); @@ -1406,10 +1425,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog, const char *const string = (target_index == MESA_SHADER_VERTEX) ? "vertex shader input" : "fragment shader output"; - linker_error_printf(prog, - "insufficient contiguous attribute locations " - "available for %s `%s'", - string, to_assign[i].var->name); + linker_error(prog, + "insufficient contiguous attribute locations " + "available for %s `%s'", + string, to_assign[i].var->name); return false; } @@ -1525,9 +1544,8 @@ assign_varying_locations(struct gl_context *ctx, * "glsl1-varying read but not written" in piglit. */ - linker_error_printf(prog, "fragment shader varying %s not written " - "by vertex shader\n.", var->name); - prog->LinkStatus = false; + linker_error(prog, "fragment shader varying %s not written " + "by vertex shader\n.", var->name); } /* An 'in' variable is only really a shader input if its @@ -1544,17 +1562,17 @@ assign_varying_locations(struct gl_context *ctx, if (ctx->API == API_OPENGLES2 || prog->Version == 100) { if (varying_vectors > ctx->Const.MaxVarying) { - linker_error_printf(prog, "shader uses too many varying vectors " - "(%u > %u)\n", - varying_vectors, ctx->Const.MaxVarying); + linker_error(prog, "shader uses too many varying vectors " + "(%u > %u)\n", + varying_vectors, ctx->Const.MaxVarying); return false; } } else { const unsigned float_components = varying_vectors * 4; if (float_components > ctx->Const.MaxVarying * 4) { - linker_error_printf(prog, "shader uses too many varying components " - "(%u > %u)\n", - float_components, ctx->Const.MaxVarying * 4); + linker_error(prog, "shader uses too many varying components " + "(%u > %u)\n", + float_components, ctx->Const.MaxVarying * 4); return false; } } @@ -1618,8 +1636,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) assert(max_version <= 130); if ((max_version >= 130 || min_version == 100) && min_version != max_version) { - linker_error_printf(prog, "all shaders must use same shading " - "language version\n"); + linker_error(prog, "all shaders must use same shading " + "language version\n"); goto done; } @@ -1702,6 +1720,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; + detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir); + if (!prog->LinkStatus) + goto done; + while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, 32)) ; } @@ -1716,12 +1738,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * FINISHME: at least 16, so hardcode 16 for now. */ if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) { - prog->LinkStatus = false; goto done; } if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) { - prog->LinkStatus = false; goto done; } @@ -1738,7 +1758,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!assign_varying_locations(ctx, prog, prog->_LinkedShaders[prev], prog->_LinkedShaders[i])) { - prog->LinkStatus = false; goto done; } @@ -1770,11 +1789,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) */ if (ctx->API == API_OPENGLES2 || prog->Version == 100) { if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { - linker_error_printf(prog, "program lacks a vertex shader\n"); - prog->LinkStatus = false; + linker_error(prog, "program lacks a vertex shader\n"); } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { - linker_error_printf(prog, "program lacks a fragment shader\n"); - prog->LinkStatus = false; + linker_error(prog, "program lacks a fragment shader\n"); } } diff --git a/src/glsl/linker.h b/src/glsl/linker.h index a8ce16a7ec1..769cf68b6ad 100644 --- a/src/glsl/linker.h +++ b/src/glsl/linker.h @@ -25,9 +25,6 @@ #ifndef GLSL_LINKER_H #define GLSL_LINKER_H -extern void -linker_error_printf(gl_shader_program *prog, const char *fmt, ...); - extern bool link_function_calls(gl_shader_program *prog, gl_shader *main, gl_shader **shader_list, unsigned num_shaders); diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp index b637eb4fe1d..7b89a1539ce 100644 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ b/src/glsl/lower_if_to_cond_assign.cpp @@ -47,6 +47,7 @@ #include "glsl_types.h" #include "ir.h" +#include "program/hash_table.h" class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { public: @@ -55,6 +56,14 @@ public: this->progress = false; this->max_depth = max_depth; this->depth = 0; + + this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~ir_if_to_cond_assign_visitor() + { + hash_table_dtor(this->condition_variables); } ir_visitor_status visit_enter(ir_if *); @@ -63,6 +72,8 @@ public: bool progress; unsigned max_depth; unsigned depth; + + struct hash_table *condition_variables; }; bool @@ -94,40 +105,43 @@ check_control_flow(ir_instruction *ir, void *data) void move_block_to_cond_assign(void *mem_ctx, - ir_if *if_ir, ir_variable *cond_var, bool then) + ir_if *if_ir, ir_rvalue *cond_expr, + exec_list *instructions, + struct hash_table *ht) { - exec_list *instructions; - - if (then) { - instructions = &if_ir->then_instructions; - } else { - instructions = &if_ir->else_instructions; - } - - foreach_iter(exec_list_iterator, iter, *instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list_safe(node, instructions) { + ir_instruction *ir = (ir_instruction *) node; if (ir->ir_type == ir_type_assignment) { ir_assignment *assign = (ir_assignment *)ir; - ir_rvalue *cond_expr; - ir_dereference *deref = new(mem_ctx) ir_dereference_variable(cond_var); - - if (then) { - cond_expr = deref; - } else { - cond_expr = new(mem_ctx) ir_expression(ir_unop_logic_not, - glsl_type::bool_type, - deref, - NULL); - } - if (!assign->condition) { - assign->condition = cond_expr; - } else { - assign->condition = new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr, - assign->condition); + if (hash_table_find(ht, assign) == NULL) { + hash_table_insert(ht, assign, assign); + + /* If the LHS of the assignment is a condition variable that was + * previously added, insert an additional assignment of false to + * the variable. + */ + const bool assign_to_cv = + hash_table_find(ht, assign->lhs->variable_referenced()) != NULL; + + if (!assign->condition) { + if (assign_to_cv) { + assign->rhs = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->rhs); + } else { + assign->condition = cond_expr->clone(mem_ctx, NULL); + } + } else { + assign->condition = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->condition); + } } } @@ -142,6 +156,7 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) { (void) ir; this->depth++; + return visit_continue; } @@ -153,9 +168,7 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) return visit_continue; bool found_control_flow = false; - ir_variable *cond_var; ir_assignment *assign; - ir_dereference_variable *deref; /* Check that both blocks don't contain anything we can't support. */ foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) { @@ -171,24 +184,62 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) void *mem_ctx = ralloc_parent(ir); - /* Store the condition to a variable so the assignment conditions are - * simpler. + /* Store the condition to a variable. Move all of the instructions from + * the then-clause of the if-statement. Use the condition variable as a + * condition for all assignments. */ - cond_var = new(mem_ctx) ir_variable(glsl_type::bool_type, - "if_to_cond_assign_condition", - ir_var_temporary); - ir->insert_before(cond_var); - - deref = new(mem_ctx) ir_dereference_variable(cond_var); - assign = new(mem_ctx) ir_assignment(deref, - ir->condition, NULL); + ir_variable *const then_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_then", + ir_var_temporary); + ir->insert_before(then_var); + + ir_dereference_variable *then_cond = + new(mem_ctx) ir_dereference_variable(then_var); + + assign = new(mem_ctx) ir_assignment(then_cond, ir->condition); ir->insert_before(assign); - /* Now, move all of the instructions out of the if blocks, putting - * conditions on assignments. + move_block_to_cond_assign(mem_ctx, ir, then_cond, + &ir->then_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. + */ + hash_table_insert(this->condition_variables, then_var, then_var); + + /* If there are instructions in the else-clause, store the inverse of the + * condition to a variable. Move all of the instructions from the + * else-clause if the if-statement. Use the (inverse) condition variable + * as a condition for all assignments. */ - move_block_to_cond_assign(mem_ctx, ir, cond_var, true); - move_block_to_cond_assign(mem_ctx, ir, cond_var, false); + if (!ir->else_instructions.is_empty()) { + ir_variable *const else_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_else", + ir_var_temporary); + ir->insert_before(else_var); + + ir_dereference_variable *else_cond = + new(mem_ctx) ir_dereference_variable(else_var); + + ir_rvalue *inverse = + new(mem_ctx) ir_expression(ir_unop_logic_not, + then_cond->clone(mem_ctx, NULL)); + + assign = new(mem_ctx) ir_assignment(else_cond, inverse); + ir->insert_before(assign); + + move_block_to_cond_assign(mem_ctx, ir, else_cond, + &ir->else_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. + */ + hash_table_insert(this->condition_variables, else_var, else_var); + } ir->remove(); diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 806f8639959..23aa19bde6f 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -166,6 +166,10 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) else op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->type->vector_elements, + ir->type->matrix_columns); + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 8eb1612f0a0..f8e4a1de428 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -29,6 +29,21 @@ * * Pre-DX10 GPUs often don't have a native way to do this operation, * and this works around that. + * + * The lowering process proceeds as follows. Each non-constant index + * found in an r-value is converted to a canonical form \c array[i]. Each + * element of the array is conditionally assigned to a temporary by comparing + * \c i to a constant index. This is done by cloning the canonical form and + * replacing all occurances of \c i with a constant. Each remaining occurance + * of the canonical form in the IR is replaced with a dereference of the + * temporary variable. + * + * L-values with non-constant indices are handled similarly. In this case, + * the RHS of the assignment is assigned to a temporary. The non-constant + * index is replace with the canonical form (just like for r-values). The + * temporary is conditionally assigned to each element of the canonical form + * by comparing \c i with each index. The same clone-and-replace scheme is + * used. */ #include "ir.h" @@ -37,10 +52,140 @@ #include "glsl_types.h" #include "main/macros.h" +/** + * Generate a comparison value for a block of indices + * + * Lowering passes for non-constant indexing of arrays, matrices, or vectors + * can use this to generate blocks of index comparison values. + * + * \param instructions List where new instructions will be appended + * \param index \c ir_variable containing the desired index + * \param base Base value for this block of comparisons + * \param components Number of unique index values to compare. This must + * be on the range [1, 4]. + * \param mem_ctx ralloc memory context to be used for all allocations. + * + * \returns + * An \c ir_rvalue that \b must be cloned for each use in conditional + * assignments, etc. + */ +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx) +{ + ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); + + assert(index->type->is_scalar()); + assert(index->type->base_type == GLSL_TYPE_INT); + assert(components >= 1 && components <= 4); + + if (components > 1) { + const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; + broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); + } + + /* Compare the desired index value with the next block of four indices. + */ + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = base; + test_indices_data.i[1] = base + 1; + test_indices_data.i[2] = base + 2; + test_indices_data.i[3] = base + 3; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, + &test_indices_data); + + ir_rvalue *const condition_val = + new(mem_ctx) ir_expression(ir_binop_equal, + &glsl_type::bool_type[components - 1], + broadcast_index, + test_indices); + + ir_variable *const condition = + new(mem_ctx) ir_variable(condition_val->type, + "dereference_condition", + ir_var_temporary); + instructions->push_tail(condition); + + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(condition); + instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + + return cond_deref; +} + +static inline bool +is_array_or_matrix(const ir_instruction *ir) +{ + return (ir->type->is_array() || ir->type->is_matrix()); +} + +/** + * Replace a dereference of a variable with a specified r-value + * + * Each time a dereference of the specified value is replaced, the r-value + * tree is cloned. + */ +class deref_replacer : public ir_rvalue_visitor { +public: + deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) + : variable_to_replace(variable_to_replace), value(value), + progress(false) + { + assert(this->variable_to_replace != NULL); + assert(this->value != NULL); + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + + if ((dv != NULL) && (dv->var == this->variable_to_replace)) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + } + } + + const ir_variable *variable_to_replace; + ir_rvalue *value; + bool progress; +}; + +/** + * Find a variable index dereference of an array in an rvalue tree + */ +class find_variable_index : public ir_hierarchical_visitor { +public: + find_variable_index() + : deref(NULL) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (is_array_or_matrix(ir->array) + && (ir->array_index->as_constant() == NULL)) { + this->deref = ir; + return visit_stop; + } + + return visit_continue; + } + + /** + * First array dereference found in the tree that has a non-constant index. + */ + ir_dereference_array *deref; +}; + struct assignment_generator { ir_instruction* base_ir; - ir_rvalue* array; + ir_dereference *rvalue; + ir_variable *old_index; bool is_write; unsigned int write_mask; ir_variable* var; @@ -55,18 +200,23 @@ struct assignment_generator * underlying variable. */ void *mem_ctx = ralloc_parent(base_ir); - ir_dereference *element = - new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); - ir_assignment *assignment; - if (is_write) { - assignment = new(mem_ctx) ir_assignment(element, variable, condition, - write_mask); - } else { - assignment = new(mem_ctx) ir_assignment(variable, element, condition); - } + /* Clone the old r-value in its entirety. Then replace any occurances of + * the old variable index with the new constant index. + */ + ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); + ir_constant *const index = new(mem_ctx) ir_constant(i); + deref_replacer r(this->old_index, index); + element->accept(&r); + assert(r.progress); + + /* Generate a conditional assignment to (or from) the constant indexed + * array dereference. + */ + ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); + ir_assignment *const assignment = (is_write) + ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) + : new(mem_ctx) ir_assignment(variable, element, condition); list->push_tail(assignment); } @@ -118,54 +268,17 @@ struct switch_generator for (unsigned i = first; i < end; i += 4) { const unsigned comps = MIN2(condition_components, end - i); - ir_rvalue *broadcast_index = - new(this->mem_ctx) ir_dereference_variable(index); - - if (comps) { - const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false }; - broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m); - } - - /* Compare the desired index value with the next block of four indices. - */ - ir_constant_data test_indices_data; - memset(&test_indices_data, 0, sizeof(test_indices_data)); - test_indices_data.i[0] = i; - test_indices_data.i[1] = i + 1; - test_indices_data.i[2] = i + 2; - test_indices_data.i[3] = i + 3; - ir_constant *const test_indices = - new(this->mem_ctx) ir_constant(broadcast_index->type, - &test_indices_data); - - ir_rvalue *const condition_val = - new(this->mem_ctx) ir_expression(ir_binop_equal, - &glsl_type::bool_type[comps - 1], - broadcast_index, - test_indices); - - ir_variable *const condition = - new(this->mem_ctx) ir_variable(condition_val->type, - "dereference_array_condition", - ir_var_temporary); - list->push_tail(condition); - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); - list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref, - condition_val, 0)); + compare_index_block(list, index, i, comps, this->mem_ctx); if (comps == 1) { - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); - - this->generator.generate(i, cond_deref, list); + this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), + list); } else { for (unsigned j = 0; j < comps; j++) { - ir_rvalue *const cond_deref = - new(this->mem_ctx) ir_dereference_variable(condition); ir_rvalue *const cond_swiz = - new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1); + new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), + j, 0, 0, 0, 1); this->generator.generate(i + j, cond_swiz, list); } @@ -233,21 +346,18 @@ public: bool lower_temps; bool lower_uniforms; - bool is_array_or_matrix(const ir_instruction *ir) const - { - return (ir->type->is_array() || ir->type->is_matrix()); - } - - bool needs_lowering(ir_dereference_array *deref) const + bool storage_type_needs_lowering(ir_dereference_array *deref) const { - if (deref == NULL || deref->array_index->as_constant() - || !is_array_or_matrix(deref->array)) - return false; - - if (deref->array->ir_type == ir_type_constant) + /* If a variable isn't eventually the target of this dereference, then + * it must be a constant or some sort of anonymous temporary storage. + * + * FINISHME: Is this correct? Most drivers treat arrays of constants as + * FINISHME: uniforms. It seems like this should do the same. + */ + const ir_variable *const var = deref->array->variable_referenced(); + if (var == NULL) return this->lower_temps; - const ir_variable *const var = deref->array->variable_referenced(); switch (var->mode) { case ir_var_auto: case ir_var_temporary: @@ -267,8 +377,18 @@ public: return false; } + bool needs_lowering(ir_dereference_array *deref) const + { + if (deref == NULL || deref->array_index->as_constant() + || !is_array_or_matrix(deref->array)) + return false; + + return this->storage_type_needs_lowering(deref); + } + ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, - ir_assignment* orig_assign) + ir_assignment* orig_assign, + ir_dereference *orig_base) { assert(is_array_or_matrix(orig_deref->array)); @@ -314,9 +434,12 @@ public: new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); base_ir->insert_before(assign); + orig_deref->array_index = lhs->clone(mem_ctx, NULL); + assignment_generator ag; - ag.array = orig_deref->array; + ag.rvalue = orig_base; ag.base_ir = base_ir; + ag.old_index = index; ag.var = var; if (orig_assign) { ag.is_write = true; @@ -327,21 +450,40 @@ public: switch_generator sg(ag, index, 4, 4); - exec_list list; - sg.generate(0, length, &list); - base_ir->insert_before(&list); + /* If the original assignment has a condition, respect that original + * condition! This is acomplished by wrapping the new conditional + * assignments in an if-statement that uses the original condition. + */ + if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { + /* No need to clone the condition because the IR that it hangs on is + * going to be removed from the instruction sequence. + */ + ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); + + sg.generate(0, length, &if_stmt->then_instructions); + base_ir->insert_before(if_stmt); + } else { + exec_list list; + + sg.generate(0, length, &list); + base_ir->insert_before(&list); + } return var; } virtual void handle_rvalue(ir_rvalue **pir) { + if (this->in_assignee) + return; + if (!*pir) return; ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); if (needs_lowering(orig_deref)) { - ir_variable* var = convert_dereference_array(orig_deref, 0); + ir_variable *var = + convert_dereference_array(orig_deref, NULL, orig_deref); assert(var); *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); this->progress = true; @@ -353,10 +495,11 @@ public: { ir_rvalue_visitor::visit_leave(ir); - ir_dereference_array *orig_deref = ir->lhs->as_dereference_array(); + find_variable_index f; + ir->lhs->accept(&f); - if (needs_lowering(orig_deref)) { - convert_dereference_array(orig_deref, ir); + if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { + convert_dereference_array(f.deref, ir, ir->lhs); ir->remove(); this->progress = true; } @@ -377,7 +520,17 @@ lower_variable_index_to_cond_assign(exec_list *instructions, lower_temp, lower_uniform); - visit_list_elements(&v, instructions); - - return v.progress; + /* Continue lowering until no progress is made. If there are multiple + * levels of indirection (e.g., non-constant indexing of array elements and + * matrix columns of an array of matrix), each pass will only lower one + * level of indirection. + */ + bool progress_ever = false; + do { + v.progress = false; + visit_list_elements(&v, instructions); + progress_ever = v.progress || progress_ever; + } while (v.progress); + + return progress_ever; } diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp index 3c4d93201d2..fce9c3424a1 100644 --- a/src/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/glsl/lower_vec_index_to_cond_assign.cpp @@ -71,8 +71,6 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue ir_assignment *assign; ir_variable *index, *var; ir_dereference *deref; - ir_expression *condition; - ir_swizzle *swizzle; int i; if (!orig_deref) @@ -86,39 +84,52 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT); + exec_list list; + /* Store the index to a temporary to avoid reusing its tree. */ index = new(base_ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i", ir_var_temporary); - base_ir->insert_before(index); + list.push_tail(index); deref = new(base_ir) ir_dereference_variable(index); assign = new(base_ir) ir_assignment(deref, orig_deref->array_index, NULL); - base_ir->insert_before(assign); + list.push_tail(assign); /* Temporary where we store whichever value we swizzle out. */ var = new(base_ir) ir_variable(ir->type, "vec_index_tmp_v", ir_var_temporary); - base_ir->insert_before(var); + list.push_tail(var); + + /* Generate a single comparison condition "mask" for all of the components + * in the vector. + */ + ir_rvalue *const cond_deref = + compare_index_block(&list, index, 0, + orig_deref->array->type->vector_elements, + mem_ctx); /* Generate a conditional move of each vector element to the temp. */ for (i = 0; i < orig_deref->array->type->vector_elements; i++) { - deref = new(base_ir) ir_dereference_variable(index); - condition = new(base_ir) ir_expression(ir_binop_equal, - glsl_type::bool_type, - deref, - new(base_ir) ir_constant(i)); + ir_rvalue *condition_swizzle = + new(base_ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1); /* Just clone the rest of the deref chain when trying to get at the * underlying variable. */ - swizzle = new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); + ir_rvalue *swizzle = + new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); deref = new(base_ir) ir_dereference_variable(var); - assign = new(base_ir) ir_assignment(deref, swizzle, condition); - base_ir->insert_before(assign); + assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); + list.push_tail(assign); } + /* Put all of the new instructions in the IR stream before the old + * instruction. + */ + base_ir->insert_before(&list); + this->progress = true; return new(base_ir) ir_dereference_variable(var); } @@ -171,42 +182,66 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT); + exec_list list; + /* Store the index to a temporary to avoid reusing its tree. */ index = new(ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i", ir_var_temporary); - ir->insert_before(index); + list.push_tail(index); deref = new(ir) ir_dereference_variable(index); assign = new(ir) ir_assignment(deref, orig_deref->array_index, NULL); - ir->insert_before(assign); + list.push_tail(assign); /* Store the RHS to a temporary to avoid reusing its tree. */ var = new(ir) ir_variable(ir->rhs->type, "vec_index_tmp_v", ir_var_temporary); - ir->insert_before(var); + list.push_tail(var); deref = new(ir) ir_dereference_variable(var); assign = new(ir) ir_assignment(deref, ir->rhs, NULL); - ir->insert_before(assign); + list.push_tail(assign); + + /* Generate a single comparison condition "mask" for all of the components + * in the vector. + */ + ir_rvalue *const cond_deref = + compare_index_block(&list, index, 0, + orig_deref->array->type->vector_elements, + mem_ctx); /* Generate a conditional move of each vector element to the temp. */ for (i = 0; i < orig_deref->array->type->vector_elements; i++) { - ir_rvalue *condition, *swizzle; + ir_rvalue *condition_swizzle = + new(ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1); - deref = new(ir) ir_dereference_variable(index); - condition = new(ir) ir_expression(ir_binop_equal, - glsl_type::bool_type, - deref, - new(ir) ir_constant(i)); /* Just clone the rest of the deref chain when trying to get at the * underlying variable. */ - swizzle = new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); + ir_rvalue *swizzle = + new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); deref = new(ir) ir_dereference_variable(var); - assign = new(ir) ir_assignment(swizzle, deref, condition); - ir->insert_before(assign); + assign = new(ir) ir_assignment(swizzle, deref, condition_swizzle); + list.push_tail(assign); } + + /* If the original assignment has a condition, respect that original + * condition! This is acomplished by wrapping the new conditional + * assignments in an if-statement that uses the original condition. + */ + if (ir->condition != NULL) { + /* No need to clone the condition because the IR that it hangs on is + * going to be removed from the instruction sequence. + */ + ir_if *if_stmt = new(mem_ctx) ir_if(ir->condition); + + list.move_nodes_to(&if_stmt->then_instructions); + ir->insert_before(if_stmt); + } else { + ir->insert_before(&list); + } + ir->remove(); this->progress = true; diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 7952bb1a3e3..01921375070 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -24,85 +24,30 @@ #include "ast.h" #include "glsl_parser_extras.h" -#include "glsl_parser.h" #include "ir_optimization.h" #include "ir_print_visitor.h" #include "program.h" #include "loop_analysis.h" - -extern "C" struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); - -extern "C" void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh); - -/* Copied from shader_api.c for the stand-alone compiler. - */ -void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh) -{ - *ptr = sh; -} - -struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) -{ - struct gl_shader *shader; - - (void) ctx; - - assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); - shader = rzalloc(NULL, struct gl_shader); - if (shader) { - shader->Type = type; - shader->Name = name; - shader->RefCount = 1; - } - return shader; -} +#include "standalone_scaffolding.h" static void initialize_context(struct gl_context *ctx, gl_api api) { - memset(ctx, 0, sizeof(*ctx)); - - ctx->API = api; - - ctx->Extensions.ARB_ES2_compatibility = GL_TRUE; - ctx->Extensions.ARB_draw_buffers = GL_TRUE; - ctx->Extensions.ARB_draw_instanced = GL_TRUE; - ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE; - ctx->Extensions.EXT_texture_array = GL_TRUE; - ctx->Extensions.NV_texture_rectangle = GL_TRUE; - ctx->Extensions.EXT_texture3D = GL_TRUE; + initialize_context_to_defaults(ctx, api); /* GLSL 1.30 isn't fully supported, but we need to advertise 1.30 so that * the built-in functions for 1.30 can be built. */ ctx->Const.GLSLVersion = 130; - /* 1.10 minimums. */ - ctx->Const.MaxLights = 8; ctx->Const.MaxClipPlanes = 8; - ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxDrawBuffers = 2; /* More than the 1.10 minimum to appease parser tests taken from * apps that (hopefully) already checked the number of coords. */ ctx->Const.MaxTextureCoordUnits = 4; - ctx->Const.VertexProgram.MaxAttribs = 16; - ctx->Const.VertexProgram.MaxUniformComponents = 512; - ctx->Const.MaxVarying = 8; - ctx->Const.MaxVertexTextureImageUnits = 0; - ctx->Const.MaxCombinedTextureImageUnits = 2; - ctx->Const.MaxTextureImageUnits = 2; - ctx->Const.FragmentProgram.MaxUniformComponents = 64; - - ctx->Const.MaxDrawBuffers = 2; - ctx->Driver.NewShader = _mesa_new_shader; } @@ -275,6 +220,7 @@ main(int argc, char **argv) whole_program = rzalloc (NULL, struct gl_shader_program); assert(whole_program != NULL); + whole_program->InfoLog = ralloc_strdup(whole_program, ""); for (/* empty */; argc > optind; optind++) { whole_program->Shaders = diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp index 7c64c618c0c..51c77e3b947 100644 --- a/src/glsl/opt_dead_functions.cpp +++ b/src/glsl/opt_dead_functions.cpp @@ -50,7 +50,6 @@ public: ir_dead_functions_visitor() { this->mem_ctx = ralloc_context(NULL); - this->seen_another_function_signature = false; } ~ir_dead_functions_visitor() @@ -65,8 +64,6 @@ public: bool (*predicate)(ir_instruction *ir); - bool seen_another_function_signature; - /* List of signature_entry */ exec_list signature_list; void *mem_ctx; @@ -97,13 +94,7 @@ ir_dead_functions_visitor::visit_enter(ir_function_signature *ir) entry->used = true; } - /* If this is the first signature to look at, no need to descend to see - * if it has calls to another function signature. - */ - if (!this->seen_another_function_signature) { - this->seen_another_function_signature = true; - return visit_continue_with_parent; - } + return visit_continue; } diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp index 1ef940f9c72..22a1749b9dd 100644 --- a/src/glsl/opt_tree_grafting.cpp +++ b/src/glsl/opt_tree_grafting.cpp @@ -258,6 +258,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir) break; case ir_txf: case ir_txl: + case ir_txs: if (do_graft(&ir->lod_info.lod)) return visit_stop; break; diff --git a/src/glsl/program.h b/src/glsl/program.h index db602fa9ec2..437ca1462fa 100644 --- a/src/glsl/program.h +++ b/src/glsl/program.h @@ -25,3 +25,11 @@ extern void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); + +extern void +linker_error(gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); + +extern void +linker_warning(gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c index 6a5eac6b9a3..fb48a91c564 100644 --- a/src/glsl/ralloc.c +++ b/src/glsl/ralloc.c @@ -28,6 +28,11 @@ #include <string.h> #include <stdint.h> +/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */ +#ifdef ANDROID +#include <limits.h> +#endif + #include "ralloc.h" #ifdef __GNUC__ diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp index a922a50d3b9..e704a3be20d 100644 --- a/src/glsl/s_expression.cpp +++ b/src/glsl/s_expression.cpp @@ -25,10 +25,13 @@ #include <assert.h> #include "s_expression.h" -s_symbol::s_symbol(const char *tmp, size_t n) +s_symbol::s_symbol(const char *str, size_t n) { - this->str = ralloc_strndup (this, tmp, n); - assert(this->str != NULL); + /* Assume the given string is already nul-terminated and in memory that + * will live as long as this node. + */ + assert(str[n] == '\0'); + this->str = str; } s_list::s_list() @@ -36,22 +39,26 @@ s_list::s_list() } static void -skip_whitespace(const char *& src) +skip_whitespace(const char *&src, char *&symbol_buffer) { - src += strspn(src, " \v\t\r\n"); + size_t n = strspn(src, " \v\t\r\n"); + src += n; + symbol_buffer += n; /* Also skip Scheme-style comments: semi-colon 'til end of line */ if (src[0] == ';') { - src += strcspn(src, "\n"); - skip_whitespace(src); + n = strcspn(src, "\n"); + src += n; + symbol_buffer += n; + skip_whitespace(src, symbol_buffer); } } static s_expression * -read_atom(void *ctx, const char *& src) +read_atom(void *ctx, const char *&src, char *&symbol_buffer) { s_expression *expr = NULL; - skip_whitespace(src); + skip_whitespace(src, symbol_buffer); size_t n = strcspn(src, "( \v\t\r\n);"); if (n == 0) @@ -70,44 +77,65 @@ read_atom(void *ctx, const char *& src) expr = new(ctx) s_int(i); } else { // Not a number; return a symbol. - expr = new(ctx) s_symbol(src, n); + symbol_buffer[n] = '\0'; + expr = new(ctx) s_symbol(symbol_buffer, n); } src += n; + symbol_buffer += n; return expr; } -s_expression * -s_expression::read_expression(void *ctx, const char *&src) +static s_expression * +__read_expression(void *ctx, const char *&src, char *&symbol_buffer) { - assert(src != NULL); - - s_expression *atom = read_atom(ctx, src); + s_expression *atom = read_atom(ctx, src, symbol_buffer); if (atom != NULL) return atom; - skip_whitespace(src); + skip_whitespace(src, symbol_buffer); if (src[0] == '(') { ++src; + ++symbol_buffer; s_list *list = new(ctx) s_list; s_expression *expr; - while ((expr = read_expression(ctx, src)) != NULL) { + while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) { list->subexpressions.push_tail(expr); } - skip_whitespace(src); + skip_whitespace(src, symbol_buffer); if (src[0] != ')') { printf("Unclosed expression (check your parenthesis).\n"); return NULL; } ++src; + ++symbol_buffer; return list; } return NULL; } +s_expression * +s_expression::read_expression(void *ctx, const char *&src) +{ + assert(src != NULL); + + /* When we encounter a Symbol, we need to save a nul-terminated copy of + * the string. However, ralloc_strndup'ing every individual Symbol is + * extremely expensive. We could avoid this by simply overwriting the + * next character (guaranteed to be whitespace, parens, or semicolon) with + * a nul-byte. But overwriting non-whitespace would mess up parsing. + * + * So, just copy the whole buffer ahead of time. Walk both, leaving the + * original source string unmodified, and altering the copy to contain the + * necessary nul-bytes whenever we encounter a symbol. + */ + char *symbol_buffer = ralloc_strdup(ctx, src); + return __read_expression(ctx, src, symbol_buffer); +} + void s_int::print() { printf("%d", this->val); diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h index c9dc676b319..642af19b439 100644 --- a/src/glsl/s_expression.h +++ b/src/glsl/s_expression.h @@ -129,7 +129,7 @@ public: void print(); private: - char *str; + const char *str; }; /* Lists of expressions: (expr1 ... exprN) */ diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp new file mode 100644 index 00000000000..bbd7bb91310 --- /dev/null +++ b/src/glsl/standalone_scaffolding.cpp @@ -0,0 +1,93 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#include "standalone_scaffolding.h" + +#include <assert.h> +#include <string.h> +#include "ralloc.h" + +void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh) +{ + *ptr = sh; +} + +struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) +{ + struct gl_shader *shader; + + (void) ctx; + + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + shader->RefCount = 1; + } + return shader; +} + +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->API = api; + + ctx->Extensions.dummy_false = false; + ctx->Extensions.dummy_true = true; + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_draw_buffers = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.EXT_texture_array = true; + ctx->Extensions.NV_texture_rectangle = true; + ctx->Extensions.EXT_texture3D = true; + + ctx->Const.GLSLVersion = 120; + + /* 1.20 minimums. */ + ctx->Const.MaxLights = 8; + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.VertexProgram.MaxAttribs = 16; + + ctx->Const.VertexProgram.MaxUniformComponents = 512; + ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ + ctx->Const.MaxVertexTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.MaxTextureImageUnits = 2; + ctx->Const.FragmentProgram.MaxUniformComponents = 64; + + ctx->Const.MaxDrawBuffers = 1; +} diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h new file mode 100644 index 00000000000..87733200670 --- /dev/null +++ b/src/glsl/standalone_scaffolding.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#pragma once +#ifndef STANDALONE_SCAFFOLDING_H +#define STANDALONE_SCAFFOLDING_H + +#include "main/mtypes.h" + +extern "C" void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh); + +extern "C" struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); + +/** + * Initialize the given gl_context structure to a reasonable set of + * defaults representing the minimum capabilities required by the + * OpenGL spec. + * + * This is used when compiling builtin functions and in testing, when + * we don't have a connection to an actual driver. + */ +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api); + + +#endif /* STANDALONE_SCAFFOLDING_H */ diff --git a/src/glsl/test.cpp b/src/glsl/test.cpp new file mode 100644 index 00000000000..b1ff92ed1d4 --- /dev/null +++ b/src/glsl/test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test.cpp + * + * Standalone tests for the GLSL compiler. + * + * This file provides a standalone executable which can be used to + * test components of the GLSL. + * + * Each test is a function with the same signature as main(). The + * main function interprets its first argument as the name of the test + * to run, strips out that argument, and then calls the test function. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "test_optpass.h" + +/** + * Print proper usage and exit with failure. + */ +static void +usage_fail(const char *name) +{ + printf("*** usage: %s <command> <options>\n", name); + printf("\n"); + printf("Possible commands are:\n"); + printf(" optpass: test an optimization pass in isolation\n"); + exit(EXIT_FAILURE); +} + +static const char *extract_command_from_argv(int *argc, char **argv) +{ + if (*argc < 2) { + usage_fail(argv[0]); + } + const char *command = argv[1]; + --*argc; + memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1])); + return command; +} + +int main(int argc, char **argv) +{ + const char *command = extract_command_from_argv(&argc, argv); + if (strcmp(command, "optpass") == 0) { + return test_optpass(argc, argv); + } else { + usage_fail(argv[0]); + } + + /* Execution should never reach here. */ + return EXIT_FAILURE; +} diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp new file mode 100644 index 00000000000..89b7f8338dc --- /dev/null +++ b/src/glsl/test_optpass.cpp @@ -0,0 +1,273 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test_optpass.cpp + * + * Standalone test for optimization passes. + * + * This file provides the "optpass" command for the standalone + * glsl_test app. It accepts either GLSL or high-level IR as input, + * and performs the optimiation passes specified on the command line. + * It outputs the IR, both before and after optimiations. + */ + +#include <string> +#include <iostream> +#include <sstream> +#include <getopt.h> + +#include "ast.h" +#include "ir_optimization.h" +#include "ir_print_visitor.h" +#include "program.h" +#include "ir_reader.h" +#include "standalone_scaffolding.h" + +using namespace std; + +static string read_stdin_to_eof() +{ + stringbuf sb; + cin.get(sb, '\0'); + return sb.str(); +} + +static GLboolean +do_optimization(struct exec_list *ir, const char *optimization) +{ + int int_0; + int int_1; + int int_2; + int int_3; + int int_4; + + if (sscanf(optimization, "do_common_optimization ( %d , %d ) ", + &int_0, &int_1) == 2) { + return do_common_optimization(ir, int_0 != 0, int_1); + } else if (strcmp(optimization, "do_algebraic") == 0) { + return do_algebraic(ir); + } else if (strcmp(optimization, "do_constant_folding") == 0) { + return do_constant_folding(ir); + } else if (strcmp(optimization, "do_constant_variable") == 0) { + return do_constant_variable(ir); + } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) { + return do_constant_variable_unlinked(ir); + } else if (strcmp(optimization, "do_copy_propagation") == 0) { + return do_copy_propagation(ir); + } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) { + return do_copy_propagation_elements(ir); + } else if (strcmp(optimization, "do_constant_propagation") == 0) { + return do_constant_propagation(ir); + } else if (strcmp(optimization, "do_dead_code") == 0) { + return do_dead_code(ir); + } else if (strcmp(optimization, "do_dead_code_local") == 0) { + return do_dead_code_local(ir); + } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) { + return do_dead_code_unlinked(ir); + } else if (strcmp(optimization, "do_dead_functions") == 0) { + return do_dead_functions(ir); + } else if (strcmp(optimization, "do_function_inlining") == 0) { + return do_function_inlining(ir); + } else if (sscanf(optimization, + "do_lower_jumps ( %d , %d , %d , %d , %d ) ", + &int_0, &int_1, &int_2, &int_3, &int_4) == 5) { + return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0, + int_3 != 0, int_4 != 0); + } else if (strcmp(optimization, "do_lower_texture_projection") == 0) { + return do_lower_texture_projection(ir); + } else if (strcmp(optimization, "do_if_simplification") == 0) { + return do_if_simplification(ir); + } else if (strcmp(optimization, "do_discard_simplification") == 0) { + return do_discard_simplification(ir); + } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ", + &int_0) == 1) { + return lower_if_to_cond_assign(ir, int_0); + } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) { + return do_mat_op_to_vec(ir); + } else if (strcmp(optimization, "do_noop_swizzle") == 0) { + return do_noop_swizzle(ir); + } else if (strcmp(optimization, "do_structure_splitting") == 0) { + return do_structure_splitting(ir); + } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) { + return do_swizzle_swizzle(ir); + } else if (strcmp(optimization, "do_tree_grafting") == 0) { + return do_tree_grafting(ir); + } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) { + return do_vec_index_to_cond_assign(ir); + } else if (strcmp(optimization, "do_vec_index_to_swizzle") == 0) { + return do_vec_index_to_swizzle(ir); + } else if (strcmp(optimization, "lower_discard") == 0) { + return lower_discard(ir); + } else if (sscanf(optimization, "lower_instructions ( %d ) ", + &int_0) == 1) { + return lower_instructions(ir, int_0); + } else if (strcmp(optimization, "lower_noise") == 0) { + return lower_noise(ir); + } else if (sscanf(optimization, "lower_variable_index_to_cond_assign " + "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2, + &int_3) == 4) { + return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0, + int_2 != 0, int_3 != 0); + } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ", + &int_0) == 1) { + return lower_quadop_vector(ir, int_0 != 0); + } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) { + return optimize_redundant_jumps(ir); + } else { + printf("Unrecognized optimization %s\n", optimization); + exit(EXIT_FAILURE); + return false; + } +} + +static GLboolean +do_optimization_passes(struct exec_list *ir, char **optimizations, + int num_optimizations, bool quiet) +{ + GLboolean overall_progress = false; + + for (int i = 0; i < num_optimizations; ++i) { + const char *optimization = optimizations[i]; + if (!quiet) { + printf("*** Running optimization %s...", optimization); + } + GLboolean progress = do_optimization(ir, optimization); + if (!quiet) { + printf("%s\n", progress ? "progress" : "no progress"); + } + validate_ir_tree(ir); + + overall_progress = overall_progress || progress; + } + + return overall_progress; +} + +int test_optpass(int argc, char **argv) +{ + int input_format_ir = 0; /* 0=glsl, 1=ir */ + int loop = 0; + int shader_type = GL_VERTEX_SHADER; + int quiet = 0; + + const struct option optpass_opts[] = { + { "input-ir", no_argument, &input_format_ir, 1 }, + { "input-glsl", no_argument, &input_format_ir, 0 }, + { "loop", no_argument, &loop, 1 }, + { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER }, + { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER }, + { "quiet", no_argument, &quiet, 1 }, + { NULL, 0, NULL, 0 } + }; + + int idx = 0; + int c; + while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) { + if (c != 0) { + printf("*** usage: %s optpass <optimizations> <options>\n", argv[0]); + printf("\n"); + printf("Possible options are:\n"); + printf(" --input-ir: input format is IR\n"); + printf(" --input-glsl: input format is GLSL (the default)\n"); + printf(" --loop: run optimizations repeatedly until no progress\n"); + printf(" --vertex-shader: test with a vertex shader (the default)\n"); + printf(" --fragment-shader: test with a fragment shader\n"); + exit(EXIT_FAILURE); + } + } + + struct gl_context local_ctx; + struct gl_context *ctx = &local_ctx; + initialize_context_to_defaults(ctx, API_OPENGL); + + ctx->Driver.NewShader = _mesa_new_shader; + + struct gl_shader *shader = rzalloc(NULL, struct gl_shader); + shader->Type = shader_type; + + string input = read_stdin_to_eof(); + + struct _mesa_glsl_parse_state *state + = new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader); + + if (input_format_ir) { + shader->ir = new(shader) exec_list; + _mesa_glsl_initialize_types(state); + _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true); + } else { + shader->Source = input.c_str(); + const char *source = shader->Source; + state->error = preprocess(state, &source, &state->info_log, + state->extensions, ctx->API) != 0; + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + } + + /* Print out the initial IR */ + if (!state->error && !quiet) { + printf("*** pre-optimization IR:\n"); + _mesa_print_ir(shader->ir, state); + printf("\n--\n"); + } + + /* Optimization passes */ + if (!state->error) { + GLboolean progress; + do { + progress = do_optimization_passes(shader->ir, &argv[optind], + argc - optind, quiet != 0); + } while (loop && progress); + } + + /* Print out the resulting IR */ + if (!state->error) { + if (!quiet) { + printf("*** resulting IR:\n"); + } + _mesa_print_ir(shader->ir, state); + if (!quiet) { + printf("\n--\n"); + } + } + + if (state->error) { + printf("*** error(s) occurred:\n"); + printf("%s\n", state->info_log); + printf("--\n"); + } + + ralloc_free(state); + ralloc_free(shader); + + return state->error; +} + diff --git a/src/glsl/test_optpass.h b/src/glsl/test_optpass.h new file mode 100644 index 00000000000..923ccf3dece --- /dev/null +++ b/src/glsl/test_optpass.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef TEST_OPTPASS_H +#define TEST_OPTPASS_H + +int test_optpass(int argc, char **argv); + +#endif /* TEST_OPTPASS_H */ diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir new file mode 100755 index 00000000000..a40fc810cf3 --- /dev/null +++ b/src/glsl/tests/compare_ir @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compare two files containing IR code. Ignore formatting differences +# and declaration order. + +import os +import os.path +import subprocess +import sys +import tempfile + +from sexps import * + +if len(sys.argv) != 3: + print 'Usage: compare_ir <file1> <file2>' + exit(1) + +with open(sys.argv[1]) as f: + ir1 = sort_decls(parse_sexp(f.read())) +with open(sys.argv[2]) as f: + ir2 = sort_decls(parse_sexp(f.read())) + +if ir1 == ir2: + exit(0) +else: + file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1])) + file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2])) + try: + os.write(file1, '{0}\n'.format(sexp_to_string(ir1))) + os.close(file1) + os.write(file2, '{0}\n'.format(sexp_to_string(ir2))) + os.close(file2) + subprocess.call(['diff', '-u', path1, path2]) + finally: + os.remove(path1) + os.remove(path2) + exit(1) diff --git a/src/glsl/tests/lower_jumps/.gitignore b/src/glsl/tests/lower_jumps/.gitignore new file mode 100644 index 00000000000..f47cb2045f1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/.gitignore @@ -0,0 +1 @@ +*.out diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py new file mode 100644 index 00000000000..fbc6f0a84ea --- /dev/null +++ b/src/glsl/tests/lower_jumps/create_test_cases.py @@ -0,0 +1,643 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import os +import os.path +import re +import subprocess +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir +from sexps import * + +def make_test_case(f_name, ret_type, body): + """Create a simple optimization test case consisting of a single + function with the given name, return type, and body. + + Global declarations are automatically created for any undeclared + variables that are referenced by the function. All undeclared + variables are assumed to be floats. + """ + check_sexp(body) + declarations = {} + def make_declarations(sexp, already_declared = ()): + if isinstance(sexp, list): + if len(sexp) == 2 and sexp[0] == 'var_ref': + if sexp[1] not in already_declared: + declarations[sexp[1]] = [ + 'declare', ['in'], 'float', sexp[1]] + elif len(sexp) == 4 and sexp[0] == 'assign': + assert sexp[2][0] == 'var_ref' + if sexp[2][1] not in already_declared: + declarations[sexp[2][1]] = [ + 'declare', ['out'], 'float', sexp[2][1]] + make_declarations(sexp[3], already_declared) + else: + already_declared = set(already_declared) + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and \ + s[0] == 'declare': + already_declared.add(s[3]) + else: + make_declarations(s, already_declared) + make_declarations(body) + return declarations.values() + \ + [['function', f_name, ['signature', ret_type, ['parameters'], body]]] + + +# The following functions can be used to build expressions. + +def const_float(value): + """Create an expression representing the given floating point value.""" + return ['constant', 'float', ['{0:.6f}'.format(value)]] + +def const_bool(value): + """Create an expression representing the given boolean value. + + If value is not a boolean, it is converted to a boolean. So, for + instance, const_bool(1) is equivalent to const_bool(True). + """ + return ['constant', 'bool', ['{0}'.format(1 if value else 0)]] + +def gt_zero(var_name): + """Create Construct the expression var_name > 0""" + return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)] + + +# The following functions can be used to build complex control flow +# statements. All of these functions return statement lists (even +# those which only create a single statement), so that statements can +# be sequenced together using the '+' operator. + +def return_(value = None): + """Create a return statement.""" + if value is not None: + return [['return', value]] + else: + return [['return']] + +def break_(): + """Create a break statement.""" + return ['break'] + +def continue_(): + """Create a continue statement.""" + return ['continue'] + +def simple_if(var_name, then_statements, else_statements = None): + """Create a statement of the form + + if (var_name > 0.0) { + <then_statements> + } else { + <else_statements> + } + + else_statements may be omitted. + """ + if else_statements is None: + else_statements = [] + check_sexp(then_statements) + check_sexp(else_statements) + return [['if', gt_zero(var_name), then_statements, else_statements]] + +def loop(statements): + """Create a loop containing the given statements as its loop + body. + """ + check_sexp(statements) + return [['loop', [], [], [], [], statements]] + +def declare_temp(var_type, var_name): + """Create a declaration of the form + + (declare (temporary) <var_type> <var_name) + """ + return [['declare', ['temporary'], var_type, var_name]] + +def assign_x(var_name, value): + """Create a statement that assigns <value> to the variable + <var_name>. The assignment uses the mask (x). + """ + check_sexp(value) + return [['assign', ['x'], ['var_ref', var_name], value]] + +def complex_if(var_prefix, statements): + """Create a statement of the form + + if (<var_prefix>a > 0.0) { + if (<var_prefix>b > 0.0) { + <statements> + } + } + + This is useful in testing jump lowering, because if <statements> + ends in a jump, lower_jumps.cpp won't try to combine this + construct with the code that follows it, as it might do for a + simple if. + + All variables used in the if statement are prefixed with + var_prefix. This can be used to ensure uniqueness. + """ + check_sexp(statements) + return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements)) + +def declare_execute_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean execute_flag. + """ + return declare_temp('bool', 'execute_flag') + \ + assign_x('execute_flag', const_bool(True)) + +def declare_return_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean return_flag. + """ + return declare_temp('bool', 'return_flag') + \ + assign_x('return_flag', const_bool(False)) + +def declare_return_value(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary variable return_value. Assume that + return_value is a float. + """ + return declare_temp('float', 'return_value') + +def declare_break_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean break_flag. + """ + return declare_temp('bool', 'break_flag') + \ + assign_x('break_flag', const_bool(False)) + +def lowered_return_simple(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it does not need to clear the + execute flag. + """ + if value: + result = assign_x('return_value', value) + else: + result = [] + return result + assign_x('return_flag', const_bool(True)) + +def lowered_return(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_return_simple(value) + \ + assign_x('execute_flag', const_bool(False)) + +def lowered_continue(): + """Create the statement that lower_jumps.cpp lowers a continue + statement to. + """ + return assign_x('execute_flag', const_bool(False)) + +def lowered_break_simple(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it does not need to clear the + execute flag. + """ + return assign_x('break_flag', const_bool(True)) + +def lowered_break(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_break_simple() + assign_x('execute_flag', const_bool(False)) + +def if_execute_flag(statements): + """Wrap statements in an if test so that they will only execute if + execute_flag is True. + """ + check_sexp(statements) + return [['if', ['var_ref', 'execute_flag'], statements, []]] + +def if_not_return_flag(statements): + """Wrap statements in an if test so that they will only execute if + return_flag is False. + """ + check_sexp(statements) + return [['if', ['var_ref', 'return_flag'], [], statements]] + +def final_return(): + """Create the return statement that lower_jumps.cpp places at the + end of a function when lowering returns. + """ + return [['return', ['var_ref', 'return_value']]] + +def final_break(): + """Create the conditional break statement that lower_jumps.cpp + places at the end of a function when lowering breaks. + """ + return [['if', ['var_ref', 'break_flag'], break_(), []]] + +def bash_quote(*args): + """Quote the arguments appropriately so that bash will understand + each argument as a single word. + """ + def quote_word(word): + for c in word: + if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'): + break + else: + if not word: + return "''" + return word + return "'{0}'".format(word.replace("'", "'\"'\"'")) + return ' '.join(quote_word(word) for word in args) + +def create_test_case(doc_string, input_sexp, expected_sexp, test_name, + pull_out_jumps=False, lower_sub_return=False, + lower_main_return=False, lower_continue=False, + lower_break=False): + """Create a test case that verifies that do_lower_jumps transforms + the given code in the expected way. + """ + doc_lines = [line.strip() for line in doc_string.splitlines()] + doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '') + check_sexp(input_sexp) + check_sexp(expected_sexp) + input_str = sexp_to_string(sort_decls(input_sexp)) + expected_output = sexp_to_string(sort_decls(expected_sexp)) + + optimization = ( + 'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format( + pull_out_jumps, lower_sub_return, lower_main_return, + lower_continue, lower_break)) + args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization] + test_file = '{0}.opt_test'.format(test_name) + with open(test_file, 'w') as f: + f.write('#!/bin/bash\n#\n# This file was generated by create_test_cases.py.\n#\n') + f.write(doc_string) + f.write('{0} <<EOF\n'.format(bash_quote(*args))) + f.write('{0}\nEOF\n'.format(input_str)) + os.chmod(test_file, 0774) + expected_file = '{0}.opt_test.expected'.format(test_name) + with open(expected_file, 'w') as f: + f.write('{0}\n'.format(expected_output)) + +def test_lower_returns_main(): + doc_string = """Test that do_lower_jumps respects the lower_main_return + flag in deciding whether to lower returns in the main + function. + """ + input_sexp = make_test_case('main', 'void', ( + complex_if('', return_()) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('', lowered_return()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_main_true', + lower_main_return=True) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_main_false', + lower_main_return=False) + +def test_lower_returns_sub(): + doc_string = """Test that do_lower_jumps respects the lower_sub_return flag + in deciding whether to lower returns in subroutines. + """ + input_sexp = make_test_case('sub', 'void', ( + complex_if('', return_()) + )) + expected_sexp = make_test_case('sub', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('', lowered_return()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_sub_true', + lower_sub_return=True) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_sub_false', + lower_sub_return=False) + +def test_lower_returns_1(): + doc_string = """Test that a void return at the end of a function is + eliminated. + """ + input_sexp = make_test_case('main', 'void', ( + assign_x('a', const_float(1)) + + return_() + )) + expected_sexp = make_test_case('main', 'void', ( + assign_x('a', const_float(1)) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_1', + lower_main_return=True) + +def test_lower_returns_2(): + doc_string = """Test that lowering is not performed on a non-void return at + the end of subroutine. + """ + input_sexp = make_test_case('sub', 'float', ( + assign_x('a', const_float(1)) + + return_(const_float(1)) + )) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_2', + lower_sub_return=True) + +def test_lower_returns_3(): + doc_string = """Test lowering of returns when there is one nested inside a + complex structure of ifs, and one at the end of a function. + + In this case, the latter return needs to be lowered because it + will not be at the end of the function once the final return + is inserted. + """ + input_sexp = make_test_case('sub', 'float', ( + complex_if('', return_(const_float(1))) + + return_(const_float(2)) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + complex_if('', lowered_return(const_float(1))) + + if_execute_flag(lowered_return(const_float(2))) + + final_return() + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_3', + lower_sub_return=True) + +def test_lower_returns_4(): + doc_string = """Test that returns are properly lowered when they occur in + both branches of an if-statement. + """ + input_sexp = make_test_case('sub', 'float', ( + simple_if('a', return_(const_float(1)), + return_(const_float(2))) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + simple_if('a', lowered_return(const_float(1)), + lowered_return(const_float(2))) + + final_return() + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_4', + lower_sub_return=True) + +def test_lower_unified_returns(): + doc_string = """If both branches of an if statement end in a return, and + pull_out_jumps is True, then those returns should be lifted + outside the if and then properly lowered. + + Verify that this lowering occurs during the same pass as the + lowering of other returns by checking that extra temporary + variables aren't generated. + """ + input_sexp = make_test_case('main', 'void', ( + complex_if('a', return_()) + + simple_if('b', simple_if('c', return_(), return_())) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('a', lowered_return()) + + if_execute_flag(simple_if('b', (simple_if('c', [], []) + + lowered_return()))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_unified_returns', + lower_main_return=True, pull_out_jumps=True) + +def test_lower_pulled_out_jump(): + doc_string = """If one branch of an if ends in a jump, and control cannot + fall out the bottom of the other branch, and pull_out_jumps is + True, then the jump is lifted outside the if. + + Verify that this lowering occurs during the same pass as the + lowering of other jumps by checking that extra temporary + variables aren't generated. + """ + input_sexp = make_test_case('main', 'void', ( + complex_if('a', return_()) + + loop(simple_if('b', simple_if('c', break_(), continue_()), + return_())) + + assign_x('d', const_float(1)) + )) + # Note: optimization produces two other effects: the break + # gets lifted out of the if statements, and the code after the + # loop gets guarded so that it only executes if the return + # flag is clear. + expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('a', lowered_return()) + + if_execute_flag( + loop(simple_if('b', simple_if('c', [], continue_()), + lowered_return_simple()) + + break_()) + + if_not_return_flag(assign_x('d', const_float(1)))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_pulled_out_jump', + lower_main_return=True, pull_out_jumps=True) + +def test_lower_breaks_1(): + doc_string = """If a loop contains an unconditional break at the bottom of + it, it should not be lowered.""" + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + break_()) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_1', lower_break=True) + +def test_lower_breaks_2(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the then-clause. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', break_())) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_2', lower_break=True) + +def test_lower_breaks_3(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the then-clause, even if + there are statements preceding the break. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', (assign_x('c', const_float(1)) + + break_()))) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_3', lower_break=True) + +def test_lower_breaks_4(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the else-clause. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', [], break_())) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_4', lower_break=True) + +def test_lower_breaks_5(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the else-clause, even if + there are statements preceding the break. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', [], (assign_x('c', const_float(1)) + + break_()))) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_5', lower_break=True) + +def test_lower_breaks_6(): + doc_string = """If a loop contains conditional breaks and continues, and + ends in an unconditional break, then the unconditional break + needs to be lowered, because it will no longer be at the end + of the loop after the final break is added. + """ + input_sexp = make_test_case('main', 'void', ( + loop(simple_if('a', (complex_if('b', continue_()) + + complex_if('c', break_()))) + + break_()) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_break_flag() + + loop(declare_execute_flag() + + simple_if( + 'a', + (complex_if('b', lowered_continue()) + + if_execute_flag( + complex_if('c', lowered_break())))) + + if_execute_flag(lowered_break_simple()) + + final_break()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_6', + lower_break=True, lower_continue=True) + +def test_lower_guarded_conditional_break(): + doc_string = """Normally a conditional break at the end of a loop isn't + lowered, however if the conditional break gets placed inside + an if(execute_flag) because of earlier lowering of continues, + then the break needs to be lowered. + """ + input_sexp = make_test_case('main', 'void', ( + loop(complex_if('a', continue_()) + + simple_if('b', break_())) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_break_flag() + + loop(declare_execute_flag() + + complex_if('a', lowered_continue()) + + if_execute_flag(simple_if('b', lowered_break())) + + final_break()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_guarded_conditional_break', + lower_break=True, lower_continue=True) + +def test_remove_continue_at_end_of_loop(): + doc_string = """Test that a redundant continue-statement at the end of a + loop is removed. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + continue_()) + )) + expected_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'remove_continue_at_end_of_loop') + +def test_lower_return_void_at_end_of_loop(): + doc_string = """Test that a return of void at the end of a loop is properly + lowered. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + return_()) + + assign_x('b', const_float(2)) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_return_flag() + + loop(assign_x('a', const_float(1)) + + lowered_return_simple() + + break_()) + + if_not_return_flag(assign_x('b', const_float(2))) + )) + create_test_case(doc_string, input_sexp, input_sexp, 'return_void_at_end_of_loop_lower_nothing') + create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return', + lower_main_return=True) + create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return_and_break', + lower_main_return=True, lower_break=True) + +def test_lower_return_non_void_at_end_of_loop(): + doc_string = """Test that a non-void return at the end of a loop is + properly lowered. + """ + input_sexp = make_test_case('sub', 'float', ( + loop(assign_x('a', const_float(1)) + + return_(const_float(2))) + + assign_x('b', const_float(3)) + + return_(const_float(4)) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + loop(assign_x('a', const_float(1)) + + lowered_return_simple(const_float(2)) + + break_()) + + if_not_return_flag(assign_x('b', const_float(3)) + + lowered_return(const_float(4))) + + final_return() + )) + create_test_case(doc_string, input_sexp, input_sexp, 'return_non_void_at_end_of_loop_lower_nothing') + create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return', + lower_sub_return=True) + create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return_and_break', + lower_sub_return=True, lower_break=True) + +if __name__ == '__main__': + test_lower_returns_main() + test_lower_returns_sub() + test_lower_returns_1() + test_lower_returns_2() + test_lower_returns_3() + test_lower_returns_4() + test_lower_unified_returns() + test_lower_pulled_out_jump() + test_lower_breaks_1() + test_lower_breaks_2() + test_lower_breaks_3() + test_lower_breaks_4() + test_lower_breaks_5() + test_lower_breaks_6() + test_lower_guarded_conditional_break() + test_remove_continue_at_end_of_loop() + test_lower_return_void_at_end_of_loop() + test_lower_return_non_void_at_end_of_loop() diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test new file mode 100755 index 00000000000..01ad7087a28 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test @@ -0,0 +1,13 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains an unconditional break at the bottom of +# it, it should not be lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF +((declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) break)))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected new file mode 100644 index 00000000000..d4bb6fc0274 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected @@ -0,0 +1,5 @@ +((declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) break)))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test new file mode 100755 index 00000000000..0be22f953e1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test @@ -0,0 +1,15 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the then-clause. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) (break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected new file mode 100644 index 00000000000..a4cb2d6a125 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) (break) + ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test new file mode 100755 index 00000000000..4149360b5d0 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the then-clause, even if +# there are statements preceding the break. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref c) (constant float (1.000000))) break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected new file mode 100644 index 00000000000..325f7b49a5d --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref c) (constant float (1.000000))) break) + ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test new file mode 100755 index 00000000000..70458bb4f8e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test @@ -0,0 +1,15 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the else-clause. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + (break)))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected new file mode 100644 index 00000000000..a7735457cb8 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + (break)))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test new file mode 100755 index 00000000000..da9eef1105e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains a conditional break at the bottom of it, +# it should not be lowered if it is in the else-clause, even if +# there are statements preceding the break. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + ((assign (x) (var_ref c) (constant float (1.000000))) break)))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected new file mode 100644 index 00000000000..0dd4a529383 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected @@ -0,0 +1,7 @@ +((declare (in) float b) (declare (out) float a) (declare (out) float c) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (if (expression bool > (var_ref b) (constant float (0.000000))) () + ((assign (x) (var_ref c) (constant float (1.000000))) break)))))))) diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test new file mode 100755 index 00000000000..9440dfec897 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test @@ -0,0 +1,29 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If a loop contains conditional breaks and continues, and +# ends in an unconditional break, then the unconditional break +# needs to be lowered, because it will no longer be at the end +# of the loop after the final break is added. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF +((declare (in) float a) (declare (in) float ba) (declare (in) float bb) + (declare (in) float ca) + (declare (in) float cb) + (function main + (signature void (parameters) + ((loop () () () () + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref ba) (constant float (0.000000))) + ((if (expression bool > (var_ref bb) (constant float (0.000000))) + (continue) + ())) + ()) + (if (expression bool > (var_ref ca) (constant float (0.000000))) + ((if (expression bool > (var_ref cb) (constant float (0.000000))) + (break) + ())) + ())) + ()) + break)))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected new file mode 100644 index 00000000000..8222328e00c --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected @@ -0,0 +1,29 @@ +((declare (in) float a) (declare (in) float ba) (declare (in) float bb) + (declare (in) float ca) + (declare (in) float cb) + (function main + (signature void (parameters) + ((declare (temporary) bool break_flag) + (assign (x) (var_ref break_flag) (constant bool (0))) + (loop () () () () + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref ba) (constant float (0.000000))) + ((if (expression bool > (var_ref bb) (constant float (0.000000))) + ((assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref ca) (constant float (0.000000))) + ((if (expression bool > (var_ref cb) (constant float (0.000000))) + ((assign (x) (var_ref break_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ())) + ())) + ()) + (if (var_ref execute_flag) + ((assign (x) (var_ref break_flag) (constant bool (1)))) + ()) + (if (var_ref break_flag) (break) ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test new file mode 100755 index 00000000000..379aa59b5a2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test @@ -0,0 +1,21 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Normally a conditional break at the end of a loop isn't +# lowered, however if the conditional break gets placed inside +# an if(execute_flag) because of earlier lowering of continues, +# then the break needs to be lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (function main + (signature void (parameters) + ((loop () () () () + ((if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + (continue) + ())) + ()) + (if (expression bool > (var_ref b) (constant float (0.000000))) (break) + ()))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected new file mode 100644 index 00000000000..7c6e73f77f8 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected @@ -0,0 +1,20 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool break_flag) + (assign (x) (var_ref break_flag) (constant bool (0))) + (loop () () () () + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref break_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref break_flag) (break) ()))))))) diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test new file mode 100755 index 00000000000..15f3c41d5a2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test @@ -0,0 +1,28 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If one branch of an if ends in a jump, and control cannot +# fall out the bottom of the other branch, and pull_out_jumps is +# True, then the jump is lifted outside the if. +# Verify that this lowering occurs during the same pass as the +# lowering of other jumps by checking that extra temporary +# variables aren't generated. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (declare (out) float d) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((return)) + ())) + ()) + (loop () () () () + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) (break) + (continue))) + ((return))))) + (assign (x) (var_ref d) (constant float (1.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected new file mode 100644 index 00000000000..bf45c2c93b6 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected @@ -0,0 +1,25 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (declare (out) float d) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((loop () () () () + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) () + (continue))) + ((assign (x) (var_ref return_flag) (constant bool (1))))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref d) (constant float (1.000000)))))) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test new file mode 100755 index 00000000000..a1f895bbf78 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test @@ -0,0 +1,12 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a void return at the end of a function is +# eliminated. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF +((declare (out) float a) + (function main + (signature void (parameters) + ((assign (x) (var_ref a) (constant float (1.000000))) (return))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected new file mode 100644 index 00000000000..7c3919c016e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected @@ -0,0 +1,4 @@ +((declare (out) float a) + (function main + (signature void (parameters) + ((assign (x) (var_ref a) (constant float (1.000000))))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test new file mode 100755 index 00000000000..61673d4ef66 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test @@ -0,0 +1,13 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that lowering is not performed on a non-void return at +# the end of subroutine. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF +((declare (out) float a) + (function sub + (signature float (parameters) + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (1.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected new file mode 100644 index 00000000000..7777927f5a3 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected @@ -0,0 +1,5 @@ +((declare (out) float a) + (function sub + (signature float (parameters) + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (1.000000))))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test new file mode 100755 index 00000000000..9881e249270 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test @@ -0,0 +1,20 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test lowering of returns when there is one nested inside a +# complex structure of ifs, and one at the end of a function. +# In this case, the latter return needs to be lowered because it +# will not be at the end of the function once the final return +# is inserted. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF +((declare (in) float a) (declare (in) float b) + (function sub + (signature float (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return (constant float (1.000000)))) + ())) + ()) + (return (constant float (2.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected new file mode 100644 index 00000000000..d4835e96b7c --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected @@ -0,0 +1,21 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_value) (constant float (1.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ()) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test new file mode 100755 index 00000000000..9f54c67a180 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test @@ -0,0 +1,14 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that returns are properly lowered when they occur in +# both branches of an if-statement. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF +((declare (in) float a) + (function sub + (signature float (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((return (constant float (1.000000)))) + ((return (constant float (2.000000))))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected new file mode 100644 index 00000000000..b551a066f43 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected @@ -0,0 +1,16 @@ +((declare (in) float a) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((assign (x) (var_ref return_value) (constant float (1.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ((assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0))))) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test new file mode 100755 index 00000000000..5f97bfd3f5a --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_main_return +# flag in deciding whether to lower returns in the main +# function. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected new file mode 100644 index 00000000000..e8b36f14478 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test new file mode 100755 index 00000000000..59c7ba1dd52 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test @@ -0,0 +1,17 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_main_return +# flag in deciding whether to lower returns in the main +# function. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected new file mode 100644 index 00000000000..e15a97d1db2 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected @@ -0,0 +1,13 @@ +((declare (in) float a) (declare (in) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test new file mode 100755 index 00000000000..40e784e3318 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_sub_return flag +# in deciding whether to lower returns in subroutines. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected new file mode 100644 index 00000000000..07db6e708f4 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected @@ -0,0 +1,8 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test new file mode 100755 index 00000000000..9fe6b90f085 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that do_lower_jumps respects the lower_sub_return flag +# in deciding whether to lower returns in subroutines. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((return)) + ())) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected new file mode 100644 index 00000000000..31109802351 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected @@ -0,0 +1,13 @@ +((declare (in) float a) (declare (in) float b) + (function sub + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref a) (constant float (0.000000))) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test new file mode 100755 index 00000000000..e7168131487 --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test @@ -0,0 +1,26 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# If both branches of an if statement end in a return, and +# pull_out_jumps is True, then those returns should be lifted +# outside the if and then properly lowered. +# Verify that this lowering occurs during the same pass as the +# lowering of other returns by checking that extra temporary +# variables aren't generated. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (function main + (signature void (parameters) + ((if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((return)) + ())) + ()) + (if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) + ((return)) + ((return)))) + ()))))) +EOF diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected new file mode 100644 index 00000000000..271cd3b462e --- /dev/null +++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected @@ -0,0 +1,21 @@ +((declare (in) float aa) (declare (in) float ab) (declare (in) float b) + (declare (in) float c) + (function main + (signature void (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (if (expression bool > (var_ref aa) (constant float (0.000000))) + ((if (expression bool > (var_ref ab) (constant float (0.000000))) + ((assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()) + (if (var_ref execute_flag) + ((if (expression bool > (var_ref b) (constant float (0.000000))) + ((if (expression bool > (var_ref c) (constant float (0.000000))) () ()) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0)))) + ())) + ()))))) diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test new file mode 100755 index 00000000000..18efc37f6e1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test @@ -0,0 +1,13 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a redundant continue-statement at the end of a +# loop is removed. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF +((declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) continue)))))) +EOF diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected new file mode 100644 index 00000000000..d2a02c6f380 --- /dev/null +++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected @@ -0,0 +1,5 @@ +((declare (out) float a) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))))))))) diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test new file mode 100755 index 00000000000..79c0e824512 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a non-void return at the end of a loop is +# properly lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (2.000000))))) + (assign (x) (var_ref b) (constant float (3.000000))) + (return (constant float (4.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected new file mode 100644 index 00000000000..2cf117a5ee1 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected @@ -0,0 +1,8 @@ +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (2.000000))))) + (assign (x) (var_ref b) (constant float (3.000000))) + (return (constant float (4.000000))))))) diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test new file mode 100755 index 00000000000..920d2ad9fba --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a non-void return at the end of a loop is +# properly lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (2.000000))))) + (assign (x) (var_ref b) (constant float (3.000000))) + (return (constant float (4.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected new file mode 100644 index 00000000000..0bab8f16f30 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected @@ -0,0 +1,19 @@ +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref b) (constant float (3.000000))) + (assign (x) (var_ref return_value) (constant float (4.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0))))) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test new file mode 100755 index 00000000000..99f1f863506 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a non-void return at the end of a loop is +# properly lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 1)' <<EOF +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (return (constant float (2.000000))))) + (assign (x) (var_ref b) (constant float (3.000000))) + (return (constant float (4.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected new file mode 100644 index 00000000000..0bab8f16f30 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected @@ -0,0 +1,19 @@ +((declare (out) float a) (declare (out) float b) + (function sub + (signature float (parameters) + ((declare (temporary) bool execute_flag) + (assign (x) (var_ref execute_flag) (constant bool (1))) + (declare (temporary) float return_value) + (declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (assign (x) (var_ref return_value) (constant float (2.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref b) (constant float (3.000000))) + (assign (x) (var_ref return_value) (constant float (4.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + (assign (x) (var_ref execute_flag) (constant bool (0))))) + (return (var_ref return_value)))))) diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test new file mode 100755 index 00000000000..63487d32691 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test @@ -0,0 +1,14 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a return of void at the end of a loop is properly +# lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) (return))) + (assign (x) (var_ref b) (constant float (2.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected new file mode 100644 index 00000000000..0bd8037bf00 --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected @@ -0,0 +1,6 @@ +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) (return))) + (assign (x) (var_ref b) (constant float (2.000000))))))) diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test new file mode 100755 index 00000000000..523c92a686d --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test @@ -0,0 +1,14 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a return of void at the end of a loop is properly +# lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) (return))) + (assign (x) (var_ref b) (constant float (2.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected new file mode 100644 index 00000000000..53814eaacad --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected @@ -0,0 +1,11 @@ +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref b) (constant float (2.000000))))))))) diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test new file mode 100755 index 00000000000..22b5581cbda --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test @@ -0,0 +1,14 @@ +#!/bin/bash +# +# This file was generated by create_test_cases.py. +# +# Test that a return of void at the end of a loop is properly +# lowered. +../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 1)' <<EOF +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) (return))) + (assign (x) (var_ref b) (constant float (2.000000))))))) +EOF diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected new file mode 100644 index 00000000000..53814eaacad --- /dev/null +++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected @@ -0,0 +1,11 @@ +((declare (out) float a) (declare (out) float b) + (function main + (signature void (parameters) + ((declare (temporary) bool return_flag) + (assign (x) (var_ref return_flag) (constant bool (0))) + (loop () () () () + ((assign (x) (var_ref a) (constant float (1.000000))) + (assign (x) (var_ref return_flag) (constant bool (1))) + break)) + (if (var_ref return_flag) () + ((assign (x) (var_ref b) (constant float (2.000000))))))))) diff --git a/src/glsl/tests/optimization-test b/src/glsl/tests/optimization-test new file mode 100755 index 00000000000..0c130be1379 --- /dev/null +++ b/src/glsl/tests/optimization-test @@ -0,0 +1,28 @@ +#!/bin/bash + +total=0 +pass=0 + +echo "====== Testing optimization passes ======" +for test in `find . -iname '*.opt_test'`; do + echo -n "Testing $test..." + (cd `dirname "$test"`; ./`basename "$test"`) > "$test.out" 2>&1 + total=$((total+1)) + if ./compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + ./compare_ir "$test.expected" "$test.out" + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [[ $pass == $total ]]; then + exit 0 +else + exit 1 +fi diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py new file mode 100644 index 00000000000..a714af8d236 --- /dev/null +++ b/src/glsl/tests/sexps.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This file contains helper functions for manipulating sexps in Python. +# +# We represent a sexp in Python using nested lists containing strings. +# So, for example, the sexp (constant float (1.000000)) is represented +# as ['constant', 'float', ['1.000000']]. + +import re + +def check_sexp(sexp): + """Verify that the argument is a proper sexp. + + That is, raise an exception if the argument is not a string or a + list, or if it contains anything that is not a string or a list at + any nesting level. + """ + if isinstance(sexp, list): + for s in sexp: + check_sexp(s) + elif not isinstance(sexp, basestring): + raise Exception('Not a sexp: {0!r}'.format(sexp)) + +def parse_sexp(sexp): + """Convert a string, of the form that would be output by mesa, + into a sexp represented as nested lists containing strings. + """ + sexp_token_regexp = re.compile( + '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]') + stack = [[]] + for match in sexp_token_regexp.finditer(sexp): + token = match.group(0) + if token == '(': + stack.append([]) + elif token == ')': + if len(stack) == 1: + raise Exception('Unmatched )') + sexp = stack.pop() + stack[-1].append(sexp) + else: + stack[-1].append(token) + if len(stack) != 1: + raise Exception('Unmatched (') + if len(stack[0]) != 1: + raise Exception('Multiple sexps') + return stack[0][0] + +def sexp_to_string(sexp): + """Convert a sexp, represented as nested lists containing strings, + into a single string of the form parseable by mesa. + """ + if isinstance(sexp, basestring): + return sexp + assert isinstance(sexp, list) + result = '' + for s in sexp: + sub_result = sexp_to_string(s) + if result == '': + result = sub_result + elif '\n' not in result and '\n' not in sub_result and \ + len(result) + len(sub_result) + 1 <= 70: + result += ' ' + sub_result + else: + result += '\n' + sub_result + return '({0})'.format(result.replace('\n', '\n ')) + +def sort_decls(sexp): + """Sort all toplevel variable declarations in sexp. + + This is used to work around the fact that + ir_reader::read_instructions reorders declarations. + """ + assert isinstance(sexp, list) + decls = [] + other_code = [] + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare': + decls.append(s) + else: + other_code.append(s) + return sorted(decls) + other_code + diff --git a/src/glw/GLwDrawA.c b/src/glw/GLwDrawA.c deleted file mode 100644 index 30304a40801..00000000000 --- a/src/glw/GLwDrawA.c +++ /dev/null @@ -1,684 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ - -/* - * - * This file has been slightly modified from the original for use with Mesa - * - * Jeroen van der Zijp - * - * - */ -#include <X11/IntrinsicP.h> -#include <X11/StringDefs.h> -#include <GL/glx.h> -#include <GL/gl.h> -#ifdef __GLX_MOTIF -#include <Xm/PrimitiveP.h> -#include "GLwMDrawAP.h" -#else -#include "GLwDrawAP.h" -#endif -#include <assert.h> -#include <stdio.h> - -#ifdef __GLX_MOTIF -#define GLwDrawingAreaWidget GLwMDrawingAreaWidget -#define GLwDrawingAreaClassRec GLwMDrawingAreaClassRec -#define glwDrawingAreaClassRec glwMDrawingAreaClassRec -#define glwDrawingAreaWidgetClass glwMDrawingAreaWidgetClass -#define GLwDrawingAreaRec GLwMDrawingAreaRec -#endif - -#define ATTRIBLIST_SIZE 32 - -#define offset(field) XtOffset(GLwDrawingAreaWidget,glwDrawingArea.field) - - -/* forward definitions */ -static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value); -static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args); -static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes); -static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region); -static void Resize(GLwDrawingAreaWidget glw); -static void Destroy(GLwDrawingAreaWidget glw); -static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams); - - - -static char defaultTranslations[] = -#ifdef __GLX_MOTIF - "<Key>osfHelp:PrimitiveHelp() \n" -#endif - "<KeyDown>: glwInput() \n\ - <KeyUp>: glwInput() \n\ - <BtnDown>: glwInput() \n\ - <BtnUp>: glwInput() \n\ - <BtnMotion>: glwInput() "; - - -static XtActionsRec actions[] = { - {"glwInput",(XtActionProc)glwInput}, /* key or mouse input */ - }; - - -/* - * There is a bit of unusual handling of the resources here. - * Because Xt insists on allocating the colormap resource when it is - * processing the core resources (even if we redeclare the colormap - * resource here, we need to do a little trick. When Xt first allocates - * the colormap, we allow it to allocate the default one, since we have - * not yet determined the appropriate visual (which is determined from - * resources parsed after the colormap). We also let it allocate colors - * in that default colormap. - * - * In the initialize proc we calculate the actual visual. Then, we - * reobtain the colormap resource using XtGetApplicationResources in - * the initialize proc. If requested, we also reallocate colors in - * that colormap using the same method. - */ - -static XtResource resources[] = { - /* The GLX attributes. Add any new attributes here */ - - {GLwNbufferSize, GLwCBufferSize, XtRInt, sizeof (int), - offset(bufferSize), XtRImmediate, (XtPointer) 0}, - - {GLwNlevel, GLwCLevel, XtRInt, sizeof (int), - offset(level), XtRImmediate, (XtPointer) 0}, - - {GLwNrgba, GLwCRgba, XtRBoolean, sizeof (Boolean), - offset(rgba), XtRImmediate, (XtPointer) FALSE}, - - {GLwNdoublebuffer, GLwCDoublebuffer, XtRBoolean, sizeof (Boolean), - offset(doublebuffer), XtRImmediate, (XtPointer) FALSE}, - - {GLwNstereo, GLwCStereo, XtRBoolean, sizeof (Boolean), - offset(stereo), XtRImmediate, (XtPointer) FALSE}, - - {GLwNauxBuffers, GLwCAuxBuffers, XtRInt, sizeof (int), - offset(auxBuffers), XtRImmediate, (XtPointer) 0}, - - {GLwNredSize, GLwCColorSize, XtRInt, sizeof (int), - offset(redSize), XtRImmediate, (XtPointer) 1}, - - {GLwNgreenSize, GLwCColorSize, XtRInt, sizeof (int), - offset(greenSize), XtRImmediate, (XtPointer) 1}, - - {GLwNblueSize, GLwCColorSize, XtRInt, sizeof (int), - offset(blueSize), XtRImmediate, (XtPointer) 1}, - - {GLwNalphaSize, GLwCAlphaSize, XtRInt, sizeof (int), - offset(alphaSize), XtRImmediate, (XtPointer) 0}, - - {GLwNdepthSize, GLwCDepthSize, XtRInt, sizeof (int), - offset(depthSize), XtRImmediate, (XtPointer) 0}, - - {GLwNstencilSize, GLwCStencilSize, XtRInt, sizeof (int), - offset(stencilSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumRedSize, GLwCAccumColorSize, XtRInt, sizeof (int), - offset(accumRedSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumGreenSize, GLwCAccumColorSize, XtRInt, sizeof (int), - offset(accumGreenSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumBlueSize, GLwCAccumColorSize, XtRInt, sizeof (int), - offset(accumBlueSize), XtRImmediate, (XtPointer) 0}, - - {GLwNaccumAlphaSize, GLwCAccumAlphaSize, XtRInt, sizeof (int), - offset(accumAlphaSize), XtRImmediate, (XtPointer) 0}, - - /* the attribute list */ - {GLwNattribList, GLwCAttribList, XtRPointer, sizeof(int *), - offset(attribList), XtRImmediate, (XtPointer) NULL}, - - /* the visual info */ - {GLwNvisualInfo, GLwCVisualInfo, GLwRVisualInfo, sizeof (XVisualInfo *), - offset(visualInfo), XtRImmediate, (XtPointer) NULL}, - - /* miscellaneous resources */ - {GLwNinstallColormap, GLwCInstallColormap, XtRBoolean, sizeof (Boolean), - offset(installColormap), XtRImmediate, (XtPointer) TRUE}, - - {GLwNallocateBackground, GLwCAllocateColors, XtRBoolean, sizeof (Boolean), - offset(allocateBackground), XtRImmediate, (XtPointer) FALSE}, - - {GLwNallocateOtherColors, GLwCAllocateColors, XtRBoolean, sizeof (Boolean), - offset(allocateOtherColors), XtRImmediate, (XtPointer) FALSE}, - - {GLwNinstallBackground, GLwCInstallBackground, XtRBoolean, sizeof (Boolean), - offset(installBackground), XtRImmediate, (XtPointer) TRUE}, - - {GLwNginitCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(ginitCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNinputCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(inputCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNresizeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(resizeCallback), XtRImmediate, (XtPointer) NULL}, - - {GLwNexposeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList), - offset(exposeCallback), XtRImmediate, (XtPointer) NULL}, - - /* Changes to Motif primitive resources */ -#ifdef __GLX_MOTIF - {XmNtraversalOn, XmCTraversalOn, XmRBoolean, sizeof (Boolean), - XtOffset (GLwDrawingAreaWidget, primitive.traversal_on), XmRImmediate, - (XtPointer)FALSE}, - - /* highlighting is normally disabled, as when Motif tries to disable - * highlighting, it tries to reset the color back to the parent's - * background (usually Motif blue). Unfortunately, that is in a - * different colormap, and doesn't work too well. - */ - {XmNhighlightOnEnter, XmCHighlightOnEnter, XmRBoolean, sizeof (Boolean), - XtOffset (GLwDrawingAreaWidget, primitive.highlight_on_enter), - XmRImmediate, (XtPointer) FALSE}, - - {XmNhighlightThickness, XmCHighlightThickness, XmRHorizontalDimension, - sizeof (Dimension), - XtOffset (GLwDrawingAreaWidget, primitive.highlight_thickness), - XmRImmediate, (XtPointer) 0}, -#endif - }; - - -/* -** The following resources are reobtained using XtGetApplicationResources -** in the initialize proc. -*/ - -/* The colormap */ -static XtResource initializeResources[] = { - /* reobtain the colormap with the new visual */ - {XtNcolormap, XtCColormap, XtRColormap, sizeof(Colormap), - XtOffset(GLwDrawingAreaWidget, core.colormap), - XtRCallProc,(XtPointer) createColormap}, - }; - - -/* reallocate any colors we need in the new colormap */ - -/* The background is obtained only if the allocateBackground resource is TRUE*/ -static XtResource backgroundResources[] = { -#ifdef __GLX_MOTIF - {XmNbackground, XmCBackground,XmRPixel, - sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,core.background_pixel), - XmRString,(XtPointer)"lightgrey"}, - /*XmRCallProc,(XtPointer)_XmBackgroundColorDefault},*/ - - {XmNbackgroundPixmap,XmCPixmap,XmRXmBackgroundPixmap, - sizeof(Pixmap),XtOffset(GLwDrawingAreaWidget,core.background_pixmap), - XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP}, - -#else - {XtNbackground,XtCBackground,XtRPixel,sizeof(Pixel), - XtOffset(GLwDrawingAreaWidget,core.background_pixel), - XtRString,(XtPointer)"lightgrey"}, - /*XtRString,(XtPointer)"XtDefaultBackground"},*/ - - {XtNbackgroundPixmap, XtCPixmap, XtRPixmap, sizeof(Pixmap), - XtOffset(GLwDrawingAreaWidget,core.background_pixmap), - XtRImmediate,(XtPointer)XtUnspecifiedPixmap}, -#endif - }; - - - -/* The other colors such as the foreground are allocated only if - * allocateOtherColors are set. These resources only exist in Motif. - */ -#ifdef __GLX_MOTIF -static XtResource otherColorResources[] = { - {XmNforeground,XmCForeground,XmRPixel, - sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,primitive.foreground), - XmRString,(XtPointer)"lighgrey"}, - /*XmRCallProc, (XtPointer) _XmForegroundColorDefault},*/ - - {XmNhighlightColor,XmCHighlightColor,XmRPixel,sizeof(Pixel), - XtOffset(GLwDrawingAreaWidget,primitive.highlight_color), - XmRString,(XtPointer)"lightgrey"}, - /*XmRCallProc,(XtPointer)_XmHighlightColorDefault},*/ - - {XmNhighlightPixmap,XmCHighlightPixmap,XmRPrimHighlightPixmap, - sizeof(Pixmap), - XtOffset(GLwDrawingAreaWidget,primitive.highlight_pixmap), - XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP}, - /*XmRCallProc,(XtPointer)_XmPrimitiveHighlightPixmapDefault},*/ - }; -#endif - - -#undef offset - - -GLwDrawingAreaClassRec glwDrawingAreaClassRec = { - { /* core fields */ -#ifdef __GLX_MOTIF - /* superclass */ (WidgetClass) &xmPrimitiveClassRec, - /* class_name */ "GLwMDrawingArea", -#else /* not __GLX_MOTIF */ - /* superclass */ (WidgetClass) &widgetClassRec, - /* class_name */ "GLwDrawingArea", -#endif /* __GLX_MOTIF */ - /* widget_size */ sizeof(GLwDrawingAreaRec), - /* class_initialize */ NULL, - /* class_part_initialize */ NULL, - /* class_inited */ FALSE, - /* initialize */ (XtInitProc) Initialize, - /* initialize_hook */ NULL, - /* realize */ Realize, - /* actions */ actions, - /* num_actions */ XtNumber(actions), - /* resources */ resources, - /* num_resources */ XtNumber(resources), - /* xrm_class */ NULLQUARK, - /* compress_motion */ TRUE, - /* compress_exposure */ TRUE, - /* compress_enterleave */ TRUE, - /* visible_interest */ TRUE, - /* destroy */ (XtWidgetProc) Destroy, - /* resize */ (XtWidgetProc) Resize, - /* expose */ (XtExposeProc) Redraw, - /* set_values */ NULL, - /* set_values_hook */ NULL, - /* set_values_almost */ XtInheritSetValuesAlmost, - /* get_values_hook */ NULL, - /* accept_focus */ NULL, - /* version */ XtVersion, - /* callback_private */ NULL, - /* tm_table */ defaultTranslations, - /* query_geometry */ XtInheritQueryGeometry, - /* display_accelerator */ XtInheritDisplayAccelerator, - /* extension */ NULL - }, -#ifdef __GLX_MOTIF /* primitive resources */ - { - /* border_highlight */ XmInheritBorderHighlight, - /* border_unhighlight */ XmInheritBorderUnhighlight, - /* translations */ XtInheritTranslations, - /* arm_and_activate */ NULL, - /* get_resources */ NULL, - /* num get_resources */ 0, - /* extension */ NULL, - } -#endif - }; - -WidgetClass glwDrawingAreaWidgetClass=(WidgetClass)&glwDrawingAreaClassRec; - - - -static void error(Widget w,char* string){ - char buf[100]; -#ifdef __GLX_MOTIF - sprintf(buf,"GLwMDrawingArea: %s\n",string); -#else - sprintf(buf,"GLwDrawingArea: %s\n",string); -#endif - XtAppError(XtWidgetToApplicationContext(w),buf); - } - - -static void warning(Widget w,char* string){ - char buf[100]; -#ifdef __GLX_MOTIF - sprintf (buf, "GLwMDraw: %s\n", string); -#else - sprintf (buf, "GLwDraw: %s\n", string); -#endif - XtAppWarning(XtWidgetToApplicationContext(w), buf); - } - - - -/* Initialize the attribList based on the attributes */ -static void createAttribList(GLwDrawingAreaWidget w){ - int *ptr; - w->glwDrawingArea.attribList = (int*)XtMalloc(ATTRIBLIST_SIZE*sizeof(int)); - if(!w->glwDrawingArea.attribList){ - error((Widget)w,"Unable to allocate attribute list"); - } - ptr = w->glwDrawingArea.attribList; - *ptr++ = GLX_BUFFER_SIZE; - *ptr++ = w->glwDrawingArea.bufferSize; - *ptr++ = GLX_LEVEL; - *ptr++ = w->glwDrawingArea.level; - if(w->glwDrawingArea.rgba) *ptr++ = GLX_RGBA; - if(w->glwDrawingArea.doublebuffer) *ptr++ = GLX_DOUBLEBUFFER; - if(w->glwDrawingArea.stereo) *ptr++ = GLX_STEREO; - *ptr++ = GLX_AUX_BUFFERS; - *ptr++ = w->glwDrawingArea.auxBuffers; - *ptr++ = GLX_RED_SIZE; - *ptr++ = w->glwDrawingArea.redSize; - *ptr++ = GLX_GREEN_SIZE; - *ptr++ = w->glwDrawingArea.greenSize; - *ptr++ = GLX_BLUE_SIZE; - *ptr++ = w->glwDrawingArea.blueSize; - *ptr++ = GLX_ALPHA_SIZE; - *ptr++ = w->glwDrawingArea.alphaSize; - *ptr++ = GLX_DEPTH_SIZE; - *ptr++ = w->glwDrawingArea.depthSize; - *ptr++ = GLX_STENCIL_SIZE; - *ptr++ = w->glwDrawingArea.stencilSize; - *ptr++ = GLX_ACCUM_RED_SIZE; - *ptr++ = w->glwDrawingArea.accumRedSize; - *ptr++ = GLX_ACCUM_GREEN_SIZE; - *ptr++ = w->glwDrawingArea.accumGreenSize; - *ptr++ = GLX_ACCUM_BLUE_SIZE; - *ptr++ = w->glwDrawingArea.accumBlueSize; - *ptr++ = GLX_ACCUM_ALPHA_SIZE; - *ptr++ = w->glwDrawingArea.accumAlphaSize; - *ptr++ = None; - assert((ptr-w->glwDrawingArea.attribList)<ATTRIBLIST_SIZE); - } - - - -/* Initialize the visualInfo based on the attribute list */ -static void createVisualInfo(GLwDrawingAreaWidget w){ - assert(w->glwDrawingArea.attribList); - w->glwDrawingArea.visualInfo=glXChooseVisual(XtDisplay(w),XScreenNumberOfScreen(XtScreen(w)),w->glwDrawingArea.attribList); - if(!w->glwDrawingArea.visualInfo) error((Widget)w,"requested visual not supported"); - } - - - -/* Initialize the colormap based on the visual info. - * This routine maintains a cache of visual-infos to colormaps. If two - * widgets share the same visual info, they share the same colormap. - * This function is called by the callProc of the colormap resource entry. - */ -static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value){ - static struct cmapCache { Visual *visual; Colormap cmap; } *cmapCache; - static int cacheEntries=0; - static int cacheMalloced=0; - register int i; - - assert(w->glwDrawingArea.visualInfo); - - /* see if we can find it in the cache */ - for(i=0; i<cacheEntries; i++){ - if(cmapCache[i].visual==w->glwDrawingArea.visualInfo->visual){ - value->addr=(XtPointer)(&cmapCache[i].cmap); - return; - } - } - - /* not in the cache, create a new entry */ - if(cacheEntries >= cacheMalloced){ - /* need to malloc a new one. Since we are likely to have only a - * few colormaps, we allocate one the first time, and double - * each subsequent time. - */ - if(cacheMalloced==0){ - cacheMalloced=1; - cmapCache=(struct cmapCache*)XtMalloc(sizeof(struct cmapCache)); - } - else{ - cacheMalloced<<=1; - cmapCache=(struct cmapCache*)XtRealloc((char*)cmapCache,sizeof(struct cmapCache)*cacheMalloced); - } - } - - cmapCache[cacheEntries].cmap=XCreateColormap(XtDisplay(w), - RootWindow(XtDisplay(w), - w->glwDrawingArea.visualInfo->screen), - w->glwDrawingArea.visualInfo->visual, - AllocNone); - cmapCache[cacheEntries].visual=w->glwDrawingArea.visualInfo->visual; - value->addr=(XtPointer)(&cmapCache[cacheEntries++].cmap); - } - - - -static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args){ - - /* fix size */ - if(req->core.width==0) neww->core.width=100; - if(req->core.height==0) neww->core.width=100; - - /* create the attribute list if needed */ - neww->glwDrawingArea.myList=FALSE; - if(neww->glwDrawingArea.attribList==NULL){ - neww->glwDrawingArea.myList=TRUE; - createAttribList(neww); - } - - /* Gotta have it */ - assert(neww->glwDrawingArea.attribList); - - /* determine the visual info if needed */ - neww->glwDrawingArea.myVisual=FALSE; - if(neww->glwDrawingArea.visualInfo==NULL){ - neww->glwDrawingArea.myVisual=TRUE; - createVisualInfo(neww); - } - - /* Gotta have that too */ - assert(neww->glwDrawingArea.visualInfo); - - neww->core.depth=neww->glwDrawingArea.visualInfo->depth; - - /* Reobtain the colormap and colors in it using XtGetApplicationResources*/ - XtGetApplicationResources((Widget)neww,neww,initializeResources,XtNumber(initializeResources),args,*num_args); - - /* obtain the color resources if appropriate */ - if(req->glwDrawingArea.allocateBackground){ - XtGetApplicationResources((Widget)neww,neww,backgroundResources,XtNumber(backgroundResources),args,*num_args); - } - -#ifdef __GLX_MOTIF - if(req->glwDrawingArea.allocateOtherColors){ - XtGetApplicationResources((Widget)neww,neww,otherColorResources,XtNumber(otherColorResources),args,*num_args); - } -#endif - } - - - -static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes){ - register GLwDrawingAreaWidget glw=(GLwDrawingAreaWidget)w; - GLwDrawingAreaCallbackStruct cb; - Widget parentShell; - Status status; - Window windows[2],*windowsReturn,*windowList; - int countReturn,i; - - /* if we haven't requested that the background be both installed and - * allocated, don't install it. - */ - if(!(glw->glwDrawingArea.installBackground && glw->glwDrawingArea.allocateBackground)){ - *valueMask&=~CWBackPixel; - } - - XtCreateWindow(w,(unsigned int)InputOutput,glw->glwDrawingArea.visualInfo->visual,*valueMask,attributes); - - /* if appropriate, call XSetWMColormapWindows to install the colormap */ - if(glw->glwDrawingArea.installColormap){ - - /* Get parent shell */ - for(parentShell=XtParent(w); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell)); - - if(parentShell && XtWindow(parentShell)){ - - /* check to see if there is already a property */ - status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn); - - /* if no property, just create one */ - if(!status){ - windows[0]=XtWindow(w); - windows[1]=XtWindow(parentShell); - XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windows,2); - } - - /* there was a property, add myself to the beginning */ - else{ - windowList=(Window *)XtMalloc((sizeof(Window))*(countReturn+1)); - windowList[0]=XtWindow(w); - for(i=0; i<countReturn; i++) windowList[i+1]=windowsReturn[i]; - XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowList,countReturn+1); - XtFree((char*)windowList); - XtFree((char*)windowsReturn); - } - } - else{ - warning(w,"Could not set colormap property on parent shell"); - } - } - - /* Invoke callbacks */ - cb.reason=GLwCR_GINIT; - cb.event=NULL; - cb.width=glw->core.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.ginitCallback,&cb); - } - - - -static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region){ - GLwDrawingAreaCallbackStruct cb; - if(!XtIsRealized((Widget)w)) return; - cb.reason=GLwCR_EXPOSE; - cb.event=event; - cb.width=w->core.width; - cb.height=w->core.height; - XtCallCallbackList((Widget)w,w->glwDrawingArea.exposeCallback,&cb); - } - - - -static void Resize(GLwDrawingAreaWidget glw){ - GLwDrawingAreaCallbackStruct cb; - if(!XtIsRealized((Widget)glw)) return; - cb.reason=GLwCR_RESIZE; - cb.event=NULL; - cb.width=glw->core.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.resizeCallback,&cb); - } - - - -static void Destroy(GLwDrawingAreaWidget glw){ - Window *windowsReturn; - Widget parentShell; - Status status; - int countReturn; - register int i; - - if(glw->glwDrawingArea.myList && glw->glwDrawingArea.attribList){ - XtFree((XtPointer)glw->glwDrawingArea.attribList); - } - - if(glw->glwDrawingArea.myVisual && glw->glwDrawingArea.visualInfo){ - XtFree((XtPointer)glw->glwDrawingArea.visualInfo); - } - - /* if my colormap was installed, remove it */ - if(glw->glwDrawingArea.installColormap){ - - /* Get parent shell */ - for(parentShell=XtParent(glw); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell)); - - if(parentShell && XtWindow(parentShell)){ - - /* make sure there is a property */ - status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn); - - /* if no property, just return. If there was a property, continue */ - if(status){ - - /* search for a match */ - for(i=0; i<countReturn; i++){ - if(windowsReturn[i]==XtWindow(glw)){ - - /* we found a match, now copy the rest down */ - for(i++; i<countReturn; i++){ windowsReturn[i-1]=windowsReturn[i]; } - - XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowsReturn,countReturn-1); - break; - } - } - XtFree((char *)windowsReturn); - } - } - } - } - - - -/* Action routine for keyboard and mouse events */ -static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams){ - GLwDrawingAreaCallbackStruct cb; - cb.reason=GLwCR_INPUT; - cb.event=event; - cb.width=glw->core.width; - cb.height=glw->core.height; - XtCallCallbackList((Widget)glw,glw->glwDrawingArea.inputCallback,&cb); - } - - -#ifdef __GLX_MOTIF - -/* Create routine */ -Widget GLwCreateMDrawingArea(Widget parent, char *name,ArgList arglist,Cardinal argcount){ - return XtCreateWidget(name,glwMDrawingAreaWidgetClass, parent, arglist,argcount); - } - -#endif - - -#ifndef __GLX_MOTIF - -/* Make context current */ -void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx){ - glXMakeCurrent(XtDisplay(w),XtWindow(w),ctx); - } - - -/* Swap buffers convenience function */ -void GLwDrawingAreaSwapBuffers(Widget w){ - glXSwapBuffers(XtDisplay(w),XtWindow(w)); - } - -#endif diff --git a/src/glw/GLwDrawA.h b/src/glw/GLwDrawA.h deleted file mode 100644 index b9711c216bc..00000000000 --- a/src/glw/GLwDrawA.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef _GLwDrawA_h -#define _GLwDrawA_h - -#include <GL/glx.h> -#include <GL/gl.h> - -/**************************************************************** - * - * GLwDrawingArea widgets - * - ****************************************************************/ - -/* Resources: - - Name Class RepType Default Value - ---- ----- ------- ------------- - attribList AttribList int * NULL - visualInfo VisualInfo VisualInfo NULL - installColormap InstallColormap Boolean TRUE - allocateBackground AllocateColors Boolean FALSE - allocateOtherColors AllocateColors Boolean FALSE - installBackground InstallBackground Boolean TRUE - exposeCallback Callback Pointer NULL - ginitCallback Callback Pointer NULL - inputCallback Callback Pointer NULL - resizeCallback Callback Pointer NULL - -*** The following resources all correspond to the GLX configuration -*** attributes and are used to create the attribList if it is NULL - bufferSize BufferSize int 0 - level Level int 0 - rgba Rgba Boolean FALSE - doublebuffer Doublebuffer Boolean FALSE - stereo Stereo Boolean FALSE - auxBuffers AuxBuffers int 0 - redSize ColorSize int 1 - greenSize ColorSize int 1 - blueSize ColorSize int 1 - alphaSize AlphaSize int 0 - depthSize DepthSize int 0 - stencilSize StencilSize int 0 - accumRedSize AccumColorSize int 0 - accumGreenSize AccumColorSize int 0 - accumBlueSize AccumColorSize int 0 - accumAlphaSize AccumAlphaSize int 0 -*/ - -#define GLwNattribList "attribList" -#define GLwCAttribList "AttribList" -#define GLwNvisualInfo "visualInfo" -#define GLwCVisualInfo "VisualInfo" -#define GLwRVisualInfo "VisualInfo" - -#define GLwNinstallColormap "installColormap" -#define GLwCInstallColormap "InstallColormap" -#define GLwNallocateBackground "allocateBackground" -#define GLwNallocateOtherColors "allocateOtherColors" -#define GLwCAllocateColors "AllocateColors" -#define GLwNinstallBackground "installBackground" -#define GLwCInstallBackground "InstallBackground" - -#define GLwCCallback "Callback" -#define GLwNexposeCallback "exposeCallback" -#define GLwNginitCallback "ginitCallback" -#define GLwNresizeCallback "resizeCallback" -#define GLwNinputCallback "inputCallback" - -#define GLwNbufferSize "bufferSize" -#define GLwCBufferSize "BufferSize" -#define GLwNlevel "level" -#define GLwCLevel "Level" -#define GLwNrgba "rgba" -#define GLwCRgba "Rgba" -#define GLwNdoublebuffer "doublebuffer" -#define GLwCDoublebuffer "Doublebuffer" -#define GLwNstereo "stereo" -#define GLwCStereo "Stereo" -#define GLwNauxBuffers "auxBuffers" -#define GLwCAuxBuffers "AuxBuffers" -#define GLwNredSize "redSize" -#define GLwNgreenSize "greenSize" -#define GLwNblueSize "blueSize" -#define GLwCColorSize "ColorSize" -#define GLwNalphaSize "alphaSize" -#define GLwCAlphaSize "AlphaSize" -#define GLwNdepthSize "depthSize" -#define GLwCDepthSize "DepthSize" -#define GLwNstencilSize "stencilSize" -#define GLwCStencilSize "StencilSize" -#define GLwNaccumRedSize "accumRedSize" -#define GLwNaccumGreenSize "accumGreenSize" -#define GLwNaccumBlueSize "accumBlueSize" -#define GLwCAccumColorSize "AccumColorSize" -#define GLwNaccumAlphaSize "accumAlphaSize" -#define GLwCAccumAlphaSize "AccumAlphaSize" - -#ifdef __GLX_MOTIF - -typedef struct _GLwMDrawingAreaClassRec *GLwMDrawingAreaWidgetClass; -typedef struct _GLwMDrawingAreaRec *GLwMDrawingAreaWidget; - -GLAPI WidgetClass glwMDrawingAreaWidgetClass; - - -#else - -typedef struct _GLwDrawingAreaClassRec *GLwDrawingAreaWidgetClass; -typedef struct _GLwDrawingAreaRec *GLwDrawingAreaWidget; - -GLAPI WidgetClass glwDrawingAreaWidgetClass; - - -#endif - - -/* Callback reasons */ -#ifdef __GLX_MOTIF -#define GLwCR_EXPOSE XmCR_EXPOSE -#define GLwCR_RESIZE XmCR_RESIZE -#define GLwCR_INPUT XmCR_INPUT -#else -/* The same values as Motif, but don't use Motif constants */ -#define GLwCR_EXPOSE 38 -#define GLwCR_RESIZE 39 -#define GLwCR_INPUT 40 -#endif - -#define GLwCR_GINIT 32135 /* Arbitrary number that should neverr clash */ - -typedef struct - { - int reason; - XEvent *event; - Dimension width,height; - } - GLwDrawingAreaCallbackStruct; - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -/* front ends to glXMakeCurrent and glXSwapBuffers */ -GLAPI void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx); -GLAPI void GLwDrawingAreaSwapBuffers(Widget w); - -#ifdef __GLX_MOTIF -#ifdef _NO_PROTO -GLAPI Widget GLwCreateMDrawingArea(); -#else -GLAPI Widget GLwCreateMDrawingArea(Widget parent,char *name,ArgList arglist,Cardinal argcount); -#endif -#endif - -#if defined(__cplusplus) || defined(c_plusplus) -} -#endif - -#endif diff --git a/src/glw/GLwDrawAP.h b/src/glw/GLwDrawAP.h deleted file mode 100644 index 4ff21b426dd..00000000000 --- a/src/glw/GLwDrawAP.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef _GLwDrawAP_h -#define _GLwDrawAP_h - - -/* MOTIF */ -#ifdef __GLX_MOTIF -#include "GLwMDrawA.h" -#else -#include "GLwDrawA.h" -#endif - -typedef struct _GLwDrawingAreaClassPart { - caddr_t extension; - } GLwDrawingAreaClassPart; - - -#ifdef __GLX_MOTIF -typedef struct _GLwMDrawingAreaClassRec { - CoreClassPart core_class; - XmPrimitiveClassPart primitive_class; - GLwDrawingAreaClassPart glwDrawingArea_class; - } GLwMDrawingAreaClassRec; - - -GLAPI GLwMDrawingAreaClassRec glwMDrawingAreaClassRec; - - -/* XT */ -#else - -typedef struct _GLwDrawingAreaClassRec { - CoreClassPart core_class; - GLwDrawingAreaClassPart glwDrawingArea_class; - } GLwDrawingAreaClassRec; - -GLAPI GLwDrawingAreaClassRec glwDrawingAreaClassRec; - - -#endif - - - -typedef struct { - /* resources */ - int * attribList; - XVisualInfo * visualInfo; - Boolean myList; /* TRUE if we malloced the attribList*/ - Boolean myVisual; /* TRUE if we created the visualInfo*/ - Boolean installColormap; - Boolean allocateBackground; - Boolean allocateOtherColors; - Boolean installBackground; - XtCallbackList ginitCallback; - XtCallbackList resizeCallback; - XtCallbackList exposeCallback; - XtCallbackList inputCallback; - /* specific attributes; add as we get new attributes */ - int bufferSize; - int level; - Boolean rgba; - Boolean doublebuffer; - Boolean stereo; - int auxBuffers; - int redSize; - int greenSize; - int blueSize; - int alphaSize; - int depthSize; - int stencilSize; - int accumRedSize; - int accumGreenSize; - int accumBlueSize; - int accumAlphaSize; - } GLwDrawingAreaPart; - -#ifdef __GLX_MOTIF - -typedef struct _GLwMDrawingAreaRec { - CorePart core; - XmPrimitivePart primitive; - GLwDrawingAreaPart glwDrawingArea; - } GLwMDrawingAreaRec; - -#else - -typedef struct _GLwDrawingAreaRec { - CorePart core; - GLwDrawingAreaPart glwDrawingArea; - } GLwDrawingAreaRec; - -#endif - -#endif diff --git a/src/glw/GLwMDrawA.c b/src/glw/GLwMDrawA.c deleted file mode 100644 index bdefe92a6d0..00000000000 --- a/src/glw/GLwMDrawA.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawA.c" diff --git a/src/glw/GLwMDrawA.h b/src/glw/GLwMDrawA.h deleted file mode 100644 index 2e245890410..00000000000 --- a/src/glw/GLwMDrawA.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawA.h" diff --git a/src/glw/GLwMDrawAP.h b/src/glw/GLwMDrawAP.h deleted file mode 100644 index a0a689bb996..00000000000 --- a/src/glw/GLwMDrawAP.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. - */ -#ifndef __GLX_MOTIF -#define __GLX_MOTIF 1 -#endif -#include "GLwDrawAP.h" diff --git a/src/glw/Makefile b/src/glw/Makefile deleted file mode 100644 index 776b1aa5bfb..00000000000 --- a/src/glw/Makefile +++ /dev/null @@ -1,74 +0,0 @@ -# src/glw/Makefile - -TOP = ../.. -include $(TOP)/configs/current - -MAJOR = 1 -MINOR = 0 -TINY = 0 - -INCDIRS = -I$(TOP)/include $(MOTIF_CFLAGS) $(X11_INCLUDES) - - -OBJECTS = $(GLW_SOURCES:.c=.o) - - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCDIRS) $(CFLAGS) $(GLW_CFLAGS) $< - - - -##### TARGETS ##### - -default: $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME) - -# GLU pkg-config file -pcedit = sed \ - -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ - -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ - -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' \ - -e 's,@GLW_PC_REQ_PRIV@,$(GLW_PC_REQ_PRIV),' \ - -e 's,@GLW_PC_LIB_PRIV@,$(GLW_PC_LIB_PRIV),' \ - -e 's,@GLW_PC_CFLAGS@,$(GLW_PC_CFLAGS),' \ - -e 's,@GLW_LIB@,$(GLW_LIB),' -glw.pc: glw.pc.in - $(pcedit) $< > $@ - -install: glw.pc - $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - $(INSTALL) -m 644 *.h $(DESTDIR)$(INSTALL_INC_DIR)/GL - $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GLW_LIB_GLOB) $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -m 644 glw.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - -clean: - -rm -f depend depend.bak - -rm -f *.o *.pc *~ - - -# Make the library -$(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS) - $(MKLIB) -o $(GLW_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(MAJOR) -minor $(MINOR) -patch $(TINY) \ - $(MKLIB_OPTIONS) -install $(TOP)/$(LIB_DIR) \ - -id $(INSTALL_LIB_DIR)/lib$(GLW_LIB).$(MAJOR).dylib \ - $(GLW_LIB_DEPS) $(OBJECTS) - - -# -# Run 'make depend' to update the dependencies if you change what's included -# by any source file. -# -depend: $(GLW_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \ - $(X11_INCLUDES) > /dev/null - - --include depend diff --git a/src/glw/README b/src/glw/README deleted file mode 100644 index 70f4f7bc2ee..00000000000 --- a/src/glw/README +++ /dev/null @@ -1,56 +0,0 @@ - - widgets README file - - -This directory contains the source code for SGI's OpenGL Xt/Motif widgets, -slightly modified by Jeroen van der Zijp to work better with Mesa. - -To compile the widget code (producing lib/libGLw.a) cd to the widgets/ -directory and type 'make <config>' where <config> is the system configuration -you used to compile Mesa (like 'make linux'). This hasn't been tested on -many systems so let us know if you have trouble. - -If you want to make a Linux ELF shared lib instead of the non-shared .a -file see the notes in the Makefile. - -If you want to build with Motif support, edit Makefile.X11, looking -for the "Motif" information. - -The SGI copyright is as follows. - - - * (c) Copyright 1993, Silicon Graphics, Inc. - * ALL RIGHTS RESERVED - * Permission to use, copy, modify, and distribute this software for - * any purpose and without fee is hereby granted, provided that the above - * copyright notice appear in all copies and that both the copyright notice - * and this permission notice appear in supporting documentation, and that - * the name of Silicon Graphics, Inc. not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. - * - * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS" - * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR - * FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SILICON - * GRAPHICS, INC. BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT, - * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY - * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION, - * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF - * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC. HAS BEEN - * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * US Government Users Restricted Rights - * Use, duplication, or disclosure by the Government is subject to - * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph - * (c)(1)(ii) of the Rights in Technical Data and Computer Software - * clause at DFARS 252.227-7013 and/or in similar or successor - * clauses in the FAR or the DOD or NASA FAR Supplement. - * Unpublished-- rights reserved under the copyright laws of the - * United States. Contractor/manufacturer is Silicon Graphics, - * Inc., 2011 N. Shoreline Blvd., Mountain View, CA 94039-7311. - * - * OpenGL(TM) is a trademark of Silicon Graphics, Inc. diff --git a/src/glw/glw.pc.in b/src/glw/glw.pc.in deleted file mode 100644 index 19a7c307c01..00000000000 --- a/src/glw/glw.pc.in +++ /dev/null @@ -1,13 +0,0 @@ -prefix=@INSTALL_DIR@ -exec_prefix=${prefix} -libdir=@INSTALL_LIB_DIR@ -includedir=@INSTALL_INC_DIR@ - -Name: glw -Description: Mesa OpenGL widget library -Requires: gl -Requires.private: @GLW_PC_REQ_PRIV@ -Version: @VERSION@ -Libs: -L${libdir} -l@GLW_LIB@ -Libs.private: @GLW_PC_LIB_PRIV@ -Cflags: -I${includedir} @GLW_PC_CFLAGS@ diff --git a/src/glx/dri2.c b/src/glx/dri2.c index 229840d6919..b1b5013d048 100644 --- a/src/glx/dri2.c +++ b/src/glx/dri2.c @@ -190,6 +190,15 @@ DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code) err->minorCode == X_DRI2DestroyDrawable) return True; + /* If the server is non-local DRI2Connect will raise BadRequest. + * Swallow this so that DRI2Connect can signal this in its return code */ + if (err->majorCode == codes->major_opcode && + err->minorCode == X_DRI2Connect && + err->errorCode == BadRequest) { + *ret_code = False; + return True; + } + return False; } diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 80e4da30beb..9fa0d5ad362 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -455,16 +455,20 @@ dri2_wait_gl(struct glx_context *gc) static void dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate) { + struct glx_display *priv; + struct dri2_display *pdp; + struct glx_context *gc; struct dri2_drawable *pdraw = loaderPrivate; + if (!pdraw) return; if (!pdraw->base.psc) return; - struct glx_display *priv = __glXInitialize(pdraw->base.psc->dpy); - struct dri2_display *pdp = (struct dri2_display *)priv->dri2Display; - struct glx_context *gc = __glXGetCurrentContext(); + priv = __glXInitialize(pdraw->base.psc->dpy); + pdp = (struct dri2_display *) priv->dri2Display; + gc = __glXGetCurrentContext(); /* Old servers don't send invalidate events */ if (!pdp->invalidateAvailable) @@ -539,6 +543,11 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, (struct dri2_display *)dpyPriv->dri2Display; CARD64 ret = 0; + /* Old servers can't handle swapbuffers */ + if (!pdp->swapAvailable) { + dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height); + } else { +#ifdef X_DRI2SwapBuffers #ifdef __DRI2_FLUSH if (psc->f) { struct glx_context *gc = __glXGetCurrentContext(); @@ -549,21 +558,15 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, } #endif + DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable, + target_msc, divisor, remainder, &ret); +#endif + } + /* Old servers don't send invalidate events */ if (!pdp->invalidateAvailable) dri2InvalidateBuffers(dpyPriv->dpy, pdraw->xDrawable); - /* Old servers can't handle swapbuffers */ - if (!pdp->swapAvailable) { - dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height); - return 0; - } - -#ifdef X_DRI2SwapBuffers - DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable, target_msc, divisor, - remainder, &ret); -#endif - return ret; } diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index bac0c9e5911..e7dba5a68de 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -388,7 +388,7 @@ driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable) _X_HIDDEN void driReleaseDrawables(struct glx_context *gc) { - struct glx_display *const priv = __glXInitialize(gc->psc->dpy); + const struct glx_display *priv = gc->psc->display; __GLXDRIdrawable *pdraw; if (priv == NULL) diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index fc0a07901a7..c8ec9c21fed 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -794,15 +794,17 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable) gc = __glXGetCurrentContext(); #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) - __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable); + { + __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable); - if (pdraw != NULL) { - if (gc && drawable == gc->currentDrawable) { - glFlush(); - } + if (pdraw != NULL) { + if (gc && drawable == gc->currentDrawable) { + glFlush(); + } - (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0); - return; + (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0); + return; + } } #endif diff --git a/src/glx/glxext.c b/src/glx/glxext.c index 8704c484f96..8254544d1c0 100644 --- a/src/glx/glxext.c +++ b/src/glx/glxext.c @@ -260,24 +260,19 @@ glx_display_free(struct glx_display *priv) static int __glXCloseDisplay(Display * dpy, XExtCodes * codes) { - struct glx_display *priv, **prev, *next; + struct glx_display *priv, **prev; _XLockMutex(_Xglobal_lock); prev = &glx_displays; for (priv = glx_displays; priv; prev = &priv->next, priv = priv->next) { if (priv->dpy == dpy) { + *prev = priv->next; break; } } + _XUnlockMutex(_Xglobal_lock); - /* Only remove the display from the list after it's destroyed. The cleanup - * code (e.g. driReleaseDrawables()) ends up calling __glXInitialize(), - * which would create a new glx_display while we're trying to destroy this - * one. */ - next = priv->next; glx_display_free(priv); - *prev = next; - _XUnlockMutex(_Xglobal_lock); return 1; } diff --git a/src/mapi/Android.mk b/src/mapi/Android.mk new file mode 100644 index 00000000000..0d09ee13f99 --- /dev/null +++ b/src/mapi/Android.mk @@ -0,0 +1,60 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for glapi + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +include $(LOCAL_PATH)/mapi/sources.mak +LOCAL_SRC_FILES := $(addprefix mapi/, $(MAPI_GLAPI_SOURCES)) + +LOCAL_CFLAGS := \ + -DMAPI_MODE_GLAPI \ + -DMAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\" + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi + +LOCAL_MODULE := libglapi + +LOCAL_MODULE_CLASS := SHARED_LIBRARIES +intermediates := $(call local-intermediates-dir) +mapi_abi_header := $(intermediates)/shared-glapi/glapi_mapi_tmp.h +LOCAL_GENERATED_SOURCES := $(mapi_abi_header) + +mapi_abi_deps := \ + $(wildcard $(LOCAL_PATH)/glapi/gen/*.py) \ + $(wildcard $(LOCAL_PATH)/glapi/gen/*.xml) \ + $(LOCAL_PATH)/mapi/mapi_abi.py + +$(mapi_abi_header): PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/mapi/mapi_abi.py +$(mapi_abi_header): PRIVATE_APIXML := $(LOCAL_PATH)/glapi/gen/gl_and_es_API.xml +$(mapi_abi_header): $(mapi_abi_deps) + @mkdir -p $(dir $@) + @echo "Gen GLAPI: $(PRIVATE_MODULE) <= $(notdir $@)" + $(hide) $(PRIVATE_SCRIPT) --printer shared-glapi --mode lib $(PRIVATE_APIXML) > $@ + +include $(MESA_COMMON_MK) +include $(BUILD_SHARED_LIBRARY) diff --git a/src/mapi/es1api/.gitignore b/src/mapi/es1api/.gitignore index b21f1d14c6f..dfe465677c4 100644 --- a/src/mapi/es1api/.gitignore +++ b/src/mapi/es1api/.gitignore @@ -1,4 +1 @@ glapi_mapi_tmp.h -glapi-stamp -glapi -main diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile index aef694866c2..0a0449b10a3 100644 --- a/src/mapi/es1api/Makefile +++ b/src/mapi/es1api/Makefile @@ -34,8 +34,6 @@ ESAPI = $(ES)api GLAPI := $(TOP)/src/mapi/glapi MAPI := $(TOP)/src/mapi/mapi -# directory for generated sources/headers -GEN := glapi esapi_CPPFLAGS := \ -I$(TOP)/include \ @@ -68,13 +66,11 @@ lib$(ESAPI).a: $(esapi_OBJECTS) $(esapi_OBJECTS): %.o: $(MAPI)/%.c $(CC) -c $(esapi_CPPFLAGS) $(CFLAGS) $< -o $@ -$(esapi_SOURCES): | glapi-stamp +$(esapi_SOURCES): glapi_mapi_tmp.h -.PHONY: glapi-stamp -glapi-stamp: - @# generate sources/headers - @$(MAKE) -C $(GLAPI)/gen-es $(ES) - @touch $@ +include $(GLAPI)/gen/glapi_gen.mk +glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) + $(call glapi_gen_mapi,$<,$(ESAPI)) .PHONY: clean clean: @@ -83,9 +79,7 @@ clean: -rm -f lib$(ESAPI).a -rm -f $(esapi_OBJECTS) -rm -f depend depend.bak - -rm -f glapi-stamp - @# clean generated sources/headers - @$(MAKE) -C $(GLAPI)/gen-es clean-$(ES) + -rm -f glapi_mapi_tmp.h pcedit = \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ diff --git a/src/mapi/glapi/SConscript b/src/mapi/glapi/SConscript index a7764745eda..fdd65790013 100644 --- a/src/mapi/glapi/SConscript +++ b/src/mapi/glapi/SConscript @@ -74,6 +74,11 @@ if env['platform'] != 'winddk': else: pass + if env['toolchain'] == 'crossmingw': + # compile these files without -gstabs option + glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_dispatch.c") + glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_getproc.c") + glapi = env.ConvenienceLibrary( target = 'glapi', source = glapi_sources, diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile deleted file mode 100644 index bf66ec037cf..00000000000 --- a/src/mapi/glapi/gen-es/Makefile +++ /dev/null @@ -1,91 +0,0 @@ -TOP = ../../../.. -MAPI = $(TOP)/src/mapi/mapi -GLAPI = ../gen -include $(TOP)/configs/current - -OUTPUTS := \ - glapi_mapi_tmp.h \ - main/dispatch.h \ - main/remap_helper.h - -COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py -COMMON := $(addprefix $(GLAPI)/, $(COMMON)) - -ES1_APIXML := es1_API.xml -ES2_APIXML := es2_API.xml -ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api -ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api - -ES1_DEPS = $(ES1_APIXML) base1_API.xml es1_EXT.xml es_EXT.xml \ - es1_COMPAT.xml es_COMPAT.xml -ES2_DEPS = $(ES2_APIXML) base2_API.xml es2_EXT.xml es_EXT.xml \ - es2_COMPAT.xml es_COMPAT.xml - -ES1_OUTPUTS := $(addprefix $(ES1_OUTPUT_DIR)/, $(OUTPUTS)) -ES2_OUTPUTS := $(addprefix $(ES2_OUTPUT_DIR)/, $(OUTPUTS)) - -SHARED_GLAPI_APIXML := $(GLAPI)/gl_and_es_API.xml -SHARED_GLAPI_OUTPUT_DIR := $(TOP)/src/mapi/shared-glapi -SHARED_GLAPI_DEPS := $(SHARED_GLAPI_APIXML) -SHARED_GLAPI_OUTPUTS = $(SHARED_GLAPI_OUTPUT_DIR)/glapi_mapi_tmp.h - -all: es1 es2 shared-glapi - -es1: $(ES1_OUTPUTS) -es2: $(ES2_OUTPUTS) -shared-glapi: $(SHARED_GLAPI_OUTPUTS) - -$(ES1_OUTPUTS): APIXML := $(ES1_APIXML) -$(ES1_OUTPUTS): PRINTER := es1api -$(ES1_OUTPUTS): $(ES1_DEPS) - -$(ES2_OUTPUTS): APIXML := $(ES2_APIXML) -$(ES2_OUTPUTS): PRINTER := es2api -$(ES2_OUTPUTS): $(ES2_DEPS) - -$(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML) -$(SHARED_GLAPI_OUTPUTS): PRINTER := shared-glapi -$(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS) - -define gen-glapi - @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@ -endef - -%/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON) - @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< \ - --printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@ - -%/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON) - $(call gen-glapi,-c -m remap_table) - -%/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON) - $(call gen-glapi) - -verify_xml: - @if [ ! -f gl.h ]; then \ - echo "Please copy gl.h and gl2.h to this directory"; \ - exit 1; \ - fi - @echo "Verifying that es1_API.xml covers OpenGL ES 1.1..." - @$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl.h > tmp.xml - @$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es1_API.xml - @echo "Verifying that es2_API.xml covers OpenGL ES 2.0..." - @$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl2.h > tmp.xml - @$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es2_API.xml - @rm -f tmp.xml - -clean-es1: - -rm -f $(ES1_OUTPUTS) - -rm -rf $(ES1_OUTPUT_DIR)/main - -clean-es2: - -rm -f $(ES2_OUTPUTS) - -rm -rf $(ES2_OUTPUT_DIR)/main - -clean-shared-glapi: - -rm -f $(SHARED_GLAPI_OUTPUTS) - -clean: clean-es1 clean-es2 clean-shared-glapi - -rm -f *~ *.pyc *.pyo diff --git a/src/mapi/glapi/gen-es/base1_API.xml b/src/mapi/glapi/gen-es/base1_API.xml deleted file mode 100644 index 720be257ca2..00000000000 --- a/src/mapi/glapi/gen-es/base1_API.xml +++ /dev/null @@ -1,744 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL and OpenGL ES 1.x APIs - This file defines the base categories that can be shared by all APIs. - They are defined in an incremental fashion. ---> - -<OpenGLAPI> - -<!-- base subset of OpenGL 1.0 --> -<category name="base1.0"> - <enum name="FALSE" value="0x0"/> - <enum name="TRUE" value="0x1"/> - <enum name="ZERO" value="0x0"/> - <enum name="ONE" value="0x1"/> - <enum name="NO_ERROR" value="0x0"/> - - <enum name="POINTS" value="0x0000"/> - <enum name="LINES" value="0x0001"/> - <enum name="LINE_LOOP" value="0x0002"/> - <enum name="LINE_STRIP" value="0x0003"/> - <enum name="TRIANGLES" value="0x0004"/> - <enum name="TRIANGLE_STRIP" value="0x0005"/> - <enum name="TRIANGLE_FAN" value="0x0006"/> - <enum name="NEVER" value="0x0200"/> - <enum name="LESS" value="0x0201"/> - <enum name="EQUAL" value="0x0202"/> - <enum name="LEQUAL" value="0x0203"/> - <enum name="GREATER" value="0x0204"/> - <enum name="NOTEQUAL" value="0x0205"/> - <enum name="GEQUAL" value="0x0206"/> - <enum name="ALWAYS" value="0x0207"/> - <enum name="SRC_COLOR" value="0x0300"/> - <enum name="ONE_MINUS_SRC_COLOR" value="0x0301"/> - <enum name="SRC_ALPHA" value="0x0302"/> - <enum name="ONE_MINUS_SRC_ALPHA" value="0x0303"/> - <enum name="DST_ALPHA" value="0x0304"/> - <enum name="ONE_MINUS_DST_ALPHA" value="0x0305"/> - <enum name="DST_COLOR" value="0x0306"/> - <enum name="ONE_MINUS_DST_COLOR" value="0x0307"/> - <enum name="SRC_ALPHA_SATURATE" value="0x0308"/> - <enum name="FRONT" value="0x0404"/> - <enum name="BACK" value="0x0405"/> - <enum name="FRONT_AND_BACK" value="0x0408"/> - <enum name="INVALID_ENUM" value="0x0500"/> - <enum name="INVALID_VALUE" value="0x0501"/> - <enum name="INVALID_OPERATION" value="0x0502"/> - <enum name="OUT_OF_MEMORY" value="0x0505"/> - <enum name="CW" value="0x0900"/> - <enum name="CCW" value="0x0901"/> - <enum name="CULL_FACE" count="1" value="0x0B44"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_TEST" count="1" value="0x0B71"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_TEST" count="1" value="0x0B90"> - <size name="Get" mode="get"/> - </enum> - <enum name="DITHER" count="1" value="0x0BD0"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND" count="1" value="0x0BE2"> - <size name="Get" mode="get"/> - </enum> - <enum name="SCISSOR_TEST" count="1" value="0x0C11"> - <size name="Get" mode="get"/> - </enum> - <enum name="UNPACK_ALIGNMENT" count="1" value="0x0CF5"> - <size name="Get" mode="get"/> - </enum> - <enum name="PACK_ALIGNMENT" count="1" value="0x0D05"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_TEXTURE_SIZE" count="1" value="0x0D33"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_VIEWPORT_DIMS" count="2" value="0x0D3A"> - <size name="Get" mode="get"/> - </enum> - <enum name="SUBPIXEL_BITS" count="1" value="0x0D50"> - <size name="Get" mode="get"/> - </enum> - <enum name="RED_BITS" count="1" value="0x0D52"> - <size name="Get" mode="get"/> - </enum> - <enum name="GREEN_BITS" count="1" value="0x0D53"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLUE_BITS" count="1" value="0x0D54"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALPHA_BITS" count="1" value="0x0D55"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_BITS" count="1" value="0x0D56"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_BITS" count="1" value="0x0D57"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_2D" count="1" value="0x0DE1"> - <size name="Get" mode="get"/> - </enum> - <enum name="DONT_CARE" value="0x1100"/> - <enum name="FASTEST" value="0x1101"/> - <enum name="NICEST" value="0x1102"/> - <enum name="BYTE" count="1" value="0x1400"> - <size name="CallLists"/> - </enum> - <enum name="UNSIGNED_BYTE" count="1" value="0x1401"> - <size name="CallLists"/> - </enum> - <enum name="SHORT" count="2" value="0x1402"> - <size name="CallLists"/> - </enum> - <enum name="UNSIGNED_SHORT" count="2" value="0x1403"> - <size name="CallLists"/> - </enum> - <enum name="FLOAT" count="4" value="0x1406"> - <size name="CallLists"/> - </enum> - <enum name="INVERT" value="0x150A"/> - <enum name="TEXTURE" value="0x1702"/> - <enum name="ALPHA" value="0x1906"/> - <enum name="RGB" value="0x1907"/> - <enum name="RGBA" value="0x1908"/> - <enum name="LUMINANCE" value="0x1909"/> - <enum name="LUMINANCE_ALPHA" value="0x190A"/> - <enum name="KEEP" value="0x1E00"/> - <enum name="REPLACE" value="0x1E01"/> - <enum name="INCR" value="0x1E02"/> - <enum name="DECR" value="0x1E03"/> - <enum name="VENDOR" value="0x1F00"/> - <enum name="RENDERER" value="0x1F01"/> - <enum name="VERSION" value="0x1F02"/> - <enum name="EXTENSIONS" value="0x1F03"/> - <enum name="NEAREST" value="0x2600"/> - <enum name="LINEAR" value="0x2601"/> - <enum name="NEAREST_MIPMAP_NEAREST" value="0x2700"/> - <enum name="LINEAR_MIPMAP_NEAREST" value="0x2701"/> - <enum name="NEAREST_MIPMAP_LINEAR" value="0x2702"/> - <enum name="LINEAR_MIPMAP_LINEAR" value="0x2703"/> - <enum name="TEXTURE_MAG_FILTER" count="1" value="0x2800"> - <size name="TexParameterfv"/> - <size name="TexParameteriv"/> - <size name="GetTexParameterfv" mode="get"/> - <size name="GetTexParameteriv" mode="get"/> - </enum> - <enum name="TEXTURE_MIN_FILTER" count="1" value="0x2801"> - <size name="TexParameterfv"/> - <size name="TexParameteriv"/> - <size name="GetTexParameterfv" mode="get"/> - <size name="GetTexParameteriv" mode="get"/> - </enum> - <enum name="TEXTURE_WRAP_S" count="1" value="0x2802"> - <size name="TexParameterfv"/> - <size name="TexParameteriv"/> - <size name="GetTexParameterfv" mode="get"/> - <size name="GetTexParameteriv" mode="get"/> - </enum> - <enum name="TEXTURE_WRAP_T" count="1" value="0x2803"> - <size name="TexParameterfv"/> - <size name="TexParameteriv"/> - <size name="GetTexParameterfv" mode="get"/> - <size name="GetTexParameteriv" mode="get"/> - </enum> - <enum name="REPEAT" value="0x2901"/> - - <enum name="DEPTH_BUFFER_BIT" value="0x00000100"/> - <enum name="STENCIL_BUFFER_BIT" value="0x00000400"/> - <enum name="COLOR_BUFFER_BIT" value="0x00004000"/> - - <type name="float" size="4" float="true" glx_name="FLOAT32"/> - <type name="clampf" size="4" float="true" glx_name="FLOAT32"/> - - <type name="int" size="4" glx_name="CARD32"/> - <type name="uint" size="4" unsigned="true" glx_name="CARD32"/> - <type name="sizei" size="4" glx_name="CARD32"/> - <type name="enum" size="4" unsigned="true" glx_name="ENUM"/> - <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/> - - <type name="short" size="2" glx_name="CARD16"/> - <type name="ushort" size="2" unsigned="true" glx_name="CARD16"/> - - <type name="byte" size="1" glx_name="CARD8"/> - <type name="ubyte" size="1" unsigned="true" glx_name="CARD8"/> - <type name="boolean" size="1" unsigned="true" glx_name="CARD8"/> - - <type name="void" size="1"/> - - <function name="BlendFunc" offset="241"> - <param name="sfactor" type="GLenum"/> - <param name="dfactor" type="GLenum"/> - <glx rop="160"/> - </function> - - <function name="Clear" offset="203"> - <param name="mask" type="GLbitfield"/> - <glx rop="127"/> - </function> - - <function name="ClearColor" offset="206"> - <param name="red" type="GLclampf"/> - <param name="green" type="GLclampf"/> - <param name="blue" type="GLclampf"/> - <param name="alpha" type="GLclampf"/> - <glx rop="130"/> - </function> - - <function name="ClearStencil" offset="207"> - <param name="s" type="GLint"/> - <glx rop="131"/> - </function> - - <function name="ColorMask" offset="210"> - <param name="red" type="GLboolean"/> - <param name="green" type="GLboolean"/> - <param name="blue" type="GLboolean"/> - <param name="alpha" type="GLboolean"/> - <glx rop="134"/> - </function> - - <function name="CullFace" offset="152"> - <param name="mode" type="GLenum"/> - <glx rop="79"/> - </function> - - <function name="DepthFunc" offset="245"> - <param name="func" type="GLenum"/> - <glx rop="164"/> - </function> - - <function name="DepthMask" offset="211"> - <param name="flag" type="GLboolean"/> - <glx rop="135"/> - </function> - - <function name="Disable" offset="214"> - <param name="cap" type="GLenum"/> - <glx rop="138" handcode="client"/> - </function> - - <function name="Enable" offset="215"> - <param name="cap" type="GLenum"/> - <glx rop="139" handcode="client"/> - </function> - - <function name="Finish" offset="216"> - <glx sop="108" handcode="true"/> - </function> - - <function name="Flush" offset="217"> - <glx sop="142" handcode="true"/> - </function> - - <function name="FrontFace" offset="157"> - <param name="mode" type="GLenum"/> - <glx rop="84"/> - </function> - - <function name="GetError" offset="261"> - <return type="GLenum"/> - <glx sop="115" handcode="client"/> - </function> - - <function name="GetIntegerv" offset="263"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="117" handcode="client"/> - </function> - - <function name="GetString" offset="275"> - <param name="name" type="GLenum"/> - <return type="const GLubyte *"/> - <glx sop="129" handcode="true"/> - </function> - - <function name="Hint" offset="158"> - <param name="target" type="GLenum"/> - <param name="mode" type="GLenum"/> - <glx rop="85"/> - </function> - - <function name="LineWidth" offset="168"> - <param name="width" type="GLfloat"/> - <glx rop="95"/> - </function> - - <function name="PixelStorei" offset="250"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx sop="110" handcode="client"/> - </function> - - <function name="ReadPixels" offset="256"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="GLvoid *" output="true" img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/> - <glx sop="111"/> - </function> - - <function name="Scissor" offset="176"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="103"/> - </function> - - <function name="StencilFunc" offset="243"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLint"/> - <param name="mask" type="GLuint"/> - <glx rop="162"/> - </function> - - <function name="StencilMask" offset="209"> - <param name="mask" type="GLuint"/> - <glx rop="133"/> - </function> - - <function name="StencilOp" offset="244"> - <param name="fail" type="GLenum"/> - <param name="zfail" type="GLenum"/> - <param name="zpass" type="GLenum"/> - <glx rop="163"/> - </function> - - <function name="TexParameterf" offset="178"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="105"/> - </function> - - <function name="Viewport" offset="305"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="191"/> - </function> - - <!-- these are not in OpenGL ES 1.0 --> - <enum name="LINE_WIDTH" count="1" value="0x0B21"> - <size name="Get" mode="get"/> - </enum> - <enum name="CULL_FACE_MODE" count="1" value="0x0B45"> - <size name="Get" mode="get"/> - </enum> - <enum name="FRONT_FACE" count="1" value="0x0B46"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_RANGE" count="2" value="0x0B70"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_WRITEMASK" count="1" value="0x0B72"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_CLEAR_VALUE" count="1" value="0x0B73"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_FUNC" count="1" value="0x0B74"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_CLEAR_VALUE" count="1" value="0x0B91"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_FUNC" count="1" value="0x0B92"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_VALUE_MASK" count="1" value="0x0B93"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_FAIL" count="1" value="0x0B94"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_PASS_DEPTH_FAIL" count="1" value="0x0B95"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_PASS_DEPTH_PASS" count="1" value="0x0B96"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_REF" count="1" value="0x0B97"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_WRITEMASK" count="1" value="0x0B98"> - <size name="Get" mode="get"/> - </enum> - <enum name="VIEWPORT" count="4" value="0x0BA2"> - <size name="Get" mode="get"/> - </enum> - <enum name="SCISSOR_BOX" count="4" value="0x0C10"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_CLEAR_VALUE" count="4" value="0x0C22"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_WRITEMASK" count="4" value="0x0C23"> - <size name="Get" mode="get"/> - </enum> - - <function name="TexParameterfv" offset="179"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="106"/> - </function> - - <function name="TexParameteri" offset="180"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="107"/> - </function> - - <function name="TexParameteriv" offset="181"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="108"/> - </function> - - <function name="GetBooleanv" offset="258"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLboolean *" output="true" variable_param="pname"/> - <glx sop="112" handcode="client"/> - </function> - - <function name="GetFloatv" offset="262"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="116" handcode="client"/> - </function> - - <function name="GetTexParameterfv" offset="282"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="136"/> - </function> - - <function name="GetTexParameteriv" offset="283"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="137"/> - </function> - - <function name="IsEnabled" offset="286"> - <param name="cap" type="GLenum"/> - <return type="GLboolean"/> - <glx sop="140" handcode="client"/> - </function> -</category> - -<!-- base subset of OpenGL 1.1 --> -<category name="base1.1"> - <enum name="POLYGON_OFFSET_FILL" value="0x8037"/> - - <function name="BindTexture" offset="307"> - <param name="target" type="GLenum"/> - <param name="texture" type="GLuint"/> - <glx rop="4117"/> - </function> - - <function name="CopyTexImage2D" offset="324"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <glx rop="4120"/> - </function> - - <function name="CopyTexSubImage2D" offset="326"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4122"/> - </function> - - <function name="DeleteTextures" offset="327"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="const GLuint *" count="n"/> - <glx sop="144"/> - </function> - - <function name="DrawArrays" offset="310"> - <param name="mode" type="GLenum"/> - <param name="first" type="GLint"/> - <param name="count" type="GLsizei"/> - <glx rop="193" handcode="true"/> - </function> - - <function name="DrawElements" offset="311"> - <param name="mode" type="GLenum"/> - <param name="count" type="GLsizei"/> - <param name="type" type="GLenum"/> - <param name="indices" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="GenTextures" offset="328"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="GLuint *" output="true" count="n"/> - <glx sop="145" always_array="true"/> - </function> - - <function name="PolygonOffset" offset="319"> - <param name="factor" type="GLfloat"/> - <param name="units" type="GLfloat"/> - <glx rop="192"/> - </function> - - <function name="TexSubImage2D" offset="333"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4100" large="true"/> - </function> - - <!-- these are not in OpenGL ES 1.0 --> - <enum name="POLYGON_OFFSET_UNITS" count="1" value="0x2A00"> - <size name="Get" mode="get"/> - </enum> - <enum name="POLYGON_OFFSET_FACTOR" count="1" value="0x8038"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_BINDING_2D" count="1" value="0x8069"> - <size name="Get" mode="get"/> - </enum> - - <function name="IsTexture" offset="330"> - <param name="texture" type="GLuint"/> - <return type="GLboolean"/> - <glx sop="146"/> - </function> -</category> - -<!-- base subset of OpenGL 1.2 --> -<category name="base1.2"> - <enum name="UNSIGNED_SHORT_4_4_4_4" value="0x8033"/> - <enum name="UNSIGNED_SHORT_5_5_5_1" value="0x8034"/> - <enum name="CLAMP_TO_EDGE" value="0x812F"/> - <enum name="UNSIGNED_SHORT_5_6_5" value="0x8363"/> - <enum name="ALIASED_POINT_SIZE_RANGE" count="2" value="0x846D"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALIASED_LINE_WIDTH_RANGE" count="2" value="0x846E"> - <size name="Get" mode="get"/> - </enum> -</category> - -<!-- base subset of OpenGL 1.3 --> -<category name="base1.3"> - <enum name="SAMPLE_ALPHA_TO_COVERAGE" count="1" value="0x809E"> - <size name="Get" mode="get"/> - </enum> - <enum name="SAMPLE_COVERAGE" count="1" value="0x80A0"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE0" value="0x84C0"/> - <enum name="TEXTURE1" value="0x84C1"/> - <enum name="TEXTURE2" value="0x84C2"/> - <enum name="TEXTURE3" value="0x84C3"/> - <enum name="TEXTURE4" value="0x84C4"/> - <enum name="TEXTURE5" value="0x84C5"/> - <enum name="TEXTURE6" value="0x84C6"/> - <enum name="TEXTURE7" value="0x84C7"/> - <enum name="TEXTURE8" value="0x84C8"/> - <enum name="TEXTURE9" value="0x84C9"/> - <enum name="TEXTURE10" value="0x84CA"/> - <enum name="TEXTURE11" value="0x84CB"/> - <enum name="TEXTURE12" value="0x84CC"/> - <enum name="TEXTURE13" value="0x84CD"/> - <enum name="TEXTURE14" value="0x84CE"/> - <enum name="TEXTURE15" value="0x84CF"/> - <enum name="TEXTURE16" value="0x84D0"/> - <enum name="TEXTURE17" value="0x84D1"/> - <enum name="TEXTURE18" value="0x84D2"/> - <enum name="TEXTURE19" value="0x84D3"/> - <enum name="TEXTURE20" value="0x84D4"/> - <enum name="TEXTURE21" value="0x84D5"/> - <enum name="TEXTURE22" value="0x84D6"/> - <enum name="TEXTURE23" value="0x84D7"/> - <enum name="TEXTURE24" value="0x84D8"/> - <enum name="TEXTURE25" value="0x84D9"/> - <enum name="TEXTURE26" value="0x84DA"/> - <enum name="TEXTURE27" value="0x84DB"/> - <enum name="TEXTURE28" value="0x84DC"/> - <enum name="TEXTURE29" value="0x84DD"/> - <enum name="TEXTURE30" value="0x84DE"/> - <enum name="TEXTURE31" value="0x84DF"/> - <enum name="NUM_COMPRESSED_TEXTURE_FORMATS" count="1" value="0x86A2"> - <size name="Get" mode="get"/> - </enum> - <enum name="COMPRESSED_TEXTURE_FORMATS" count="-1" value="0x86A3"> - <size name="Get" mode="get"/> - </enum> - - <function name="ActiveTexture" offset="374"> - <param name="texture" type="GLenum"/> - <glx rop="197"/> - </function> - - <function name="CompressedTexImage2D" offset="assign"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="imageSize" type="GLsizei" counter="true"/> - <param name="data" type="const GLvoid *" count="imageSize"/> - <glx rop="215" handcode="client"/> - </function> - - <function name="CompressedTexSubImage2D" offset="assign"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="imageSize" type="GLsizei" counter="true"/> - <param name="data" type="const GLvoid *" count="imageSize"/> - <glx rop="218" handcode="client"/> - </function> - - <function name="SampleCoverage" offset="assign"> - <param name="value" type="GLclampf"/> - <param name="invert" type="GLboolean"/> - <glx rop="229"/> - </function> - - <!-- these are not in OpenGL ES 1.0 --> - <enum name="SAMPLE_BUFFERS" count="1" value="0x80A8"> - <size name="Get" mode="get"/> - </enum> - <enum name="SAMPLES" count="1" value="0x80A9"> - <size name="Get" mode="get"/> - </enum> - <enum name="SAMPLE_COVERAGE_VALUE" count="1" value="0x80AA"> - <size name="Get" mode="get"/> - </enum> - <enum name="SAMPLE_COVERAGE_INVERT" count="1" value="0x80AB"> - <size name="Get" mode="get"/> - </enum> - <enum name="ACTIVE_TEXTURE" count="1" value="0x84E0"> - <size name="Get" mode="get"/> - </enum> -</category> - -<!-- base subset of OpenGL 1.4 --> -<category name="base1.4"> - <enum name="GENERATE_MIPMAP_HINT" value="0x8192"/> -</category> - -<!-- base subset of OpenGL 1.5 --> -<category name="base1.5"> - <enum name="BUFFER_SIZE" value="0x8764"/> - <enum name="BUFFER_USAGE" value="0x8765"/> - <enum name="ARRAY_BUFFER" value="0x8892"/> - <enum name="ELEMENT_ARRAY_BUFFER" value="0x8893"/> - <enum name="ARRAY_BUFFER_BINDING" value="0x8894"/> - <enum name="ELEMENT_ARRAY_BUFFER_BINDING" value="0x8895"/> - <enum name="STATIC_DRAW" value="0x88E4"/> - <enum name="DYNAMIC_DRAW" value="0x88E8"/> - - <type name="intptr" size="4" glx_name="CARD32"/> - <type name="sizeiptr" size="4" glx_name="CARD32"/> - - <function name="BindBuffer" offset="assign"> - <param name="target" type="GLenum"/> - <param name="buffer" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="BufferData" offset="assign"> - <param name="target" type="GLenum"/> - <param name="size" type="GLsizeiptr" counter="true"/> - <param name="data" type="const GLvoid *" count="size" img_null_flag="true"/> - <param name="usage" type="GLenum"/> - <glx ignore="true"/> - </function> - - <function name="BufferSubData" offset="assign"> - <param name="target" type="GLenum"/> - <param name="offset" type="GLintptr"/> - <param name="size" type="GLsizeiptr" counter="true"/> - <param name="data" type="const GLvoid *" count="size"/> - <glx ignore="true"/> - </function> - - <function name="DeleteBuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="buffer" type="const GLuint *" count="n"/> - <glx ignore="true"/> - </function> - - <function name="GenBuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="buffer" type="GLuint *" output="true" count="n"/> - <glx ignore="true"/> - </function> - - <function name="GetBufferParameteriv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx ignore="true"/> - </function> - - <function name="IsBuffer" offset="assign"> - <param name="buffer" type="GLuint"/> - <return type="GLboolean"/> - <glx ignore="true"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/base2_API.xml b/src/mapi/glapi/gen-es/base2_API.xml deleted file mode 100644 index b59ef62de64..00000000000 --- a/src/mapi/glapi/gen-es/base2_API.xml +++ /dev/null @@ -1,533 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL and OpenGL ES 2.x APIs --> - -<OpenGLAPI> - -<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- base subset of OpenGL 2.0 --> -<category name="base2.0"> - <enum name="BLEND_EQUATION_RGB" count="1" value="0x8009"> <!-- same as BLEND_EQUATION --> - <size name="Get" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_ENABLED" count="1" value="0x8622"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_SIZE" count="1" value="0x8623"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_STRIDE" count="1" value="0x8624"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_TYPE" count="1" value="0x8625"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="CURRENT_VERTEX_ATTRIB" count="1" value="0x8626"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_POINTER" value="0x8645"/> - <enum name="STENCIL_BACK_FUNC" count="1" value="0x8800"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_BACK_FAIL" count="1" value="0x8801"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_BACK_PASS_DEPTH_FAIL" count="1" value="0x8802"> - <size name="Get" mode="get"/> - </enum> - <enum name="STENCIL_BACK_PASS_DEPTH_PASS" count="1" value="0x8803"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_EQUATION_ALPHA" count="1" value="0x883D"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_VERTEX_ATTRIBS" count="1" value="0x8869"> - <size name="Get" mode="get"/> - </enum> - <enum name="VERTEX_ATTRIB_ARRAY_NORMALIZED" value="0x886A"/> - <enum name="MAX_TEXTURE_IMAGE_UNITS" count="1" value="0x8872"> - <size name="Get" mode="get"/> - </enum> - <enum name="FRAGMENT_SHADER" value="0x8B30"/> - <enum name="VERTEX_SHADER" value="0x8B31"/> - <enum name="MAX_VERTEX_TEXTURE_IMAGE_UNITS" value="0x8B4C"/> - <enum name="MAX_COMBINED_TEXTURE_IMAGE_UNITS" value="0x8B4D"/> - <enum name="SHADER_TYPE" value="0x8B4F"/> - <enum name="FLOAT_VEC2" value="0x8B50"/> - <enum name="FLOAT_VEC3" value="0x8B51"/> - <enum name="FLOAT_VEC4" value="0x8B52"/> - <enum name="INT_VEC2" value="0x8B53"/> - <enum name="INT_VEC3" value="0x8B54"/> - <enum name="INT_VEC4" value="0x8B55"/> - <enum name="BOOL" value="0x8B56"/> - <enum name="BOOL_VEC2" value="0x8B57"/> - <enum name="BOOL_VEC3" value="0x8B58"/> - <enum name="BOOL_VEC4" value="0x8B59"/> - <enum name="FLOAT_MAT2" value="0x8B5A"/> - <enum name="FLOAT_MAT3" value="0x8B5B"/> - <enum name="FLOAT_MAT4" value="0x8B5C"/> - <enum name="SAMPLER_2D" value="0x8B5E"/> - <enum name="SAMPLER_CUBE" value="0x8B60"/> - <enum name="DELETE_STATUS" value="0x8B80"/> - <enum name="COMPILE_STATUS" value="0x8B81"/> - <enum name="LINK_STATUS" value="0x8B82"/> - <enum name="VALIDATE_STATUS" value="0x8B83"/> - <enum name="INFO_LOG_LENGTH" value="0x8B84"/> - <enum name="ATTACHED_SHADERS" value="0x8B85"/> - <enum name="ACTIVE_UNIFORMS" value="0x8B86"/> - <enum name="ACTIVE_UNIFORM_MAX_LENGTH" value="0x8B87"/> - <enum name="SHADER_SOURCE_LENGTH" value="0x8B88"/> - <enum name="ACTIVE_ATTRIBUTES" value="0x8B89"/> - <enum name="ACTIVE_ATTRIBUTE_MAX_LENGTH" value="0x8B8A"/> - <enum name="SHADING_LANGUAGE_VERSION" value="0x8B8C"/> - <enum name="CURRENT_PROGRAM" value="0x8B8D"/> - <enum name="STENCIL_BACK_REF" value="0x8CA3"/> - <enum name="STENCIL_BACK_VALUE_MASK" value="0x8CA4"/> - <enum name="STENCIL_BACK_WRITEMASK" value="0x8CA5"/> - - <type name="char" size="1" glx_name="CARD8"/> - - <function name="AttachShader" offset="assign"> - <param name="program" type="GLuint"/> - <param name="shader" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="BindAttribLocation" offset="assign"> - <param name="program" type="GLuint"/> - <param name="index" type="GLuint"/> - <param name="name" type="const GLchar *"/> - <glx ignore="true"/> - </function> - - <function name="BlendEquationSeparate" offset="assign"> - <param name="modeRGB" type="GLenum"/> - <param name="modeA" type="GLenum"/> - <glx rop="4228"/> - </function> - - <function name="CompileShader" offset="assign"> - <param name="shader" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="CreateProgram" offset="assign"> - <return type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="CreateShader" offset="assign"> - <param name="type" type="GLenum"/> - <return type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="DeleteProgram" offset="assign"> - <param name="program" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="DeleteShader" offset="assign"> - <param name="program" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="DetachShader" offset="assign"> - <param name="program" type="GLuint"/> - <param name="shader" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="DisableVertexAttribArray" offset="assign"> - <param name="index" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="EnableVertexAttribArray" offset="assign"> - <param name="index" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="GetActiveAttrib" offset="assign"> - <param name="program" type="GLuint"/> - <param name="index" type="GLuint"/> - <param name="bufSize" type="GLsizei "/> - <param name="length" type="GLsizei *" output="true"/> - <param name="size" type="GLint *" output="true"/> - <param name="type" type="GLenum *" output="true"/> - <param name="name" type="GLchar *" output="true"/> - <glx ignore="true"/> - </function> - - <function name="GetActiveUniform" offset="assign"> - <param name="program" type="GLuint"/> - <param name="index" type="GLuint"/> - <param name="bufSize" type="GLsizei"/> - <param name="length" type="GLsizei *" output="true"/> - <param name="size" type="GLint *" output="true"/> - <param name="type" type="GLenum *" output="true"/> - <param name="name" type="GLchar *" output="true"/> - <glx ignore="true"/> - </function> - - <function name="GetAttachedShaders" offset="assign"> - <param name="program" type="GLuint"/> - <param name="maxCount" type="GLsizei"/> - <param name="count" type="GLsizei *" output="true"/> - <param name="obj" type="GLuint *" output="true"/> - <glx ignore="true"/> - </function> - - <function name="GetAttribLocation" offset="assign"> - <param name="program" type="GLuint"/> - <param name="name" type="const GLchar *"/> - <return type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="GetProgramiv" offset="assign"> - <param name="program" type="GLuint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *"/> - <glx ignore="true"/> - </function> - - <function name="GetProgramInfoLog" offset="assign"> - <param name="program" type="GLuint"/> - <param name="bufSize" type="GLsizei"/> - <param name="length" type="GLsizei *"/> - <param name="infoLog" type="GLchar *"/> - <glx ignore="true"/> - </function> - - <function name="GetShaderiv" offset="assign"> - <param name="shader" type="GLuint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *"/> - <glx ignore="true"/> - </function> - - <function name="GetShaderInfoLog" offset="assign"> - <param name="shader" type="GLuint"/> - <param name="bufSize" type="GLsizei"/> - <param name="length" type="GLsizei *"/> - <param name="infoLog" type="GLchar *"/> - <glx ignore="true"/> - </function> - - <function name="GetShaderSource" offset="assign"> - <param name="shader" type="GLuint"/> - <param name="bufSize" type="GLsizei"/> - <param name="length" type="GLsizei *" output="true"/> - <param name="source" type="GLchar *" output="true"/> - <glx ignore="true"/> - </function> - - <function name="GetUniformfv" offset="assign"> - <param name="program" type="GLuint"/> - <param name="location" type="GLint"/> - <param name="params" type="GLfloat *" output="true"/> - <glx ignore="true"/> - </function> - - <function name="GetUniformiv" offset="assign"> - <param name="program" type="GLuint"/> - <param name="location" type="GLint"/> - <param name="params" type="GLint *"/> - <glx ignore="true"/> - </function> - - <function name="GetUniformLocation" offset="assign"> - <param name="program" type="GLuint"/> - <param name="name" type="const GLchar *"/> - <return type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="GetVertexAttribfv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx ignore="true"/> - </function> - - <function name="GetVertexAttribiv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx ignore="true"/> - </function> - - <function name="GetVertexAttribPointerv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="pname" type="GLenum"/> - <param name="pointer" type="GLvoid **" output="true"/> - <glx ignore="true"/> - </function> - - <function name="IsProgram" offset="assign"> - <param name="program" type="GLuint"/> - <return type="GLboolean"/> - <glx ignore="true"/> - </function> - - <function name="IsShader" offset="assign"> - <param name="shader" type="GLuint"/> - <return type="GLboolean"/> - <glx ignore="true"/> - </function> - - <function name="LinkProgram" offset="assign"> - <param name="program" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="ShaderSource" offset="assign"> - <param name="shader" type="GLuint"/> - <param name="count" type="GLsizei"/> - <param name="string" type="const GLchar **"/> - <param name="length" type="const GLint *"/> - <glx ignore="true"/> - </function> - - <function name="StencilFuncSeparate" offset="assign"> - <param name="face" type="GLenum"/> - <param name="func" type="GLenum"/> - <param name="ref" type="GLint"/> - <param name="mask" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="StencilOpSeparate" offset="assign"> - <param name="face" type="GLenum"/> - <param name="sfail" type="GLenum"/> - <param name="zfail" type="GLenum"/> - <param name="zpass" type="GLenum"/> - <glx ignore="true"/> - </function> - - <function name="StencilMaskSeparate" offset="assign"> - <param name="face" type="GLenum"/> - <param name="mask" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="Uniform1f" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLfloat"/> - <glx ignore="true"/> - </function> - - <function name="Uniform1fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform1i" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="Uniform1iv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLint *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform2f" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLfloat"/> - <param name="v1" type="GLfloat"/> - <glx ignore="true"/> - </function> - - <function name="Uniform2fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform2i" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLint"/> - <param name="v1" type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="Uniform2iv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLint *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform3f" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLfloat"/> - <param name="v1" type="GLfloat"/> - <param name="v2" type="GLfloat"/> - <glx ignore="true"/> - </function> - - <function name="Uniform3fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform3i" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLint"/> - <param name="v1" type="GLint"/> - <param name="v2" type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="Uniform3iv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLint *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform4f" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLfloat"/> - <param name="v1" type="GLfloat"/> - <param name="v2" type="GLfloat"/> - <param name="v3" type="GLfloat"/> - <glx ignore="true"/> - </function> - - <function name="Uniform4fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="Uniform4i" offset="assign"> - <param name="location" type="GLint"/> - <param name="v0" type="GLint"/> - <param name="v1" type="GLint"/> - <param name="v2" type="GLint"/> - <param name="v3" type="GLint"/> - <glx ignore="true"/> - </function> - - <function name="Uniform4iv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="value" type="const GLint *"/> - <glx ignore="true"/> - </function> - - <function name="UniformMatrix2fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="transpose" type="GLboolean"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="UniformMatrix3fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="transpose" type="GLboolean"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="UniformMatrix4fv" offset="assign"> - <param name="location" type="GLint"/> - <param name="count" type="GLsizei"/> - <param name="transpose" type="GLboolean"/> - <param name="value" type="const GLfloat *"/> - <glx ignore="true"/> - </function> - - <function name="UseProgram" offset="assign"> - <param name="program" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="ValidateProgram" offset="assign"> - <param name="program" type="GLuint"/> - <glx ignore="true"/> - </function> - - <function name="VertexAttrib1f" offset="assign"> - <param name="index" type="GLuint"/> - <param name="x" type="GLfloat"/> - </function> - - <function name="VertexAttrib1fv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="v" type="const GLfloat *"/> - </function> - - <function name="VertexAttrib2f" offset="assign"> - <param name="index" type="GLuint"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - </function> - - <function name="VertexAttrib2fv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="v" type="const GLfloat *"/> - </function> - - <function name="VertexAttrib3f" offset="assign"> - <param name="index" type="GLuint"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - </function> - - <function name="VertexAttrib3fv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="v" type="const GLfloat *"/> - </function> - - <function name="VertexAttrib4f" offset="assign"> - <param name="index" type="GLuint"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <param name="w" type="GLfloat"/> - </function> - - <function name="VertexAttrib4fv" offset="assign"> - <param name="index" type="GLuint"/> - <param name="v" type="const GLfloat *"/> - </function> - - <function name="VertexAttribPointer" offset="assign"> - <param name="index" type="GLuint"/> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="normalized" type="GLboolean"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es1_API.xml b/src/mapi/glapi/gen-es/es1_API.xml deleted file mode 100644 index 3428ae5616a..00000000000 --- a/src/mapi/glapi/gen-es/es1_API.xml +++ /dev/null @@ -1,1100 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL ES 1.x API --> - -<OpenGLAPI> - -<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- core subset of OpenGL 1.3 defined in OpenGL ES 1.0 --> -<category name="core1.0"> - <!-- addition to base1.0 --> - <enum name="ADD" value="0x0104"/> - <enum name="STACK_OVERFLOW" value="0x0503"/> - <enum name="STACK_UNDERFLOW" value="0x0504"/> - <enum name="EXP" value="0x0800"/> - <enum name="EXP2" value="0x0801"/> - <enum name="POINT_SMOOTH" count="1" value="0x0B10"> - <size name="Get" mode="get"/> - </enum> - <enum name="LINE_SMOOTH" count="1" value="0x0B20"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHTING" count="1" value="0x0B50"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT_MODEL_TWO_SIDE" count="1" value="0x0B52"> - <size name="LightModelfv"/> - <size name="LightModeliv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT_MODEL_AMBIENT" count="4" value="0x0B53"> - <size name="LightModelfv"/> - <size name="LightModeliv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_MATERIAL" count="1" value="0x0B57"> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG" count="1" value="0x0B60"> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_DENSITY" count="1" value="0x0B62"> - <size name="Fogfv"/> - <size name="Fogiv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_START" count="1" value="0x0B63"> - <size name="Fogfv"/> - <size name="Fogiv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_END" count="1" value="0x0B64"> - <size name="Fogfv"/> - <size name="Fogiv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_MODE" count="1" value="0x0B65"> - <size name="Fogfv"/> - <size name="Fogiv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_COLOR" count="4" value="0x0B66"> - <size name="Fogfv"/> - <size name="Fogiv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="NORMALIZE" count="1" value="0x0BA1"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALPHA_TEST" count="1" value="0x0BC0"> - <size name="Get" mode="get"/> - </enum> - <enum name="PERSPECTIVE_CORRECTION_HINT" count="1" value="0x0C50"> - <size name="Get" mode="get"/> - </enum> - <enum name="POINT_SMOOTH_HINT" count="1" value="0x0C51"> - <size name="Get" mode="get"/> - </enum> - <enum name="LINE_SMOOTH_HINT" count="1" value="0x0C52"> - <size name="Get" mode="get"/> - </enum> - <enum name="POLYGON_SMOOTH_HINT" count="1" value="0x0C53"> - <size name="Get" mode="get"/> - </enum> - <enum name="FOG_HINT" count="1" value="0x0C54"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_LIGHTS" count="1" value="0x0D31"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_MODELVIEW_STACK_DEPTH" count="1" value="0x0D36"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_PROJECTION_STACK_DEPTH" count="1" value="0x0D38"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_TEXTURE_STACK_DEPTH" count="1" value="0x0D39"> - <size name="Get" mode="get"/> - </enum> - <enum name="AMBIENT" count="4" value="0x1200"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="DIFFUSE" count="4" value="0x1201"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="SPECULAR" count="4" value="0x1202"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="POSITION" count="4" value="0x1203"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="SPOT_DIRECTION" count="3" value="0x1204"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="SPOT_EXPONENT" count="1" value="0x1205"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="SPOT_CUTOFF" count="1" value="0x1206"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="CONSTANT_ATTENUATION" count="1" value="0x1207"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="LINEAR_ATTENUATION" count="1" value="0x1208"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="QUADRATIC_ATTENUATION" count="1" value="0x1209"> - <size name="Lightfv"/> - <size name="Lightiv"/> - <size name="GetLightfv" mode="get"/> - <size name="GetLightiv" mode="get"/> - </enum> - <enum name="CLEAR" value="0x1500"/> - <enum name="AND" value="0x1501"/> - <enum name="AND_REVERSE" value="0x1502"/> - <enum name="COPY" value="0x1503"/> - <enum name="AND_INVERTED" value="0x1504"/> - <enum name="NOOP" value="0x1505"/> - <enum name="XOR" value="0x1506"/> - <enum name="OR" value="0x1507"/> - <enum name="NOR" value="0x1508"/> - <enum name="EQUIV" value="0x1509"/> - <enum name="OR_REVERSE" value="0x150B"/> - <enum name="COPY_INVERTED" value="0x150C"/> - <enum name="OR_INVERTED" value="0x150D"/> - <enum name="NAND" value="0x150E"/> - <enum name="SET" value="0x150F"/> - <enum name="EMISSION" count="4" value="0x1600"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - </enum> - <enum name="SHININESS" count="1" value="0x1601"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - </enum> - <enum name="AMBIENT_AND_DIFFUSE" count="4" value="0x1602"> - <size name="Materialfv"/> - <size name="Materialiv"/> - <size name="GetMaterialfv" mode="get"/> - <size name="GetMaterialiv" mode="get"/> - </enum> - <enum name="MODELVIEW" value="0x1700"/> - <enum name="PROJECTION" value="0x1701"/> - <enum name="FLAT" value="0x1D00"/> - <enum name="SMOOTH" value="0x1D01"/> - <enum name="MODULATE" value="0x2100"/> - <enum name="DECAL" value="0x2101"/> - <enum name="TEXTURE_ENV_MODE" count="1" value="0x2200"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="TEXTURE_ENV_COLOR" count="4" value="0x2201"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="TEXTURE_ENV" value="0x2300"/> - <enum name="LIGHT0" count="1" value="0x4000"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT1" count="1" value="0x4001"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT2" count="1" value="0x4002"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT3" count="1" value="0x4003"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT4" count="1" value="0x4004"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT5" count="1" value="0x4005"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT6" count="1" value="0x4006"> - <size name="Get" mode="get"/> - </enum> - <enum name="LIGHT7" count="1" value="0x4007"> - <size name="Get" mode="get"/> - </enum> - - <function name="AlphaFunc" offset="240"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLclampf"/> - <glx rop="159"/> - </function> - - <function name="Color4f" offset="29" vectorequiv="Color4fv"> - <param name="red" type="GLfloat"/> - <param name="green" type="GLfloat"/> - <param name="blue" type="GLfloat"/> - <param name="alpha" type="GLfloat"/> - </function> - - <function name="Fogf" offset="153"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="80"/> - </function> - - <function name="Fogfv" offset="154"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="81"/> - </function> - - <function name="Lightf" offset="159"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="86"/> - </function> - - <function name="Lightfv" offset="160"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="87"/> - </function> - - <function name="LightModelf" offset="163"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="90"/> - </function> - - <function name="LightModelfv" offset="164"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="91"/> - </function> - - <function name="LoadIdentity" offset="290"> - <glx rop="176"/> - </function> - - <function name="LoadMatrixf" offset="291"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="177"/> - </function> - - <function name="LogicOp" offset="242"> - <param name="opcode" type="GLenum"/> - <glx rop="161"/> - </function> - - <function name="Materialf" offset="169"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="96"/> - </function> - - <function name="Materialfv" offset="170"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="97"/> - </function> - - <function name="MatrixMode" offset="293"> - <param name="mode" type="GLenum"/> - <glx rop="179"/> - </function> - - <function name="MultMatrixf" offset="294"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="180"/> - </function> - - <function name="Normal3f" offset="56" vectorequiv="Normal3fv"> - <param name="nx" type="GLfloat"/> - <param name="ny" type="GLfloat"/> - <param name="nz" type="GLfloat"/> - </function> - - <function name="PointSize" offset="173"> - <param name="size" type="GLfloat"/> - <glx rop="100"/> - </function> - - <function name="PopMatrix" offset="297"> - <glx rop="183"/> - </function> - - <function name="PushMatrix" offset="298"> - <glx rop="184"/> - </function> - - <function name="Rotatef" offset="300"> - <param name="angle" type="GLfloat"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="186"/> - </function> - - <function name="Scalef" offset="302"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="188"/> - </function> - - <function name="ShadeModel" offset="177"> - <param name="mode" type="GLenum"/> - <glx rop="104"/> - </function> - - <function name="TexEnvf" offset="184"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="111"/> - </function> - - <function name="TexEnvfv" offset="185"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="112"/> - </function> - - <function name="TexImage2D" offset="183"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/> - <glx rop="110" large="true"/> - </function> - - <function name="Translatef" offset="304"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="190"/> - </function> - - <!-- addition to base1.1 --> - <enum name="COLOR_LOGIC_OP" value="0x0BF2"/> - <enum name="VERTEX_ARRAY" count="1" value="0x8074"> - <size name="Get" mode="get"/> - </enum> - <enum name="NORMAL_ARRAY" count="1" value="0x8075"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_ARRAY" count="1" value="0x8076"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_COORD_ARRAY" count="1" value="0x8078"> - <size name="Get" mode="get"/> - </enum> - - <function name="ColorPointer" offset="308"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="DisableClientState" offset="309"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="EnableClientState" offset="313"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="NormalPointer" offset="318"> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="TexCoordPointer" offset="320"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="VertexPointer" offset="321"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <!-- addition to base1.2 --> - <enum name="SMOOTH_POINT_SIZE_RANGE" count="2" value="0x0B12"> - <size name="Get" mode="get"/> - </enum> - <enum name="SMOOTH_LINE_WIDTH_RANGE" count="2" value="0x0B22"> - <size name="Get" mode="get"/> - </enum> - <enum name="RESCALE_NORMAL" count="1" value="0x803A"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_ELEMENTS_VERTICES" count="1" value="0x80E8"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_ELEMENTS_INDICES" count="1" value="0x80E9"> - <size name="Get" mode="get"/> - </enum> - - <!-- addition to base1.3 --> - <enum name="MULTISAMPLE" count="1" value="0x809D"> - <size name="Get" mode="get"/> - </enum> - <enum name="SAMPLE_ALPHA_TO_ONE" count="1" value="0x809F"> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_TEXTURE_UNITS" count="1" value="0x84E2"> - <size name="Get" mode="get"/> - </enum> - - <function name="ClientActiveTexture" offset="375"> - <param name="texture" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="MultiTexCoord4f" offset="402" vectorequiv="MultiTexCoord4fv"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - <param name="q" type="GLfloat"/> - </function> -</category> - -<!-- core subset of OpenGL 1.5 defined in OpenGL ES 1.1 --> -<category name="core1.1"> - <!-- addition to base1.0 --> - <enum name="CURRENT_COLOR" count="4" value="0x0B00"> - <size name="Get" mode="get"/> - </enum> - <enum name="CURRENT_NORMAL" count="3" value="0x0B02"> - <size name="Get" mode="get"/> - </enum> - <enum name="CURRENT_TEXTURE_COORDS" count="4" value="0x0B03"> - <size name="Get" mode="get"/> - </enum> - <enum name="POINT_SIZE" count="1" value="0x0B11"> - <size name="Get" mode="get"/> - </enum> - <enum name="SHADE_MODEL" count="1" value="0x0B54"> - <size name="Get" mode="get"/> - </enum> - <enum name="MATRIX_MODE" count="1" value="0x0BA0"> - <size name="Get" mode="get"/> - </enum> - <enum name="MODELVIEW_STACK_DEPTH" count="1" value="0x0BA3"> - <size name="Get" mode="get"/> - </enum> - <enum name="PROJECTION_STACK_DEPTH" count="1" value="0x0BA4"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_STACK_DEPTH" count="1" value="0x0BA5"> - <size name="Get" mode="get"/> - </enum> - <enum name="MODELVIEW_MATRIX" count="16" value="0x0BA6"> - <size name="Get" mode="get"/> - </enum> - <enum name="PROJECTION_MATRIX" count="16" value="0x0BA7"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_MATRIX" count="16" value="0x0BA8"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALPHA_TEST_FUNC" count="1" value="0x0BC1"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALPHA_TEST_REF" count="1" value="0x0BC2"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_DST" count="1" value="0x0BE0"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_SRC" count="1" value="0x0BE1"> - <size name="Get" mode="get"/> - </enum> - <enum name="LOGIC_OP_MODE" count="1" value="0x0BF0"> - <size name="Get" mode="get"/> - </enum> - <enum name="ALPHA_SCALE" count="1" value="0x0D1C"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - <size name="Get" mode="get"/> - </enum> - <enum name="MAX_CLIP_PLANES" count="1" value="0x0D32"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE0" count="1" value="0x3000"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE1" count="1" value="0x3001"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE2" count="1" value="0x3002"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE3" count="1" value="0x3003"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE4" count="1" value="0x3004"> - <size name="Get" mode="get"/> - </enum> - <enum name="CLIP_PLANE5" count="1" value="0x3005"> - <size name="Get" mode="get"/> - </enum> - - <function name="Color4ub" offset="35" vectorequiv="Color4ubv"> - <param name="red" type="GLubyte"/> - <param name="green" type="GLubyte"/> - <param name="blue" type="GLubyte"/> - <param name="alpha" type="GLubyte"/> - </function> - - <function name="GetLightfv" offset="264"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="118"/> - </function> - - <function name="GetMaterialfv" offset="269"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="123"/> - </function> - - <function name="GetTexEnvfv" offset="276"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="130"/> - </function> - - <function name="GetTexEnviv" offset="277"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="131"/> - </function> - - <function name="TexEnvi" offset="186"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="113"/> - </function> - - <function name="TexEnviv" offset="187"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="114"/> - </function> - - <!-- addition to base1.1 --> - <enum name="VERTEX_ARRAY_SIZE" count="1" value="0x807A"> - <size name="Get" mode="get"/> - </enum> - <enum name="VERTEX_ARRAY_TYPE" count="1" value="0x807B"> - <size name="Get" mode="get"/> - </enum> - <enum name="VERTEX_ARRAY_STRIDE" count="1" value="0x807C"> - <size name="Get" mode="get"/> - </enum> - <enum name="NORMAL_ARRAY_TYPE" count="1" value="0x807E"> - <size name="Get" mode="get"/> - </enum> - <enum name="NORMAL_ARRAY_STRIDE" count="1" value="0x807F"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_ARRAY_SIZE" count="1" value="0x8081"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_ARRAY_TYPE" count="1" value="0x8082"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_ARRAY_STRIDE" count="1" value="0x8083"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_COORD_ARRAY_SIZE" count="1" value="0x8088"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_COORD_ARRAY_TYPE" count="1" value="0x8089"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_COORD_ARRAY_STRIDE" count="1" value="0x808A"> - <size name="Get" mode="get"/> - </enum> - <enum name="VERTEX_ARRAY_POINTER" value="0x808E"/> - <enum name="NORMAL_ARRAY_POINTER" value="0x808F"/> - <enum name="COLOR_ARRAY_POINTER" value="0x8090"/> - <enum name="TEXTURE_COORD_ARRAY_POINTER" value="0x8092"/> - - <function name="GetPointerv" offset="329"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLvoid **" output="true"/> - <glx handcode="true"/> - </function> - - <!-- addition to base1.2 --> - - <!-- addition to base1.3 --> - <enum name="CLIENT_ACTIVE_TEXTURE" count="1" value="0x84E1"> - <size name="Get" mode="get"/> - </enum> - <enum name="SUBTRACT" value="0x84E7"/> - <enum name="COMBINE" value="0x8570"/> - <enum name="COMBINE_RGB" count="1" value="0x8571"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="COMBINE_ALPHA" count="1" value="0x8572"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="RGB_SCALE" count="1" value="0x8573"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="ADD_SIGNED" value="0x8574"/> - <enum name="INTERPOLATE" value="0x8575"/> - <enum name="CONSTANT" value="0x8576"/> - <enum name="PRIMARY_COLOR" value="0x8577"/> - <enum name="PREVIOUS" value="0x8578"/> - <enum name="OPERAND0_RGB" count="1" value="0x8590"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="OPERAND1_RGB" count="1" value="0x8591"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="OPERAND2_RGB" count="1" value="0x8592"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="OPERAND0_ALPHA" count="1" value="0x8598"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="OPERAND1_ALPHA" count="1" value="0x8599"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="OPERAND2_ALPHA" count="1" value="0x859A"> - <size name="TexEnvfv"/> - <size name="TexEnviv"/> - <size name="GetTexEnvfv" mode="get"/> - <size name="GetTexEnviv" mode="get"/> - </enum> - <enum name="DOT3_RGB" value="0x86AE"/> - <enum name="DOT3_RGBA" value="0x86AF"/> - - <!-- addition to base1.4 --> - <enum name="POINT_SIZE_MIN" count="1" value="0x8126"> - <size name="PointParameterfv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="POINT_SIZE_MAX" count="1" value="0x8127"> - <size name="PointParameterfv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="POINT_FADE_THRESHOLD_SIZE" count="1" value="0x8128"> - <size name="PointParameterfv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="POINT_DISTANCE_ATTENUATION" count="3" value="0x8129"> - <size name="PointParameterfv"/> - <size name="Get" mode="get"/> - </enum> - <enum name="GENERATE_MIPMAP" count="1" value="0x8191"> - <size name="TexParameterfv"/> - <size name="TexParameteriv"/> - <size name="GetTexParameterfv" mode="get"/> - <size name="GetTexParameteriv" mode="get"/> - </enum> - - <function name="PointParameterf" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="2065"/> - </function> - - <function name="PointParameterfv" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="2066"/> - </function> - - <!-- addition to base1.5 --> - <enum name="SRC0_RGB" value="0x8580"/> - <enum name="SRC1_RGB" value="0x8581"/> - <enum name="SRC2_RGB" value="0x8582"/> - <enum name="SRC0_ALPHA" value="0x8588"/> - <enum name="SRC1_ALPHA" value="0x8589"/> - <enum name="SRC2_ALPHA" value="0x858A"/> - <enum name="VERTEX_ARRAY_BUFFER_BINDING" count="1" value="0x8896"> - <size name="Get" mode="get"/> - </enum> - <enum name="NORMAL_ARRAY_BUFFER_BINDING" count="1" value="0x8897"> - <size name="Get" mode="get"/> - </enum> - <enum name="COLOR_ARRAY_BUFFER_BINDING" count="1" value="0x8898"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_COORD_ARRAY_BUFFER_BINDING" count="1" value="0x889A"> - <size name="Get" mode="get"/> - </enum> -</category> - -<!-- OpenGL ES 1.0 --> -<category name="es1.0"> - <!-- addition to core1.0 --> - - <!-- from GL_OES_fixed_point --> - <enum name="FIXED" value="0x140C"/> - - <type name="fixed" size="4" /> - <type name="clampx" size="4" /> - - <function name="AlphaFuncx" offset="assign"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLclampx"/> - </function> - - <function name="ClearColorx" offset="assign"> - <param name="red" type="GLclampx"/> - <param name="green" type="GLclampx"/> - <param name="blue" type="GLclampx"/> - <param name="alpha" type="GLclampx"/> - </function> - - <function name="ClearDepthx" offset="assign"> - <param name="depth" type="GLclampx"/> - </function> - - <function name="Color4x" offset="assign"> - <param name="red" type="GLfixed"/> - <param name="green" type="GLfixed"/> - <param name="blue" type="GLfixed"/> - <param name="alpha" type="GLfixed"/> - </function> - - <function name="DepthRangex" offset="assign"> - <param name="zNear" type="GLclampx"/> - <param name="zFar" type="GLclampx"/> - </function> - - <function name="Fogx" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="Fogxv" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="Frustumx" offset="assign"> - <param name="left" type="GLfixed"/> - <param name="right" type="GLfixed"/> - <param name="bottom" type="GLfixed"/> - <param name="top" type="GLfixed"/> - <param name="zNear" type="GLfixed"/> - <param name="zFar" type="GLfixed"/> - </function> - - <function name="LightModelx" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="LightModelxv" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="Lightx" offset="assign"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="Lightxv" offset="assign"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="LineWidthx" offset="assign"> - <param name="width" type="GLfixed"/> - </function> - - <function name="LoadMatrixx" offset="assign"> - <param name="m" type="const GLfixed *" count="16"/> - </function> - - <function name="Materialx" offset="assign"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="Materialxv" offset="assign"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="MultMatrixx" offset="assign"> - <param name="m" type="const GLfixed *" count="16"/> - </function> - - <function name="MultiTexCoord4x" offset="assign"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfixed"/> - <param name="t" type="GLfixed"/> - <param name="r" type="GLfixed"/> - <param name="q" type="GLfixed"/> - </function> - - <function name="Normal3x" offset="assign"> - <param name="nx" type="GLfixed"/> - <param name="ny" type="GLfixed"/> - <param name="nz" type="GLfixed"/> - </function> - - <function name="Orthox" offset="assign"> - <param name="left" type="GLfixed"/> - <param name="right" type="GLfixed"/> - <param name="bottom" type="GLfixed"/> - <param name="top" type="GLfixed"/> - <param name="zNear" type="GLfixed"/> - <param name="zFar" type="GLfixed"/> - </function> - - <function name="PointSizex" offset="assign"> - <param name="size" type="GLfixed"/> - </function> - - <function name="PolygonOffsetx" offset="assign"> - <param name="factor" type="GLfixed"/> - <param name="units" type="GLfixed"/> - </function> - - <function name="Rotatex" offset="assign"> - <param name="angle" type="GLfixed"/> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <function name="SampleCoveragex" offset="assign"> - <param name="value" type="GLclampx"/> - <param name="invert" type="GLboolean"/> - </function> - - <function name="Scalex" offset="assign"> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <function name="TexEnvx" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="TexEnvxv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="TexParameterx" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="Translatex" offset="assign"> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <!-- from GL_OES_single_precision --> - <function name="ClearDepthf" offset="assign"> - <param name="depth" type="GLclampf"/> - </function> - - <function name="DepthRangef" offset="assign"> - <param name="zNear" type="GLclampf"/> - <param name="zFar" type="GLclampf"/> - </function> - - <function name="Frustumf" offset="assign"> - <param name="left" type="GLfloat"/> - <param name="right" type="GLfloat"/> - <param name="bottom" type="GLfloat"/> - <param name="top" type="GLfloat"/> - <param name="zNear" type="GLfloat"/> - <param name="zFar" type="GLfloat"/> - </function> - - <function name="Orthof" offset="assign"> - <param name="left" type="GLfloat"/> - <param name="right" type="GLfloat"/> - <param name="bottom" type="GLfloat"/> - <param name="top" type="GLfloat"/> - <param name="zNear" type="GLfloat"/> - <param name="zFar" type="GLfloat"/> - </function> -</category> - -<!-- OpenGL ES 1.1 --> -<category name="es1.1"> - <!-- addition to core1.1 --> - - <!-- from GL_OES_fixed_point --> - <function name="ClipPlanex" offset="assign"> - <param name="plane" type="GLenum"/> - <param name="equation" type="const GLfixed *" count="4"/> - </function> - - <function name="GetClipPlanex" offset="assign"> - <param name="plane" type="GLenum"/> - <param name="equation" type="GLfixed *" output="true" count="4"/> - </function> - - <function name="GetFixedv" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetLightxv" offset="assign"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetMaterialxv" offset="assign"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetTexEnvxv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetTexParameterxv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="PointParameterx" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="PointParameterxv" offset="assign"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *"/> - </function> - - <function name="TexParameterxv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <!-- from GL_OES_matrix_get --> - <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898D"/> - <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898E"/> - <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898F"/> - - <!-- from GL_OES_single_precision --> - <function name="ClipPlanef" offset="assign"> - <param name="plane" type="GLenum"/> - <param name="equation" type="const GLfloat *" count="4"/> - </function> - - <function name="GetClipPlanef" offset="assign"> - <param name="plane" type="GLenum"/> - <param name="equation" type="GLfloat *" output="true" count="4"/> - </function> -</category> - -<xi:include href="es1_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> -<xi:include href="es1_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es1_COMPAT.xml b/src/mapi/glapi/gen-es/es1_COMPAT.xml deleted file mode 100644 index 096cea88d6f..00000000000 --- a/src/mapi/glapi/gen-es/es1_COMPAT.xml +++ /dev/null @@ -1,135 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<OpenGLAPI> - -<!-- This file defines the functions that are needed by Mesa. It - makes sure the generated glapi headers are compatible with Mesa. - It mainly consists of missing functions and aliases in OpenGL ES. ---> - -<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- except for those defined by es_COMPAT.xml, these are also needed --> -<category name="compat"> - <!-- OpenGL 1.0 --> - <function name="TexGenf" alias="TexGenfOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="117"/> - </function> - - <function name="TexGenfv" alias="TexGenfvOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="118"/> - </function> - - <function name="TexGeni" alias="TexGeniOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="119"/> - </function> - - <function name="TexGeniv" alias="TexGenivOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="120"/> - </function> - - <function name="GetTexGenfv" alias="GetTexGenfvOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="133"/> - </function> - - <function name="GetTexGeniv" alias="GetTexGenivOES" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="134"/> - </function> - - <!-- OpenGL 1.2 --> - <function name="BlendColor" offset="336" static_dispatch="false"> - <param name="red" type="GLclampf"/> - <param name="green" type="GLclampf"/> - <param name="blue" type="GLclampf"/> - <param name="alpha" type="GLclampf"/> - <glx rop="4096"/> - </function> - - <function name="BlendEquation" alias="BlendEquationOES" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="4097"/> - </function> - - <function name="TexImage3D" offset="371" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> - <glx rop="4114" large="true"/> - </function> - - <function name="TexSubImage3D" offset="372" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4115" large="true"/> - </function> - - <function name="CopyTexSubImage3D" offset="373" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4123"/> - </function> - - <!-- GL_ARB_multitexture --> - <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx rop="197"/> - </function> - - <function name="ClientActiveTextureARB" alias="ClientActiveTexture" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="MultiTexCoord4fARB" alias="MultiTexCoord4f" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - <param name="q" type="GLfloat"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es1_EXT.xml b/src/mapi/glapi/gen-es/es1_EXT.xml deleted file mode 100644 index c1e86373d8b..00000000000 --- a/src/mapi/glapi/gen-es/es1_EXT.xml +++ /dev/null @@ -1,699 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL ES 1.x extensions --> - -<OpenGLAPI> - -<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- part of es1.1 extension pack --> -<category name="GL_OES_blend_equation_separate" number="1"> - <enum name="BLEND_EQUATION_RGB_OES" count="1" value="0x8009"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_EQUATION_ALPHA_OES" count="1" value="0x883D"> - <size name="Get" mode="get"/> - </enum> - - <function name="BlendEquationSeparateOES" offset="assign"> - <param name="modeRGB" type="GLenum"/> - <param name="modeA" type="GLenum"/> - <glx rop="4228"/> - </function> -</category> - -<!-- part of es1.1 extension pack --> -<category name="GL_OES_blend_func_separate" number="2"> - <enum name="BLEND_DST_RGB_OES" count="1" value="0x80C8"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_SRC_RGB_OES" count="1" value="0x80C9"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_DST_ALPHA_OES" count="1" value="0x80CA"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_SRC_ALPHA_OES" count="1" value="0x80CB"> - <size name="Get" mode="get"/> - </enum> - - <function name="BlendFuncSeparateOES" offset="assign"> - <param name="sfactorRGB" type="GLenum"/> - <param name="dfactorRGB" type="GLenum"/> - <param name="sfactorAlpha" type="GLenum"/> - <param name="dfactorAlpha" type="GLenum"/> - <glx rop="4134"/> - </function> -</category> - -<!-- part of es1.1 extension pack --> -<category name="GL_OES_blend_subtract" number="3"> - <enum name="FUNC_ADD_OES" value="0x8006"/> - <enum name="BLEND_EQUATION_OES" count="1" value="0x8009"> - <size name="Get" mode="get"/> - </enum> - <enum name="FUNC_SUBTRACT_OES" value="0x800A"/> - <enum name="FUNC_REVERSE_SUBTRACT_OES" value="0x800B"/> - - <function name="BlendEquationOES" offset="337"> - <param name="mode" type="GLenum"/> - <glx rop="4097"/> - </function> -</category> - -<!-- core addition to es1.0 and later --> -<category name="GL_OES_byte_coordinates" number="4"> - <enum name="BYTE" value="0x1400"/> -</category> - -<!-- optional for es1.1 --> -<category name="GL_OES_draw_texture" number="7"> - <enum name="TEXTURE_CROP_RECT_OES" value="0x8B9D"/> - - <function name="DrawTexiOES" offset="assign"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="z" type="GLint"/> - <param name="width" type="GLint"/> - <param name="height" type="GLint"/> - </function> - - <function name="DrawTexivOES" offset="assign"> - <param name="coords" type="const GLint *" count="5"/> - </function> - - <function name="DrawTexfOES" offset="assign"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <param name="width" type="GLfloat"/> - <param name="height" type="GLfloat"/> - </function> - - <function name="DrawTexfvOES" offset="assign"> - <param name="coords" type="const GLfloat *" count="5"/> - </function> - - <function name="DrawTexsOES" offset="assign"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - <param name="z" type="GLshort"/> - <param name="width" type="GLshort"/> - <param name="height" type="GLshort"/> - </function> - - <function name="DrawTexsvOES" offset="assign"> - <param name="coords" type="const GLshort *" count="5"/> - </function> - - <function name="DrawTexxOES" offset="assign"> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - <param name="width" type="GLfixed"/> - <param name="height" type="GLfixed"/> - </function> - - <function name="DrawTexxvOES" offset="assign"> - <param name="coords" type="const GLfixed *" count="5"/> - </function> - - <!-- TexParameter{ifx}v is skipped here --> -</category> - -<!-- core addition to es1.0 and later --> -<category name="GL_OES_fixed_point" number="9"> - <enum name="FIXED_OES" value="0x140C"/> - - <!-- additon to es1.0 --> - <function name="AlphaFuncxOES" alias="AlphaFuncx"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLclampx"/> - </function> - - <function name="ClearColorxOES" alias="ClearColorx"> - <param name="red" type="GLclampx"/> - <param name="green" type="GLclampx"/> - <param name="blue" type="GLclampx"/> - <param name="alpha" type="GLclampx"/> - </function> - - <function name="ClearDepthxOES" alias="ClearDepthx"> - <param name="depth" type="GLclampx"/> - </function> - - <function name="Color4xOES" alias="Color4x"> - <param name="red" type="GLfixed"/> - <param name="green" type="GLfixed"/> - <param name="blue" type="GLfixed"/> - <param name="alpha" type="GLfixed"/> - </function> - - <function name="DepthRangexOES" alias="DepthRangex"> - <param name="zNear" type="GLclampx"/> - <param name="zFar" type="GLclampx"/> - </function> - - <function name="FogxOES" alias="Fogx"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="FogxvOES" alias="Fogxv"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="FrustumxOES" alias="Frustumx"> - <param name="left" type="GLfixed"/> - <param name="right" type="GLfixed"/> - <param name="bottom" type="GLfixed"/> - <param name="top" type="GLfixed"/> - <param name="zNear" type="GLfixed"/> - <param name="zFar" type="GLfixed"/> - </function> - - <function name="LightModelxOES" alias="LightModelx"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="LightModelxvOES" alias="LightModelxv"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="LightxOES" alias="Lightx"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="LightxvOES" alias="Lightxv"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="LineWidthxOES" alias="LineWidthx"> - <param name="width" type="GLfixed"/> - </function> - - <function name="LoadMatrixxOES" alias="LoadMatrixx"> - <param name="m" type="const GLfixed *" count="16"/> - </function> - - <function name="MaterialxOES" alias="Materialx"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="MaterialxvOES" alias="Materialxv"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="MultiTexCoord4xOES" alias="MultiTexCoord4x"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfixed"/> - <param name="t" type="GLfixed"/> - <param name="r" type="GLfixed"/> - <param name="q" type="GLfixed"/> - </function> - - <function name="MultMatrixxOES" alias="MultMatrixx"> - <param name="m" type="const GLfixed *" count="16"/> - </function> - - <function name="Normal3xOES" alias="Normal3x"> - <param name="nx" type="GLfixed"/> - <param name="ny" type="GLfixed"/> - <param name="nz" type="GLfixed"/> - </function> - - <function name="OrthoxOES" alias="Orthox"> - <param name="left" type="GLfixed"/> - <param name="right" type="GLfixed"/> - <param name="bottom" type="GLfixed"/> - <param name="top" type="GLfixed"/> - <param name="zNear" type="GLfixed"/> - <param name="zFar" type="GLfixed"/> - </function> - - <function name="PointSizexOES" alias="PointSizex"> - <param name="size" type="GLfixed"/> - </function> - - <function name="PolygonOffsetxOES" alias="PolygonOffsetx"> - <param name="factor" type="GLfixed"/> - <param name="units" type="GLfixed"/> - </function> - - <function name="RotatexOES" alias="Rotatex"> - <param name="angle" type="GLfixed"/> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <function name="SampleCoveragexOES" alias="SampleCoveragex"> - <param name="value" type="GLclampx"/> - <param name="invert" type="GLboolean"/> - </function> - - <function name="ScalexOES" alias="Scalex"> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <function name="TexEnvxOES" alias="TexEnvx"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="TexEnvxvOES" alias="TexEnvxv"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> - - <function name="TexParameterxOES" alias="TexParameterx"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="TranslatexOES" alias="Translatex"> - <param name="x" type="GLfixed"/> - <param name="y" type="GLfixed"/> - <param name="z" type="GLfixed"/> - </function> - - <!-- additon to es1.1 --> - <function name="ClipPlanexOES" alias="ClipPlanex"> - <param name="plane" type="GLenum"/> - <param name="equation" type="const GLfixed *" count="4"/> - </function> - - <function name="GetClipPlanexOES" alias="GetClipPlanex"> - <param name="plane" type="GLenum"/> - <param name="equation" type="GLfixed *" output="true" count="4"/> - </function> - - <function name="GetFixedvOES" alias="GetFixedv"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetLightxvOES" alias="GetLightxv"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetMaterialxvOES" alias="GetMaterialxv"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetTexEnvxvOES" alias="GetTexEnvxv"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="GetTexParameterxvOES" alias="GetTexParameterxv"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="PointParameterxOES" alias="PointParameterx"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfixed"/> - </function> - - <function name="PointParameterxvOES" alias="PointParameterxv"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *"/> - </function> - - <function name="TexParameterxvOES" alias="TexParameterxv"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> -</category> - -<!-- part of es1.1 extension pack --> -<category name="GL_OES_framebuffer_object" number="10"> - <enum name="NONE_OES" value="0"/> - <enum name="INVALID_FRAMEBUFFER_OPERATION_OES" value="0x0506"/> - <enum name="RGBA4_OES" value="0x8056"/> - <enum name="RGB5_A1_OES" value="0x8057"/> - <enum name="DEPTH_COMPONENT16_OES" value="0x81A5"/> - - <enum name="MAX_RENDERBUFFER_SIZE_OES" value="0x84E8"/> - <enum name="FRAMEBUFFER_BINDING_OES" value="0x8CA6"/> - <enum name="RENDERBUFFER_BINDING_OES" value="0x8CA7"/> - <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_OES" value="0x8CD0"/> - <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_OES" value="0x8CD1"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_OES" value="0x8CD2"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_OES" value="0x8CD3"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/> - <enum name="FRAMEBUFFER_COMPLETE_OES" value="0x8CD5"/> - <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT_OES" value="0x8CD6"/> - <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_OES" value="0x8CD7"/> - <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS_OES" value="0x8CD9"/> - <enum name="FRAMEBUFFER_INCOMPLETE_FORMATS_OES" value="0x8CDA"/> - <enum name="FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_OES" value="0x8CDB"/> - <enum name="FRAMEBUFFER_INCOMPLETE_READ_BUFFER_OES" value="0x8CDC"/> - <enum name="FRAMEBUFFER_UNSUPPORTED_OES" value="0x8CDD"/> - <enum name="COLOR_ATTACHMENT0_OES" value="0x8CE0"/> - <enum name="DEPTH_ATTACHMENT_OES" value="0x8D00"/> - <enum name="STENCIL_ATTACHMENT_OES" value="0x8D20"/> - <enum name="FRAMEBUFFER_OES" value="0x8D40"/> - <enum name="RENDERBUFFER_OES" value="0x8D41"/> - <enum name="RENDERBUFFER_WIDTH_OES" value="0x8D42"/> - <enum name="RENDERBUFFER_HEIGHT_OES" value="0x8D43"/> - <enum name="RENDERBUFFER_INTERNAL_FORMAT_OES" value="0x8D44"/> - <enum name="STENCIL_INDEX1_OES" value="0x8D46"/> - <enum name="STENCIL_INDEX4_OES" value="0x8D47"/> - <enum name="STENCIL_INDEX8_OES" value="0x8D48"/> - <enum name="RENDERBUFFER_RED_SIZE_OES" value="0x8D50"/> - <enum name="RENDERBUFFER_GREEN_SIZE_OES" value="0x8D51"/> - <enum name="RENDERBUFFER_BLUE_SIZE_OES" value="0x8D52"/> - <enum name="RENDERBUFFER_ALPHA_SIZE_OES" value="0x8D53"/> - <enum name="RENDERBUFFER_DEPTH_SIZE_OES" value="0x8D54"/> - <enum name="RENDERBUFFER_STENCIL_SIZE_OES" value="0x8D55"/> - <enum name="RGB565_OES" value="0x8D62"/> - - <function name="BindFramebufferOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="framebuffer" type="GLuint"/> - </function> - - <function name="BindRenderbufferOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="renderbuffer" type="GLuint"/> - </function> - - <function name="CheckFramebufferStatusOES" offset="assign"> - <param name="target" type="GLenum"/> - <return type="GLenum"/> - </function> - - <function name="DeleteFramebuffersOES" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="framebuffers" type="const GLuint *" count="n"/> - </function> - - <function name="DeleteRenderbuffersOES" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="renderbuffers" type="const GLuint *" count="n"/> - </function> - - <function name="FramebufferRenderbufferOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="renderbuffertarget" type="GLenum"/> - <param name="renderbuffer" type="GLuint"/> - </function> - - <function name="FramebufferTexture2DOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="textarget" type="GLenum"/> - <param name="texture" type="GLuint"/> - <param name="level" type="GLint"/> - </function> - - <function name="GenerateMipmapOES" offset="assign"> - <param name="target" type="GLenum"/> - </function> - - <function name="GenFramebuffersOES" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="framebuffers" type="GLuint *" count="n" output="true"/> - </function> - - <function name="GenRenderbuffersOES" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="renderbuffers" type="GLuint *" count="n" output="true"/> - </function> - - <function name="GetFramebufferAttachmentParameterivOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true"/> - </function> - - <function name="GetRenderbufferParameterivOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true"/> - </function> - - <function name="IsFramebufferOES" offset="assign"> - <param name="framebuffer" type="GLuint"/> - <return type="GLboolean"/> - </function> - - <function name="IsRenderbufferOES" offset="assign"> - <param name="renderbuffer" type="GLuint"/> - <return type="GLboolean"/> - </function> - - <function name="RenderbufferStorageOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - </function> -</category> - -<!-- core addition to es1.1 --> -<category name="GL_OES_matrix_get" number="11"> - <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898D"/> - <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898E"/> - <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES" value="0x898F"/> -</category> - -<!-- optional for es1.1 --> -<category name="GL_OES_matrix_palette" number="12"> - <enum name="MAX_VERTEX_UNITS_OES" value="0x86A4"/> - <enum name="WEIGHT_ARRAY_TYPE_OES" value="0x86A9"/> - <enum name="WEIGHT_ARRAY_STRIDE_OES" value="0x86AA"/> - <enum name="WEIGHT_ARRAY_SIZE_OES" value="0x86AB"/> - <enum name="WEIGHT_ARRAY_POINTER_OES" value="0x86AC"/> - <enum name="WEIGHT_ARRAY_OES" value="0x86AD"/> - <enum name="MATRIX_PALETTE_OES" value="0x8840"/> - <enum name="MAX_PALETTE_MATRICES_OES" value="0x8842"/> - <enum name="CURRENT_PALETTE_MATRIX_OES" value="0x8843"/> - <enum name="MATRIX_INDEX_ARRAY_OES" value="0x8844"/> - <enum name="MATRIX_INDEX_ARRAY_SIZE_OES" value="0x8846"/> - <enum name="MATRIX_INDEX_ARRAY_TYPE_OES" value="0x8847"/> - <enum name="MATRIX_INDEX_ARRAY_STRIDE_OES" value="0x8848"/> - <enum name="MATRIX_INDEX_ARRAY_POINTER_OES" value="0x8849"/> - <enum name="WEIGHT_ARRAY_BUFFER_BINDING_OES" value="0x889E"/> - <enum name="MATRIX_INDEX_ARRAY_BUFFER_BINDING_OES" value="0x8B9E"/> - - <function name="CurrentPaletteMatrixOES"> - <param name="matrixpaletteindex" type="GLuint"/> - </function> - - <function name="LoadPaletteFromModelViewMatrixOES"> - </function> - - <function name="MatrixIndexPointerOES"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - </function> - - <function name="WeightPointerOES"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - </function> -</category> - -<!-- required for es1.1 --> -<category name="GL_OES_point_size_array" number="14"> - <enum name="POINT_SIZE_ARRAY_TYPE_OES" value="0x898A"/> - <enum name="POINT_SIZE_ARRAY_STRIDE_OES" value="0x898B"/> - <enum name="POINT_SIZE_ARRAY_POINTER_OES" value="0x898C"/> - <enum name="POINT_SIZE_ARRAY_OES" value="0x8B9C"/> - <enum name="POINT_SIZE_ARRAY_BUFFER_BINDING_OES" value="0x8B9F"/> - - <function name="PointSizePointerOES" offset="assign"> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - </function> -</category> - -<!-- required for es1.1 --> -<category name="GL_OES_point_sprite" number="15"> - <enum name="POINT_SPRITE_OES" value="0x8861"/> - <enum name="COORD_REPLACE_OES" value="0x8862"/> -</category> - -<!-- optional for es1.0 --> -<category name="GL_OES_query_matrix" number="16"> - <function name="QueryMatrixxOES" offset="assign"> - <param name="mantissa" type="GLfixed *" count="16" /> - <param name="exponent" type="GLint *" count="16" /> - <return type="GLbitfield"/> - </function> -</category> - -<!-- required for es1.0 and later --> -<category name="GL_OES_read_format" number="17"> - <enum name="IMPLEMENTATION_COLOR_READ_TYPE_OES" value="0x8B9A"/> - <enum name="IMPLEMENTATION_COLOR_READ_FORMAT_OES" value="0x8B9B"/> -</category> - -<!-- core addition to es1.0 and later --> -<category name="GL_OES_single_precision" number="18"> - <!-- additon to es1.0 --> - <function name="ClearDepthfOES" alias="ClearDepthf"> - <param name="depth" type="GLclampf"/> - </function> - - <function name="DepthRangefOES" alias="DepthRangef"> - <param name="zNear" type="GLclampf"/> - <param name="zFar" type="GLclampf"/> - </function> - - <function name="FrustumfOES" alias="Frustumf"> - <param name="left" type="GLfloat"/> - <param name="right" type="GLfloat"/> - <param name="bottom" type="GLfloat"/> - <param name="top" type="GLfloat"/> - <param name="zNear" type="GLfloat"/> - <param name="zFar" type="GLfloat"/> - </function> - - <function name="OrthofOES" alias="Orthof"> - <param name="left" type="GLfloat"/> - <param name="right" type="GLfloat"/> - <param name="bottom" type="GLfloat"/> - <param name="top" type="GLfloat"/> - <param name="zNear" type="GLfloat"/> - <param name="zFar" type="GLfloat"/> - </function> - - <!-- additon to es1.1 --> - <function name="ClipPlanefOES" alias="ClipPlanef"> - <param name="plane" type="GLenum"/> - <param name="equation" type="const GLfloat *" count="4"/> - </function> - - <function name="GetClipPlanefOES" alias="GetClipPlanef"> - <param name="plane" type="GLenum"/> - <param name="equation" type="GLfloat *" output="true" count="4"/> - </function> -</category> - -<!-- part of es1.1 extension pack --> -<category name="GL_OES_texture_cube_map" number="20"> - <enum name="TEXTURE_GEN_MODE_OES" value="0x2500"/> - <enum name="NORMAL_MAP_OES" value="0x8511"/> - <enum name="REFLECTION_MAP_OES" value="0x8512"/> - <enum name="TEXTURE_CUBE_MAP_OES" value="0x8513"/> - <enum name="TEXTURE_BINDING_CUBE_MAP_OES" value="0x8514"/> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_X_OES" value="0x8515"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X_OES" value="0x8516"/> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y_OES" value="0x8517"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y_OES" value="0x8518"/> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z_OES" value="0x8519"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z_OES" value="0x851A"/> - <enum name="MAX_CUBE_MAP_TEXTURE_SIZE_OES" value="0x851C"/> - <enum name="TEXTURE_GEN_STR_OES" value="0x8D60"/> - - <function name="GetTexGenfvOES" offset="279"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="133"/> - </function> - - <function name="GetTexGenivOES" offset="280"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="134"/> - </function> - - <function name="GetTexGenxvOES" offset="assign"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfixed *" output="true" variable_param="pname"/> - </function> - - <function name="TexGenfOES" offset="190"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="117"/> - </function> - - <function name="TexGenfvOES" offset="191"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="118"/> - </function> - - <function name="TexGeniOES" offset="192"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="119"/> - </function> - - <function name="TexGenivOES" offset="193"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="120"/> - </function> - - <function name="TexGenxOES" offset="assign"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - </function> - - <function name="TexGenxvOES" offset="assign"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfixed *" variable_param="pname"/> - </function> -</category> - -<category name="GL_OES_texture_env_crossbar" number="21"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_OES_texture_mirrored_repeat" number="22"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_EXT_texture_lod_bias" number="60"> - <enum name="TEXTURE_FILTER_CONTROL_EXT" value="0x8500"/> - <enum name="TEXTURE_LOD_BIAS_EXT" value="0x8501"/> - <enum name="MAX_TEXTURE_LOD_BIAS_EXT" value="0x84FD"/> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es2_API.xml b/src/mapi/glapi/gen-es/es2_API.xml deleted file mode 100644 index f8af63b94fe..00000000000 --- a/src/mapi/glapi/gen-es/es2_API.xml +++ /dev/null @@ -1,294 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL ES 2.x API --> - -<OpenGLAPI> - -<xi:include href="base2_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- core subset of OpenGL 2.0 defined in OpenGL ES 2.0 --> -<category name="core2.0"> - <!-- addition to base1.0 --> - <enum name="NONE" value="0x0"/> - <enum name="INT" count="4" value="0x1404"> - <size name="CallLists"/> - </enum> - <enum name="UNSIGNED_INT" count="4" value="0x1405"> - <size name="CallLists"/> - </enum> - <enum name="STENCIL_INDEX" value="0x1901"/> - <enum name="DEPTH_COMPONENT" value="0x1902"/> - - <function name="TexImage2D" offset="183"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> <!-- XXX the actual type is GLenum... --> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/> - <glx rop="110" large="true"/> - </function> - - <!-- addition to base1.1 --> - <enum name="RGBA4" value="0x8056"/> - <enum name="RGB5_A1" value="0x8057"/> - - <!-- addition to base1.2 --> - <enum name="CONSTANT_COLOR" value="0x8001"/> - <enum name="ONE_MINUS_CONSTANT_COLOR" value="0x8002"/> - <enum name="CONSTANT_ALPHA" value="0x8003"/> - <enum name="ONE_MINUS_CONSTANT_ALPHA" value="0x8004"/> - <enum name="BLEND_COLOR" count="4" value="0x8005"> - <size name="Get" mode="get"/> - </enum> - <enum name="FUNC_ADD" value="0x8006"/> - <enum name="BLEND_EQUATION" count="1" value="0x8009"> - <size name="Get" mode="get"/> - </enum> - <enum name="FUNC_SUBTRACT" value="0x800A"/> - <enum name="FUNC_REVERSE_SUBTRACT" value="0x800B"/> - - <function name="BlendColor" offset="336"> - <param name="red" type="GLclampf"/> - <param name="green" type="GLclampf"/> - <param name="blue" type="GLclampf"/> - <param name="alpha" type="GLclampf"/> - <glx rop="4096"/> - </function> - - <function name="BlendEquation" offset="337"> - <param name="mode" type="GLenum"/> - <glx rop="4097"/> - </function> - - <!-- addition to base1.3 --> - <enum name="TEXTURE_CUBE_MAP" count="1" value="0x8513"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_BINDING_CUBE_MAP" count="1" value="0x8514"> - <size name="Get" mode="get"/> - </enum> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_X" value="0x8515"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X" value="0x8516"/> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y" value="0x8517"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y" value="0x8518"/> - <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z" value="0x8519"/> - <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z" value="0x851A"/> - <enum name="MAX_CUBE_MAP_TEXTURE_SIZE" count="1" value="0x851C"> - <size name="Get" mode="get"/> - </enum> - - <!-- addition to base1.4 --> - <enum name="BLEND_DST_RGB" count="1" value="0x80C8"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_SRC_RGB" count="1" value="0x80C9"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_DST_ALPHA" count="1" value="0x80CA"> - <size name="Get" mode="get"/> - </enum> - <enum name="BLEND_SRC_ALPHA" count="1" value="0x80CB"> - <size name="Get" mode="get"/> - </enum> - <enum name="DEPTH_COMPONENT16" value="0x81A5"/> - <enum name="MIRRORED_REPEAT" value="0x8370"/> - <enum name="INCR_WRAP" value="0x8507"/> - <enum name="DECR_WRAP" value="0x8508"/> - - <function name="BlendFuncSeparate" offset="assign"> - <param name="sfactorRGB" type="GLenum"/> - <param name="dfactorRGB" type="GLenum"/> - <param name="sfactorAlpha" type="GLenum"/> - <param name="dfactorAlpha" type="GLenum"/> - <glx rop="4134"/> - </function> - - <!-- addition to base1.5 --> - <enum name="VERTEX_ATTRIB_ARRAY_BUFFER_BINDING" count="1" value="0x889F"> - <size name="GetVertexAttribdv" mode="get"/> - <size name="GetVertexAttribfv" mode="get"/> - <size name="GetVertexAttribiv" mode="get"/> - </enum> - <enum name="STREAM_DRAW" value="0x88E0"/> - - <!-- addition to base2.0 --> - <!-- base2.0 should have everything defined --> -</category> - -<!-- OpenGL ES 2.0 --> -<category name="es2.0"> - <!-- addition to core2.0 --> - <enum name="LOW_FLOAT" value="0x8DF0"/> - <enum name="MEDIUM_FLOAT" value="0x8DF1"/> - <enum name="HIGH_FLOAT" value="0x8DF2"/> - <enum name="LOW_INT" value="0x8DF3"/> - <enum name="MEDIUM_INT" value="0x8DF4"/> - <enum name="HIGH_INT" value="0x8DF5"/> - <enum name="SHADER_BINARY_FORMATS" value="0x8DF8"/> - <enum name="NUM_SHADER_BINARY_FORMATS" value="0x8DF9"/> - <enum name="SHADER_COMPILER" value="0x8DFA"/> - <enum name="MAX_VERTEX_UNIFORM_VECTORS" value="0x8DFB"/> - <enum name="MAX_VARYING_VECTORS" value="0x8DFC"/> - <enum name="MAX_FRAGMENT_UNIFORM_VECTORS" value="0x8DFD"/> - - <function name="GetShaderPrecisionFormat" offset="assign"> - <param name="shadertype" type="GLenum"/> - <param name="precisiontype" type="GLenum"/> - <param name="range" type="GLint *"/> - <param name="precision" type="GLint *"/> - </function> - - <function name="ReleaseShaderCompiler" offset="assign"> - </function> - - <function name="ShaderBinary" offset="assign"> - <param name="n" type="GLsizei"/> - <param name="shaders" type="const GLuint *"/> - <param name="binaryformat" type="GLenum"/> - <param name="binary" type="const GLvoid *"/> - <param name="length" type="GLsizei"/> - </function> - - <!-- from GL_OES_fixed_point --> - <enum name="FIXED" value="0x140C"/> - <type name="fixed" size="4" /> - - <!-- from GL_OES_framebuffer_object --> - <enum name="INVALID_FRAMEBUFFER_OPERATION" value="0x0506"/> - <enum name="MAX_RENDERBUFFER_SIZE" value="0x84E8"/> - <enum name="FRAMEBUFFER_BINDING" value="0x8CA6"/> - <enum name="RENDERBUFFER_BINDING" value="0x8CA7"/> - <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE" value="0x8CD0"/> - <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME" value="0x8CD1"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL" value="0x8CD2"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE" value="0x8CD3"/> - <enum name="FRAMEBUFFER_COMPLETE" value="0x8CD5"/> - <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT" value="0x8CD6"/> - <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT" value="0x8CD7"/> - <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS" value="0x8CD9"/> - <enum name="FRAMEBUFFER_UNSUPPORTED" value="0x8CDD"/> - <enum name="COLOR_ATTACHMENT0" value="0x8CE0"/> - <enum name="DEPTH_ATTACHMENT" value="0x8D00"/> - <enum name="STENCIL_ATTACHMENT" value="0x8D20"/> - <enum name="FRAMEBUFFER" value="0x8D40"/> - <enum name="RENDERBUFFER" value="0x8D41"/> - <enum name="RENDERBUFFER_WIDTH" value="0x8D42"/> - <enum name="RENDERBUFFER_HEIGHT" value="0x8D43"/> - <enum name="RENDERBUFFER_INTERNAL_FORMAT" value="0x8D44"/> - <enum name="STENCIL_INDEX8" value="0x8D48"/> - <enum name="RENDERBUFFER_RED_SIZE" value="0x8D50"/> - <enum name="RENDERBUFFER_GREEN_SIZE" value="0x8D51"/> - <enum name="RENDERBUFFER_BLUE_SIZE" value="0x8D52"/> - <enum name="RENDERBUFFER_ALPHA_SIZE" value="0x8D53"/> - <enum name="RENDERBUFFER_DEPTH_SIZE" value="0x8D54"/> - <enum name="RENDERBUFFER_STENCIL_SIZE" value="0x8D55"/> - <enum name="RGB565" value="0x8D62"/> - - <function name="BindFramebuffer" offset="assign"> - <param name="target" type="GLenum"/> - <param name="framebuffer" type="GLuint"/> - </function> - - <function name="BindRenderbuffer" offset="assign"> - <param name="target" type="GLenum"/> - <param name="renderbuffer" type="GLuint"/> - </function> - - <function name="CheckFramebufferStatus" offset="assign"> - <param name="target" type="GLenum"/> - <return type="GLenum"/> - </function> - - <function name="DeleteFramebuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="framebuffers" type="const GLuint *" count="n"/> - </function> - - <function name="DeleteRenderbuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="renderbuffers" type="const GLuint *" count="n"/> - </function> - - <function name="FramebufferRenderbuffer" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="renderbuffertarget" type="GLenum"/> - <param name="renderbuffer" type="GLuint"/> - </function> - - <function name="FramebufferTexture2D" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="textarget" type="GLenum"/> - <param name="texture" type="GLuint"/> - <param name="level" type="GLint"/> - </function> - - <function name="GenerateMipmap" offset="assign"> - <param name="target" type="GLenum"/> - </function> - - <function name="GenFramebuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="framebuffers" type="GLuint *" count="n" output="true"/> - </function> - - <function name="GenRenderbuffers" offset="assign"> - <param name="n" type="GLsizei" counter="true"/> - <param name="renderbuffers" type="GLuint *" count="n" output="true"/> - </function> - - <function name="GetFramebufferAttachmentParameteriv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true"/> - </function> - - <function name="GetRenderbufferParameteriv" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true"/> - </function> - - <function name="IsFramebuffer" offset="assign"> - <param name="framebuffer" type="GLuint"/> - <return type="GLboolean"/> - </function> - - <function name="IsRenderbuffer" offset="assign"> - <param name="renderbuffer" type="GLuint"/> - <return type="GLboolean"/> - </function> - - <function name="RenderbufferStorage" offset="assign"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - </function> - - <!-- from GL_OES_read_format --> - <enum name="IMPLEMENTATION_COLOR_READ_TYPE" value="0x8B9A"/> - <enum name="IMPLEMENTATION_COLOR_READ_FORMAT" value="0x8B9B"/> - - <!-- from GL_OES_single_precision --> - <function name="ClearDepthf" offset="assign"> - <param name="depth" type="GLclampf"/> - </function> - - <function name="DepthRangef" offset="assign"> - <param name="zNear" type="GLclampf"/> - <param name="zFar" type="GLclampf"/> - </function> -</category> - -<xi:include href="es2_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> -<xi:include href="es2_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es2_COMPAT.xml b/src/mapi/glapi/gen-es/es2_COMPAT.xml deleted file mode 100644 index 1bd3569635b..00000000000 --- a/src/mapi/glapi/gen-es/es2_COMPAT.xml +++ /dev/null @@ -1,368 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<OpenGLAPI> - -<!-- This file defines the functions that are needed by Mesa. It - makes sure the generated glapi headers are compatible with Mesa. - It mainly consists of missing functions and aliases in OpenGL ES. ---> - -<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<!-- except for those defined by es_COMPAT.xml, these are also needed --> -<category name="compat"> - <!-- OpenGL 1.0 --> - <function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false"> - <param name="red" type="GLfloat"/> - <param name="green" type="GLfloat"/> - <param name="blue" type="GLfloat"/> - <param name="alpha" type="GLfloat"/> - </function> - - <function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false"> - <param name="red" type="GLubyte"/> - <param name="green" type="GLubyte"/> - <param name="blue" type="GLubyte"/> - <param name="alpha" type="GLubyte"/> - </function> - - <function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false"> - <param name="nx" type="GLfloat"/> - <param name="ny" type="GLfloat"/> - <param name="nz" type="GLfloat"/> - </function> - - <function name="Fogf" offset="153" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="80"/> - </function> - - <function name="Fogfv" offset="154" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="81"/> - </function> - - <function name="Lightf" offset="159" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="86"/> - </function> - - <function name="Lightfv" offset="160" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="87"/> - </function> - - <function name="LightModelf" offset="163" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="90"/> - </function> - - <function name="LightModelfv" offset="164" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="91"/> - </function> - - <function name="Materialf" offset="169" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="96"/> - </function> - - <function name="Materialfv" offset="170" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="97"/> - </function> - - <function name="PointSize" offset="173" static_dispatch="false"> - <param name="size" type="GLfloat"/> - <glx rop="100"/> - </function> - - <function name="ShadeModel" offset="177" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="104"/> - </function> - - <function name="TexEnvf" offset="184" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="111"/> - </function> - - <function name="TexEnvfv" offset="185" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="112"/> - </function> - - <function name="TexEnvi" offset="186" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="113"/> - </function> - - <function name="TexEnviv" offset="187" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="114"/> - </function> - - <function name="TexGenf" offset="190" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="117"/> - </function> - - <function name="TexGenfv" offset="191" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="118"/> - </function> - - <function name="TexGeni" offset="192" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="119"/> - </function> - - <function name="TexGeniv" offset="193" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="120"/> - </function> - - <function name="AlphaFunc" offset="240" static_dispatch="false"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLclampf"/> - <glx rop="159"/> - </function> - - <function name="LogicOp" offset="242" static_dispatch="false"> - <param name="opcode" type="GLenum"/> - <glx rop="161"/> - </function> - - <function name="GetLightfv" offset="264" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="118"/> - </function> - - <function name="GetMaterialfv" offset="269" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="123"/> - </function> - - <function name="GetTexEnvfv" offset="276" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="130"/> - </function> - - <function name="GetTexEnviv" offset="277" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="131"/> - </function> - - <function name="GetTexGenfv" offset="279" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="133"/> - </function> - - <function name="GetTexGeniv" offset="280" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="134"/> - </function> - - <function name="LoadIdentity" offset="290" static_dispatch="false"> - <glx rop="176"/> - </function> - - <function name="LoadMatrixf" offset="291" static_dispatch="false"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="177"/> - </function> - - <function name="MatrixMode" offset="293" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="179"/> - </function> - - <function name="MultMatrixf" offset="294" static_dispatch="false"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="180"/> - </function> - - <function name="PopMatrix" offset="297" static_dispatch="false"> - <glx rop="183"/> - </function> - - <function name="PushMatrix" offset="298" static_dispatch="false"> - <glx rop="184"/> - </function> - - <function name="Rotatef" offset="300" static_dispatch="false"> - <param name="angle" type="GLfloat"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="186"/> - </function> - - <function name="Scalef" offset="302" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="188"/> - </function> - - <function name="Translatef" offset="304" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="190"/> - </function> - - <!-- OpenGL 1.1 --> - <function name="ColorPointer" offset="308" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="DisableClientState" offset="309" static_dispatch="false"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="EnableClientState" offset="313" static_dispatch="false"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="NormalPointer" offset="318" static_dispatch="false"> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="TexCoordPointer" offset="320" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="VertexPointer" offset="321" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="GetPointerv" offset="329" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLvoid **" output="true"/> - <glx handcode="true"/> - </function> - - <!-- OpenGL 1.2 --> - <function name="TexImage3D" alias="TexImage3DOES" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> - <glx rop="4114" large="true"/> - </function> - - <function name="TexSubImage3D" alias="TexSubImage3DOES" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4115" large="true"/> - </function> - - <function name="CopyTexSubImage3D" alias="CopyTexSubImage3DOES" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4123"/> - </function> - - <!-- GL_ARB_multitexture --> - <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx rop="197"/> - </function> - - <function name="ClientActiveTextureARB" offset="375" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - <param name="q" type="GLfloat"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es2_EXT.xml b/src/mapi/glapi/gen-es/es2_EXT.xml deleted file mode 100644 index 4a67952e5c3..00000000000 --- a/src/mapi/glapi/gen-es/es2_EXT.xml +++ /dev/null @@ -1,162 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL ES 2.x extensions --> - -<OpenGLAPI> - -<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<category name="GL_OES_texture_3D" number="34"> - <enum name="TEXTURE_BINDING_3D_OES" value="0x806A"/> - <enum name="TEXTURE_3D_OES" value="0x806F"/> - <enum name="TEXTURE_WRAP_R_OES" value="0x8072"/> - <enum name="MAX_3D_TEXTURE_SIZE_OES" value="0x8073"/> - <enum name="SAMPLER_3D_OES" value="0x8B5F"/> - <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/> - - <function name="CompressedTexImage3DOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="imageSize" type="GLsizei" counter="true"/> - <param name="data" type="const GLvoid *" count="imageSize"/> - <glx rop="216" handcode="client"/> - </function> - - <function name="CompressedTexSubImage3DOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="imageSize" type="GLsizei" counter="true"/> - <param name="data" type="const GLvoid *" count="imageSize"/> - <glx rop="219" handcode="client"/> - </function> - - <function name="CopyTexSubImage3DOES" offset="373"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4123"/> - </function> - - <function name="FramebufferTexture3DOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="attachment" type="GLenum"/> - <param name="textarget" type="GLenum"/> - <param name="texture" type="GLuint"/> - <param name="level" type="GLint"/> - <param name="zoffset" type="GLint"/> - <glx rop="4323"/> - </function> - - <function name="TexImage3DOES" offset="371"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> - <glx rop="4114" large="true"/> - </function> - - <function name="TexSubImage3DOES" offset="372"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4115" large="true"/> - </function> -</category> - -<!-- the other name is OES_texture_float_linear --> -<category name="OES_texture_half_float_linear" number="35"> - <!-- No new functions, types, enums. --> -</category> - -<!-- the other name is OES_texture_float --> -<category name="OES_texture_half_float" number="36"> - <enum name="HALF_FLOAT_OES" value="0x8D61"/> -</category> - -<category name="GL_OES_texture_npot" number="37"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_OES_vertex_half_float" number="38"> - <enum name="HALF_FLOAT_OES" value="0x8D61"/> -</category> - -<category name="GL_EXT_texture_type_2_10_10_10_REV" number="42"> - <enum name="UNSIGNED_INT_2_10_10_10_REV_EXT" value="0x8368"/> -</category> - -<category name="GL_OES_packed_depth_stencil" number="43"> - <enum name="DEPTH_STENCIL_OES" value="0x84F9"/> - <enum name="UNSIGNED_INT_24_8_OES" value="0x84FA"/> - <enum name="DEPTH24_STENCIL8_OES" value="0x88F0"/> -</category> - -<category name="GL_OES_depth_texture" number="44"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_OES_standard_derivatives" number="45"> - <enum name="FRAGMENT_SHADER_DERIVATIVE_HINT_OES" value="0x8B8B"/> -</category> - -<category name="GL_OES_vertex_type_10_10_10_2" number="46"> - <enum name="UNSIGNED_INT_10_10_10_2_OES" value="0x8DF6"/> - <enum name="INT_10_10_10_2_OES" value="0x8DF7"/> -</category> - -<category name="GL_OES_get_program_binary" number="47"> - <enum name="PROGRAM_BINARY_LENGTH_OES" value="0x8741"/> - <enum name="NUM_PROGRAM_BINARY_FORMATS_OES" value="0x87FE"/> - <enum name="PROGRAM_BINARY_FORMATS_OES" value="0x87FF"/> - - <function name="GetProgramBinaryOES" offset="assign"> - <param name="program" type="GLuint"/> - <param name="bufSize" type="GLsizei"/> - <param name="length" type="GLsizei *"/> - <param name="binaryFormat" type="GLenum *"/> - <param name="binary" type="GLvoid *"/> - </function> - - <function name="ProgramBinaryOES" offset="assign"> - <param name="program" type="GLuint"/> - <param name="binaryFormat" type="GLenum"/> - <param name="binary" type="const GLvoid *"/> - <param name="length" type="GLint"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es_COMPAT.xml b/src/mapi/glapi/gen-es/es_COMPAT.xml deleted file mode 100644 index 7c729261105..00000000000 --- a/src/mapi/glapi/gen-es/es_COMPAT.xml +++ /dev/null @@ -1,2646 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<OpenGLAPI> - -<!-- This file defines the following categories - - a subset of 1.0 - a subset of 1.1 - a subset of 1.2 - a subset of GL_ARB_multitexture - GL_APPLE_vertex_array_object - - to make sure the generated glapi headers are compatible with Mesa. - It is included by es1_COMPAT.xml and es2_COMPAT.xml. ---> - -<category name="1.0"> - <type name="double" size="8" float="true" glx_name="FLOAT64"/> - <type name="clampd" size="8" float="true" glx_name="FLOAT64"/> - - <type name="float" size="4" float="true" glx_name="FLOAT32"/> - <type name="clampf" size="4" float="true" glx_name="FLOAT32"/> - - <type name="int" size="4" glx_name="CARD32"/> - <type name="uint" size="4" unsigned="true" glx_name="CARD32"/> - <type name="sizei" size="4" unsigned="true" glx_name="CARD32"/> - <type name="enum" size="4" unsigned="true" glx_name="ENUM"/> - <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/> - - <type name="short" size="2" glx_name="CARD16"/> - <type name="ushort" size="2" unsigned="true" glx_name="CARD16"/> - - <type name="byte" size="1" glx_name="CARD8"/> - <type name="ubyte" size="1" unsigned="true" glx_name="CARD8"/> - <type name="boolean" size="1" unsigned="true" glx_name="CARD8"/> - - <type name="void" size="1"/> - - <function name="NewList" offset="0" static_dispatch="false"> - <param name="list" type="GLuint"/> - <param name="mode" type="GLenum"/> - <glx sop="101"/> - </function> - - <function name="EndList" offset="1" static_dispatch="false"> - <glx sop="102"/> - </function> - - <function name="CallList" offset="2" static_dispatch="false"> - <param name="list" type="GLuint"/> - <glx rop="1"/> - </function> - - <function name="CallLists" offset="3" static_dispatch="false"> - <param name="n" type="GLsizei" counter="true"/> - <param name="type" type="GLenum"/> - <param name="lists" type="const GLvoid *" variable_param="type" count="n"/> - <glx rop="2" large="true"/> - </function> - - <function name="DeleteLists" offset="4" static_dispatch="false"> - <param name="list" type="GLuint"/> - <param name="range" type="GLsizei"/> - <glx sop="103"/> - </function> - - <function name="GenLists" offset="5" static_dispatch="false"> - <param name="range" type="GLsizei"/> - <return type="GLuint"/> - <glx sop="104"/> - </function> - - <function name="ListBase" offset="6" static_dispatch="false"> - <param name="base" type="GLuint"/> - <glx rop="3"/> - </function> - - <function name="Begin" offset="7" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="4"/> - </function> - - <function name="Bitmap" offset="8" static_dispatch="false"> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="xorig" type="GLfloat"/> - <param name="yorig" type="GLfloat"/> - <param name="xmove" type="GLfloat"/> - <param name="ymove" type="GLfloat"/> - <param name="bitmap" type="const GLubyte *" img_width="width" img_height="height" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/> - <glx rop="5" large="true"/> - </function> - - <function name="Color3b" offset="9" vectorequiv="Color3bv" static_dispatch="false"> - <param name="red" type="GLbyte"/> - <param name="green" type="GLbyte"/> - <param name="blue" type="GLbyte"/> - </function> - - <function name="Color3bv" offset="10" static_dispatch="false"> - <param name="v" type="const GLbyte *" count="3"/> - <glx rop="6"/> - </function> - - <function name="Color3d" offset="11" vectorequiv="Color3dv" static_dispatch="false"> - <param name="red" type="GLdouble"/> - <param name="green" type="GLdouble"/> - <param name="blue" type="GLdouble"/> - </function> - - <function name="Color3dv" offset="12" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="7"/> - </function> - - <function name="Color3f" offset="13" vectorequiv="Color3fv" static_dispatch="false"> - <param name="red" type="GLfloat"/> - <param name="green" type="GLfloat"/> - <param name="blue" type="GLfloat"/> - </function> - - <function name="Color3fv" offset="14" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="8"/> - </function> - - <function name="Color3i" offset="15" vectorequiv="Color3iv" static_dispatch="false"> - <param name="red" type="GLint"/> - <param name="green" type="GLint"/> - <param name="blue" type="GLint"/> - </function> - - <function name="Color3iv" offset="16" static_dispatch="false"> - <param name="v" type="const GLint *" count="3"/> - <glx rop="9"/> - </function> - - <function name="Color3s" offset="17" vectorequiv="Color3sv" static_dispatch="false"> - <param name="red" type="GLshort"/> - <param name="green" type="GLshort"/> - <param name="blue" type="GLshort"/> - </function> - - <function name="Color3sv" offset="18" static_dispatch="false"> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="10"/> - </function> - - <function name="Color3ub" offset="19" vectorequiv="Color3ubv" static_dispatch="false"> - <param name="red" type="GLubyte"/> - <param name="green" type="GLubyte"/> - <param name="blue" type="GLubyte"/> - </function> - - <function name="Color3ubv" offset="20" static_dispatch="false"> - <param name="v" type="const GLubyte *" count="3"/> - <glx rop="11"/> - </function> - - <function name="Color3ui" offset="21" vectorequiv="Color3uiv" static_dispatch="false"> - <param name="red" type="GLuint"/> - <param name="green" type="GLuint"/> - <param name="blue" type="GLuint"/> - </function> - - <function name="Color3uiv" offset="22" static_dispatch="false"> - <param name="v" type="const GLuint *" count="3"/> - <glx rop="12"/> - </function> - - <function name="Color3us" offset="23" vectorequiv="Color3usv" static_dispatch="false"> - <param name="red" type="GLushort"/> - <param name="green" type="GLushort"/> - <param name="blue" type="GLushort"/> - </function> - - <function name="Color3usv" offset="24" static_dispatch="false"> - <param name="v" type="const GLushort *" count="3"/> - <glx rop="13"/> - </function> - - <function name="Color4b" offset="25" vectorequiv="Color4bv" static_dispatch="false"> - <param name="red" type="GLbyte"/> - <param name="green" type="GLbyte"/> - <param name="blue" type="GLbyte"/> - <param name="alpha" type="GLbyte"/> - </function> - - <function name="Color4bv" offset="26" static_dispatch="false"> - <param name="v" type="const GLbyte *" count="4"/> - <glx rop="14"/> - </function> - - <function name="Color4d" offset="27" vectorequiv="Color4dv" static_dispatch="false"> - <param name="red" type="GLdouble"/> - <param name="green" type="GLdouble"/> - <param name="blue" type="GLdouble"/> - <param name="alpha" type="GLdouble"/> - </function> - - <function name="Color4dv" offset="28" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="4"/> - <glx rop="15"/> - </function> - - <!--function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false"> - <param name="red" type="GLfloat"/> - <param name="green" type="GLfloat"/> - <param name="blue" type="GLfloat"/> - <param name="alpha" type="GLfloat"/> - </function--> - - <function name="Color4fv" offset="30" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="4"/> - <glx rop="16"/> - </function> - - <function name="Color4i" offset="31" vectorequiv="Color4iv" static_dispatch="false"> - <param name="red" type="GLint"/> - <param name="green" type="GLint"/> - <param name="blue" type="GLint"/> - <param name="alpha" type="GLint"/> - </function> - - <function name="Color4iv" offset="32" static_dispatch="false"> - <param name="v" type="const GLint *" count="4"/> - <glx rop="17"/> - </function> - - <function name="Color4s" offset="33" vectorequiv="Color4sv" static_dispatch="false"> - <param name="red" type="GLshort"/> - <param name="green" type="GLshort"/> - <param name="blue" type="GLshort"/> - <param name="alpha" type="GLshort"/> - </function> - - <function name="Color4sv" offset="34" static_dispatch="false"> - <param name="v" type="const GLshort *" count="4"/> - <glx rop="18"/> - </function> - - <!--function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false"> - <param name="red" type="GLubyte"/> - <param name="green" type="GLubyte"/> - <param name="blue" type="GLubyte"/> - <param name="alpha" type="GLubyte"/> - </function--> - - <function name="Color4ubv" offset="36" static_dispatch="false"> - <param name="v" type="const GLubyte *" count="4"/> - <glx rop="19"/> - </function> - - <function name="Color4ui" offset="37" vectorequiv="Color4uiv" static_dispatch="false"> - <param name="red" type="GLuint"/> - <param name="green" type="GLuint"/> - <param name="blue" type="GLuint"/> - <param name="alpha" type="GLuint"/> - </function> - - <function name="Color4uiv" offset="38" static_dispatch="false"> - <param name="v" type="const GLuint *" count="4"/> - <glx rop="20"/> - </function> - - <function name="Color4us" offset="39" vectorequiv="Color4usv" static_dispatch="false"> - <param name="red" type="GLushort"/> - <param name="green" type="GLushort"/> - <param name="blue" type="GLushort"/> - <param name="alpha" type="GLushort"/> - </function> - - <function name="Color4usv" offset="40" static_dispatch="false"> - <param name="v" type="const GLushort *" count="4"/> - <glx rop="21"/> - </function> - - <function name="EdgeFlag" offset="41" vectorequiv="EdgeFlagv" static_dispatch="false"> - <param name="flag" type="GLboolean"/> - </function> - - <function name="EdgeFlagv" offset="42" static_dispatch="false"> - <param name="flag" type="const GLboolean *" count="1"/> - <glx rop="22"/> - </function> - - <function name="End" offset="43" static_dispatch="false"> - <glx rop="23"/> - </function> - - <function name="Indexd" offset="44" vectorequiv="Indexdv" static_dispatch="false"> - <param name="c" type="GLdouble"/> - </function> - - <function name="Indexdv" offset="45" static_dispatch="false"> - <param name="c" type="const GLdouble *" count="1"/> - <glx rop="24"/> - </function> - - <function name="Indexf" offset="46" vectorequiv="Indexfv" static_dispatch="false"> - <param name="c" type="GLfloat"/> - </function> - - <function name="Indexfv" offset="47" static_dispatch="false"> - <param name="c" type="const GLfloat *" count="1"/> - <glx rop="25"/> - </function> - - <function name="Indexi" offset="48" vectorequiv="Indexiv" static_dispatch="false"> - <param name="c" type="GLint"/> - </function> - - <function name="Indexiv" offset="49" static_dispatch="false"> - <param name="c" type="const GLint *" count="1"/> - <glx rop="26"/> - </function> - - <function name="Indexs" offset="50" vectorequiv="Indexsv" static_dispatch="false"> - <param name="c" type="GLshort"/> - </function> - - <function name="Indexsv" offset="51" static_dispatch="false"> - <param name="c" type="const GLshort *" count="1"/> - <glx rop="27"/> - </function> - - <function name="Normal3b" offset="52" vectorequiv="Normal3bv" static_dispatch="false"> - <param name="nx" type="GLbyte"/> - <param name="ny" type="GLbyte"/> - <param name="nz" type="GLbyte"/> - </function> - - <function name="Normal3bv" offset="53" static_dispatch="false"> - <param name="v" type="const GLbyte *" count="3"/> - <glx rop="28"/> - </function> - - <function name="Normal3d" offset="54" vectorequiv="Normal3dv" static_dispatch="false"> - <param name="nx" type="GLdouble"/> - <param name="ny" type="GLdouble"/> - <param name="nz" type="GLdouble"/> - </function> - - <function name="Normal3dv" offset="55" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="29"/> - </function> - - <!--function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false"> - <param name="nx" type="GLfloat"/> - <param name="ny" type="GLfloat"/> - <param name="nz" type="GLfloat"/> - </function--> - - <function name="Normal3fv" offset="57" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="30"/> - </function> - - <function name="Normal3i" offset="58" vectorequiv="Normal3iv" static_dispatch="false"> - <param name="nx" type="GLint"/> - <param name="ny" type="GLint"/> - <param name="nz" type="GLint"/> - </function> - - <function name="Normal3iv" offset="59" static_dispatch="false"> - <param name="v" type="const GLint *" count="3"/> - <glx rop="31"/> - </function> - - <function name="Normal3s" offset="60" vectorequiv="Normal3sv" static_dispatch="false"> - <param name="nx" type="GLshort"/> - <param name="ny" type="GLshort"/> - <param name="nz" type="GLshort"/> - </function> - - <function name="Normal3sv" offset="61" static_dispatch="false"> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="32"/> - </function> - - <function name="RasterPos2d" offset="62" vectorequiv="RasterPos2dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - </function> - - <function name="RasterPos2dv" offset="63" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="2"/> - <glx rop="33"/> - </function> - - <function name="RasterPos2f" offset="64" vectorequiv="RasterPos2fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - </function> - - <function name="RasterPos2fv" offset="65" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="2"/> - <glx rop="34"/> - </function> - - <function name="RasterPos2i" offset="66" vectorequiv="RasterPos2iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - </function> - - <function name="RasterPos2iv" offset="67" static_dispatch="false"> - <param name="v" type="const GLint *" count="2"/> - <glx rop="35"/> - </function> - - <function name="RasterPos2s" offset="68" vectorequiv="RasterPos2sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - </function> - - <function name="RasterPos2sv" offset="69" static_dispatch="false"> - <param name="v" type="const GLshort *" count="2"/> - <glx rop="36"/> - </function> - - <function name="RasterPos3d" offset="70" vectorequiv="RasterPos3dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - </function> - - <function name="RasterPos3dv" offset="71" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="37"/> - </function> - - <function name="RasterPos3f" offset="72" vectorequiv="RasterPos3fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - </function> - - <function name="RasterPos3fv" offset="73" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="38"/> - </function> - - <function name="RasterPos3i" offset="74" vectorequiv="RasterPos3iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="z" type="GLint"/> - </function> - - <function name="RasterPos3iv" offset="75" static_dispatch="false"> - <param name="v" type="const GLint *" count="3"/> - <glx rop="39"/> - </function> - - <function name="RasterPos3s" offset="76" vectorequiv="RasterPos3sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - <param name="z" type="GLshort"/> - </function> - - <function name="RasterPos3sv" offset="77" static_dispatch="false"> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="40"/> - </function> - - <function name="RasterPos4d" offset="78" vectorequiv="RasterPos4dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - <param name="w" type="GLdouble"/> - </function> - - <function name="RasterPos4dv" offset="79" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="4"/> - <glx rop="41"/> - </function> - - <function name="RasterPos4f" offset="80" vectorequiv="RasterPos4fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <param name="w" type="GLfloat"/> - </function> - - <function name="RasterPos4fv" offset="81" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="4"/> - <glx rop="42"/> - </function> - - <function name="RasterPos4i" offset="82" vectorequiv="RasterPos4iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="z" type="GLint"/> - <param name="w" type="GLint"/> - </function> - - <function name="RasterPos4iv" offset="83" static_dispatch="false"> - <param name="v" type="const GLint *" count="4"/> - <glx rop="43"/> - </function> - - <function name="RasterPos4s" offset="84" vectorequiv="RasterPos4sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - <param name="z" type="GLshort"/> - <param name="w" type="GLshort"/> - </function> - - <function name="RasterPos4sv" offset="85" static_dispatch="false"> - <param name="v" type="const GLshort *" count="4"/> - <glx rop="44"/> - </function> - - <function name="Rectd" offset="86" vectorequiv="Rectdv" static_dispatch="false"> - <param name="x1" type="GLdouble"/> - <param name="y1" type="GLdouble"/> - <param name="x2" type="GLdouble"/> - <param name="y2" type="GLdouble"/> - </function> - - <function name="Rectdv" offset="87" static_dispatch="false"> - <param name="v1" type="const GLdouble *" count="2"/> - <param name="v2" type="const GLdouble *" count="2"/> - <glx rop="45"/> - </function> - - <function name="Rectf" offset="88" vectorequiv="Rectfv" static_dispatch="false"> - <param name="x1" type="GLfloat"/> - <param name="y1" type="GLfloat"/> - <param name="x2" type="GLfloat"/> - <param name="y2" type="GLfloat"/> - </function> - - <function name="Rectfv" offset="89" static_dispatch="false"> - <param name="v1" type="const GLfloat *" count="2"/> - <param name="v2" type="const GLfloat *" count="2"/> - <glx rop="46"/> - </function> - - <function name="Recti" offset="90" vectorequiv="Rectiv" static_dispatch="false"> - <param name="x1" type="GLint"/> - <param name="y1" type="GLint"/> - <param name="x2" type="GLint"/> - <param name="y2" type="GLint"/> - </function> - - <function name="Rectiv" offset="91" static_dispatch="false"> - <param name="v1" type="const GLint *" count="2"/> - <param name="v2" type="const GLint *" count="2"/> - <glx rop="47"/> - </function> - - <function name="Rects" offset="92" vectorequiv="Rectsv" static_dispatch="false"> - <param name="x1" type="GLshort"/> - <param name="y1" type="GLshort"/> - <param name="x2" type="GLshort"/> - <param name="y2" type="GLshort"/> - </function> - - <function name="Rectsv" offset="93" static_dispatch="false"> - <param name="v1" type="const GLshort *" count="2"/> - <param name="v2" type="const GLshort *" count="2"/> - <glx rop="48"/> - </function> - - <function name="TexCoord1d" offset="94" vectorequiv="TexCoord1dv" static_dispatch="false"> - <param name="s" type="GLdouble"/> - </function> - - <function name="TexCoord1dv" offset="95" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="1"/> - <glx rop="49"/> - </function> - - <function name="TexCoord1f" offset="96" vectorequiv="TexCoord1fv" static_dispatch="false"> - <param name="s" type="GLfloat"/> - </function> - - <function name="TexCoord1fv" offset="97" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="1"/> - <glx rop="50"/> - </function> - - <function name="TexCoord1i" offset="98" vectorequiv="TexCoord1iv" static_dispatch="false"> - <param name="s" type="GLint"/> - </function> - - <function name="TexCoord1iv" offset="99" static_dispatch="false"> - <param name="v" type="const GLint *" count="1"/> - <glx rop="51"/> - </function> - - <function name="TexCoord1s" offset="100" vectorequiv="TexCoord1sv" static_dispatch="false"> - <param name="s" type="GLshort"/> - </function> - - <function name="TexCoord1sv" offset="101" static_dispatch="false"> - <param name="v" type="const GLshort *" count="1"/> - <glx rop="52"/> - </function> - - <function name="TexCoord2d" offset="102" vectorequiv="TexCoord2dv" static_dispatch="false"> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - </function> - - <function name="TexCoord2dv" offset="103" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="2"/> - <glx rop="53"/> - </function> - - <function name="TexCoord2f" offset="104" vectorequiv="TexCoord2fv" static_dispatch="false"> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - </function> - - <function name="TexCoord2fv" offset="105" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="2"/> - <glx rop="54"/> - </function> - - <function name="TexCoord2i" offset="106" vectorequiv="TexCoord2iv" static_dispatch="false"> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - </function> - - <function name="TexCoord2iv" offset="107" static_dispatch="false"> - <param name="v" type="const GLint *" count="2"/> - <glx rop="55"/> - </function> - - <function name="TexCoord2s" offset="108" vectorequiv="TexCoord2sv" static_dispatch="false"> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - </function> - - <function name="TexCoord2sv" offset="109" static_dispatch="false"> - <param name="v" type="const GLshort *" count="2"/> - <glx rop="56"/> - </function> - - <function name="TexCoord3d" offset="110" vectorequiv="TexCoord3dv" static_dispatch="false"> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - <param name="r" type="GLdouble"/> - </function> - - <function name="TexCoord3dv" offset="111" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="57"/> - </function> - - <function name="TexCoord3f" offset="112" vectorequiv="TexCoord3fv" static_dispatch="false"> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - </function> - - <function name="TexCoord3fv" offset="113" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="58"/> - </function> - - <function name="TexCoord3i" offset="114" vectorequiv="TexCoord3iv" static_dispatch="false"> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - <param name="r" type="GLint"/> - </function> - - <function name="TexCoord3iv" offset="115" static_dispatch="false"> - <param name="v" type="const GLint *" count="3"/> - <glx rop="59"/> - </function> - - <function name="TexCoord3s" offset="116" vectorequiv="TexCoord3sv" static_dispatch="false"> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - <param name="r" type="GLshort"/> - </function> - - <function name="TexCoord3sv" offset="117" static_dispatch="false"> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="60"/> - </function> - - <function name="TexCoord4d" offset="118" vectorequiv="TexCoord4dv" static_dispatch="false"> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - <param name="r" type="GLdouble"/> - <param name="q" type="GLdouble"/> - </function> - - <function name="TexCoord4dv" offset="119" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="4"/> - <glx rop="61"/> - </function> - - <function name="TexCoord4f" offset="120" vectorequiv="TexCoord4fv" static_dispatch="false"> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - <param name="q" type="GLfloat"/> - </function> - - <function name="TexCoord4fv" offset="121" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="4"/> - <glx rop="62"/> - </function> - - <function name="TexCoord4i" offset="122" vectorequiv="TexCoord4iv" static_dispatch="false"> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - <param name="r" type="GLint"/> - <param name="q" type="GLint"/> - </function> - - <function name="TexCoord4iv" offset="123" static_dispatch="false"> - <param name="v" type="const GLint *" count="4"/> - <glx rop="63"/> - </function> - - <function name="TexCoord4s" offset="124" vectorequiv="TexCoord4sv" static_dispatch="false"> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - <param name="r" type="GLshort"/> - <param name="q" type="GLshort"/> - </function> - - <function name="TexCoord4sv" offset="125" static_dispatch="false"> - <param name="v" type="const GLshort *" count="4"/> - <glx rop="64"/> - </function> - - <function name="Vertex2d" offset="126" vectorequiv="Vertex2dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - </function> - - <function name="Vertex2dv" offset="127" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="2"/> - <glx rop="65"/> - </function> - - <function name="Vertex2f" offset="128" vectorequiv="Vertex2fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - </function> - - <function name="Vertex2fv" offset="129" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="2"/> - <glx rop="66"/> - </function> - - <function name="Vertex2i" offset="130" vectorequiv="Vertex2iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - </function> - - <function name="Vertex2iv" offset="131" static_dispatch="false"> - <param name="v" type="const GLint *" count="2"/> - <glx rop="67"/> - </function> - - <function name="Vertex2s" offset="132" vectorequiv="Vertex2sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - </function> - - <function name="Vertex2sv" offset="133" static_dispatch="false"> - <param name="v" type="const GLshort *" count="2"/> - <glx rop="68"/> - </function> - - <function name="Vertex3d" offset="134" vectorequiv="Vertex3dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - </function> - - <function name="Vertex3dv" offset="135" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="69"/> - </function> - - <function name="Vertex3f" offset="136" vectorequiv="Vertex3fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - </function> - - <function name="Vertex3fv" offset="137" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="70"/> - </function> - - <function name="Vertex3i" offset="138" vectorequiv="Vertex3iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="z" type="GLint"/> - </function> - - <function name="Vertex3iv" offset="139" static_dispatch="false"> - <param name="v" type="const GLint *" count="3"/> - <glx rop="71"/> - </function> - - <function name="Vertex3s" offset="140" vectorequiv="Vertex3sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - <param name="z" type="GLshort"/> - </function> - - <function name="Vertex3sv" offset="141" static_dispatch="false"> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="72"/> - </function> - - <function name="Vertex4d" offset="142" vectorequiv="Vertex4dv" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - <param name="w" type="GLdouble"/> - </function> - - <function name="Vertex4dv" offset="143" static_dispatch="false"> - <param name="v" type="const GLdouble *" count="4"/> - <glx rop="73"/> - </function> - - <function name="Vertex4f" offset="144" vectorequiv="Vertex4fv" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <param name="w" type="GLfloat"/> - </function> - - <function name="Vertex4fv" offset="145" static_dispatch="false"> - <param name="v" type="const GLfloat *" count="4"/> - <glx rop="74"/> - </function> - - <function name="Vertex4i" offset="146" vectorequiv="Vertex4iv" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="z" type="GLint"/> - <param name="w" type="GLint"/> - </function> - - <function name="Vertex4iv" offset="147" static_dispatch="false"> - <param name="v" type="const GLint *" count="4"/> - <glx rop="75"/> - </function> - - <function name="Vertex4s" offset="148" vectorequiv="Vertex4sv" static_dispatch="false"> - <param name="x" type="GLshort"/> - <param name="y" type="GLshort"/> - <param name="z" type="GLshort"/> - <param name="w" type="GLshort"/> - </function> - - <function name="Vertex4sv" offset="149" static_dispatch="false"> - <param name="v" type="const GLshort *" count="4"/> - <glx rop="76"/> - </function> - - <function name="ClipPlane" offset="150" static_dispatch="false"> - <param name="plane" type="GLenum"/> - <param name="equation" type="const GLdouble *" count="4"/> - <glx rop="77"/> - </function> - - <function name="ColorMaterial" offset="151" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="mode" type="GLenum"/> - <glx rop="78"/> - </function> - - <!--function name="CullFace" offset="152" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="79"/> - </function> - - <function name="Fogf" offset="153" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="80"/> - </function> - - <function name="Fogfv" offset="154" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="81"/> - </function--> - - <function name="Fogi" offset="155" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="82"/> - </function> - - <function name="Fogiv" offset="156" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="83"/> - </function> - - <!--function name="FrontFace" offset="157" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="84"/> - </function> - - <function name="Hint" offset="158" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="mode" type="GLenum"/> - <glx rop="85"/> - </function> - - <function name="Lightf" offset="159" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="86"/> - </function> - - <function name="Lightfv" offset="160" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="87"/> - </function--> - - <function name="Lighti" offset="161" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="88"/> - </function> - - <function name="Lightiv" offset="162" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="89"/> - </function> - - <!--function name="LightModelf" offset="163" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="90"/> - </function> - - <function name="LightModelfv" offset="164" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="91"/> - </function--> - - <function name="LightModeli" offset="165" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="92"/> - </function> - - <function name="LightModeliv" offset="166" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="93"/> - </function> - - <function name="LineStipple" offset="167" static_dispatch="false"> - <param name="factor" type="GLint"/> - <param name="pattern" type="GLushort"/> - <glx rop="94"/> - </function> - - <!--function name="LineWidth" offset="168" static_dispatch="false"> - <param name="width" type="GLfloat"/> - <glx rop="95"/> - </function> - - <function name="Materialf" offset="169" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="96"/> - </function> - - <function name="Materialfv" offset="170" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="97"/> - </function--> - - <function name="Materiali" offset="171" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="98"/> - </function> - - <function name="Materialiv" offset="172" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="99"/> - </function> - - <!--function name="PointSize" offset="173" static_dispatch="false"> - <param name="size" type="GLfloat"/> - <glx rop="100"/> - </function--> - - <function name="PolygonMode" offset="174" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="mode" type="GLenum"/> - <glx rop="101"/> - </function> - - <function name="PolygonStipple" offset="175" static_dispatch="false"> - <param name="mask" type="const GLubyte *" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/> - <glx rop="102"/> - </function> - - <!--function name="Scissor" offset="176" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="103"/> - </function> - - <function name="ShadeModel" offset="177" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="104"/> - </function> - - <function name="TexParameterf" offset="178" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="105"/> - </function> - - <function name="TexParameterfv" offset="179" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="106"/> - </function> - - <function name="TexParameteri" offset="180" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="107"/> - </function> - - <function name="TexParameteriv" offset="181" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="108"/> - </function--> - - <function name="TexImage1D" offset="182" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/> - <glx rop="109" large="true"/> - </function> - - <!--function name="TexImage2D" offset="183" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/> - <glx rop="110" large="true"/> - </function> - - <function name="TexEnvf" offset="184" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="111"/> - </function> - - <function name="TexEnvfv" offset="185" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="112"/> - </function> - - <function name="TexEnvi" offset="186" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="113"/> - </function> - - <function name="TexEnviv" offset="187" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="114"/> - </function--> - - <function name="TexGend" offset="188" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLdouble"/> - <glx rop="115"/> - </function> - - <function name="TexGendv" offset="189" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLdouble *" variable_param="pname"/> - <glx rop="116"/> - </function> - - <!--function name="TexGenf" offset="190" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="117"/> - </function> - - <function name="TexGenfv" offset="191" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="118"/> - </function> - - <function name="TexGeni" offset="192" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="119"/> - </function> - - <function name="TexGeniv" offset="193" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="120"/> - </function--> - - <function name="FeedbackBuffer" offset="194" static_dispatch="false"> - <param name="size" type="GLsizei"/> - <param name="type" type="GLenum"/> - <param name="buffer" type="GLfloat *" output="true"/> - <glx sop="105" handcode="true"/> - </function> - - <function name="SelectBuffer" offset="195" static_dispatch="false"> - <param name="size" type="GLsizei"/> - <param name="buffer" type="GLuint *" output="true"/> - <glx sop="106" handcode="true"/> - </function> - - <function name="RenderMode" offset="196" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <return type="GLint"/> - <glx sop="107" handcode="true"/> - </function> - - <function name="InitNames" offset="197" static_dispatch="false"> - <glx rop="121"/> - </function> - - <function name="LoadName" offset="198" static_dispatch="false"> - <param name="name" type="GLuint"/> - <glx rop="122"/> - </function> - - <function name="PassThrough" offset="199" static_dispatch="false"> - <param name="token" type="GLfloat"/> - <glx rop="123"/> - </function> - - <function name="PopName" offset="200" static_dispatch="false"> - <glx rop="124"/> - </function> - - <function name="PushName" offset="201" static_dispatch="false"> - <param name="name" type="GLuint"/> - <glx rop="125"/> - </function> - - <function name="DrawBuffer" offset="202" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="126"/> - </function> - - <!--function name="Clear" offset="203" static_dispatch="false"> - <param name="mask" type="GLbitfield"/> - <glx rop="127"/> - </function--> - - <function name="ClearAccum" offset="204" static_dispatch="false"> - <param name="red" type="GLfloat"/> - <param name="green" type="GLfloat"/> - <param name="blue" type="GLfloat"/> - <param name="alpha" type="GLfloat"/> - <glx rop="128"/> - </function> - - <function name="ClearIndex" offset="205" static_dispatch="false"> - <param name="c" type="GLfloat"/> - <glx rop="129"/> - </function> - - <!--function name="ClearColor" offset="206" static_dispatch="false"> - <param name="red" type="GLclampf"/> - <param name="green" type="GLclampf"/> - <param name="blue" type="GLclampf"/> - <param name="alpha" type="GLclampf"/> - <glx rop="130"/> - </function> - - <function name="ClearStencil" offset="207" static_dispatch="false"> - <param name="s" type="GLint"/> - <glx rop="131"/> - </function--> - - <function name="ClearDepth" offset="208" static_dispatch="false"> - <param name="depth" type="GLclampd"/> - <glx rop="132"/> - </function> - - <!--function name="StencilMask" offset="209" static_dispatch="false"> - <param name="mask" type="GLuint"/> - <glx rop="133"/> - </function> - - <function name="ColorMask" offset="210" static_dispatch="false"> - <param name="red" type="GLboolean"/> - <param name="green" type="GLboolean"/> - <param name="blue" type="GLboolean"/> - <param name="alpha" type="GLboolean"/> - <glx rop="134"/> - </function> - - <function name="DepthMask" offset="211" static_dispatch="false"> - <param name="flag" type="GLboolean"/> - <glx rop="135"/> - </function--> - - <function name="IndexMask" offset="212" static_dispatch="false"> - <param name="mask" type="GLuint"/> - <glx rop="136"/> - </function> - - <function name="Accum" offset="213" static_dispatch="false"> - <param name="op" type="GLenum"/> - <param name="value" type="GLfloat"/> - <glx rop="137"/> - </function> - - <!--function name="Disable" offset="214" static_dispatch="false"> - <param name="cap" type="GLenum"/> - <glx rop="138" handcode="client"/> - </function> - - <function name="Enable" offset="215" static_dispatch="false"> - <param name="cap" type="GLenum"/> - <glx rop="139" handcode="client"/> - </function> - - <function name="Finish" offset="216" static_dispatch="false"> - <glx sop="108" handcode="true"/> - </function> - - <function name="Flush" offset="217" static_dispatch="false"> - <glx sop="142" handcode="true"/> - </function--> - - <function name="PopAttrib" offset="218" static_dispatch="false"> - <glx rop="141"/> - </function> - - <function name="PushAttrib" offset="219" static_dispatch="false"> - <param name="mask" type="GLbitfield"/> - <glx rop="142"/> - </function> - - <function name="Map1d" offset="220" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="u1" type="GLdouble"/> - <param name="u2" type="GLdouble"/> - <param name="stride" type="GLint" client_only="true"/> - <param name="order" type="GLint"/> - <param name="points" type="const GLdouble *" variable_param="order"/> - <glx rop="143" handcode="true"/> - </function> - - <function name="Map1f" offset="221" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="u1" type="GLfloat"/> - <param name="u2" type="GLfloat"/> - <param name="stride" type="GLint" client_only="true"/> - <param name="order" type="GLint"/> - <param name="points" type="const GLfloat *" variable_param="order"/> - <glx rop="144" handcode="true"/> - </function> - - <function name="Map2d" offset="222" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="u1" type="GLdouble"/> - <param name="u2" type="GLdouble"/> - <param name="ustride" type="GLint" client_only="true"/> - <param name="uorder" type="GLint"/> - <param name="v1" type="GLdouble"/> - <param name="v2" type="GLdouble"/> - <param name="vstride" type="GLint" client_only="true"/> - <param name="vorder" type="GLint"/> - <param name="points" type="const GLdouble *" variable_param="uorder"/> - <glx rop="145" handcode="true"/> - </function> - - <function name="Map2f" offset="223" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="u1" type="GLfloat"/> - <param name="u2" type="GLfloat"/> - <param name="ustride" type="GLint" client_only="true"/> - <param name="uorder" type="GLint"/> - <param name="v1" type="GLfloat"/> - <param name="v2" type="GLfloat"/> - <param name="vstride" type="GLint" client_only="true"/> - <param name="vorder" type="GLint"/> - <param name="points" type="const GLfloat *" variable_param="uorder"/> - <glx rop="146" handcode="true"/> - </function> - - <function name="MapGrid1d" offset="224" static_dispatch="false"> - <param name="un" type="GLint"/> - <param name="u1" type="GLdouble"/> - <param name="u2" type="GLdouble"/> - <glx rop="147"/> - </function> - - <function name="MapGrid1f" offset="225" static_dispatch="false"> - <param name="un" type="GLint"/> - <param name="u1" type="GLfloat"/> - <param name="u2" type="GLfloat"/> - <glx rop="148"/> - </function> - - <function name="MapGrid2d" offset="226" static_dispatch="false"> - <param name="un" type="GLint"/> - <param name="u1" type="GLdouble"/> - <param name="u2" type="GLdouble"/> - <param name="vn" type="GLint"/> - <param name="v1" type="GLdouble"/> - <param name="v2" type="GLdouble"/> - <glx rop="149"/> - </function> - - <function name="MapGrid2f" offset="227" static_dispatch="false"> - <param name="un" type="GLint"/> - <param name="u1" type="GLfloat"/> - <param name="u2" type="GLfloat"/> - <param name="vn" type="GLint"/> - <param name="v1" type="GLfloat"/> - <param name="v2" type="GLfloat"/> - <glx rop="150"/> - </function> - - <function name="EvalCoord1d" offset="228" vectorequiv="EvalCoord1dv" static_dispatch="false"> - <param name="u" type="GLdouble"/> - </function> - - <function name="EvalCoord1dv" offset="229" static_dispatch="false"> - <param name="u" type="const GLdouble *" count="1"/> - <glx rop="151"/> - </function> - - <function name="EvalCoord1f" offset="230" vectorequiv="EvalCoord1fv" static_dispatch="false"> - <param name="u" type="GLfloat"/> - </function> - - <function name="EvalCoord1fv" offset="231" static_dispatch="false"> - <param name="u" type="const GLfloat *" count="1"/> - <glx rop="152"/> - </function> - - <function name="EvalCoord2d" offset="232" vectorequiv="EvalCoord2dv" static_dispatch="false"> - <param name="u" type="GLdouble"/> - <param name="v" type="GLdouble"/> - </function> - - <function name="EvalCoord2dv" offset="233" static_dispatch="false"> - <param name="u" type="const GLdouble *" count="2"/> - <glx rop="153"/> - </function> - - <function name="EvalCoord2f" offset="234" vectorequiv="EvalCoord2fv" static_dispatch="false"> - <param name="u" type="GLfloat"/> - <param name="v" type="GLfloat"/> - </function> - - <function name="EvalCoord2fv" offset="235" static_dispatch="false"> - <param name="u" type="const GLfloat *" count="2"/> - <glx rop="154"/> - </function> - - <function name="EvalMesh1" offset="236" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <param name="i1" type="GLint"/> - <param name="i2" type="GLint"/> - <glx rop="155"/> - </function> - - <function name="EvalPoint1" offset="237" static_dispatch="false"> - <param name="i" type="GLint"/> - <glx rop="156"/> - </function> - - <function name="EvalMesh2" offset="238" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <param name="i1" type="GLint"/> - <param name="i2" type="GLint"/> - <param name="j1" type="GLint"/> - <param name="j2" type="GLint"/> - <glx rop="157"/> - </function> - - <function name="EvalPoint2" offset="239" static_dispatch="false"> - <param name="i" type="GLint"/> - <param name="j" type="GLint"/> - <glx rop="158"/> - </function> - - <!--function name="AlphaFunc" offset="240" static_dispatch="false"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLclampf"/> - <glx rop="159"/> - </function> - - <function name="BlendFunc" offset="241" static_dispatch="false"> - <param name="sfactor" type="GLenum"/> - <param name="dfactor" type="GLenum"/> - <glx rop="160"/> - </function> - - <function name="LogicOp" offset="242" static_dispatch="false"> - <param name="opcode" type="GLenum"/> - <glx rop="161"/> - </function> - - <function name="StencilFunc" offset="243" static_dispatch="false"> - <param name="func" type="GLenum"/> - <param name="ref" type="GLint"/> - <param name="mask" type="GLuint"/> - <glx rop="162"/> - </function> - - <function name="StencilOp" offset="244" static_dispatch="false"> - <param name="fail" type="GLenum"/> - <param name="zfail" type="GLenum"/> - <param name="zpass" type="GLenum"/> - <glx rop="163"/> - </function> - - <function name="DepthFunc" offset="245" static_dispatch="false"> - <param name="func" type="GLenum"/> - <glx rop="164"/> - </function--> - - <function name="PixelZoom" offset="246" static_dispatch="false"> - <param name="xfactor" type="GLfloat"/> - <param name="yfactor" type="GLfloat"/> - <glx rop="165"/> - </function> - - <function name="PixelTransferf" offset="247" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx rop="166"/> - </function> - - <function name="PixelTransferi" offset="248" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx rop="167"/> - </function> - - <function name="PixelStoref" offset="249" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLfloat"/> - <glx sop="109" handcode="client"/> - </function> - - <!--function name="PixelStorei" offset="250" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="param" type="GLint"/> - <glx sop="110" handcode="client"/> - </function--> - - <function name="PixelMapfv" offset="251" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="mapsize" type="GLsizei" counter="true"/> - <param name="values" type="const GLfloat *" count="mapsize"/> - <glx rop="168" large="true"/> - </function> - - <function name="PixelMapuiv" offset="252" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="mapsize" type="GLsizei" counter="true"/> - <param name="values" type="const GLuint *" count="mapsize"/> - <glx rop="169" large="true"/> - </function> - - <function name="PixelMapusv" offset="253" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="mapsize" type="GLsizei" counter="true"/> - <param name="values" type="const GLushort *" count="mapsize"/> - <glx rop="170" large="true"/> - </function> - - <function name="ReadBuffer" offset="254" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="171"/> - </function> - - <function name="CopyPixels" offset="255" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="type" type="GLenum"/> - <glx rop="172"/> - </function> - - <!--function name="ReadPixels" offset="256" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="GLvoid *" output="true" img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/> - <glx sop="111"/> - </function--> - - <function name="DrawPixels" offset="257" static_dispatch="false"> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="0" img_pad_dimensions="false"/> - <glx rop="173" large="true"/> - </function> - - <!--function name="GetBooleanv" offset="258" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLboolean *" output="true" variable_param="pname"/> - <glx sop="112" handcode="client"/> - </function--> - - <function name="GetClipPlane" offset="259" static_dispatch="false"> - <param name="plane" type="GLenum"/> - <param name="equation" type="GLdouble *" output="true" count="4"/> - <glx sop="113" always_array="true"/> - </function> - - <function name="GetDoublev" offset="260" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLdouble *" output="true" variable_param="pname"/> - <glx sop="114" handcode="client"/> - </function> - - <!--function name="GetError" offset="261" static_dispatch="false"> - <return type="GLenum"/> - <glx sop="115" handcode="client"/> - </function> - - <function name="GetFloatv" offset="262" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="116" handcode="client"/> - </function> - - <function name="GetIntegerv" offset="263" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="117" handcode="client"/> - </function> - - <function name="GetLightfv" offset="264" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="118"/> - </function--> - - <function name="GetLightiv" offset="265" static_dispatch="false"> - <param name="light" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="119"/> - </function> - - <function name="GetMapdv" offset="266" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="query" type="GLenum"/> - <param name="v" type="GLdouble *" output="true" variable_param="target query"/> - <glx sop="120"/> - </function> - - <function name="GetMapfv" offset="267" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="query" type="GLenum"/> - <param name="v" type="GLfloat *" output="true" variable_param="target query"/> - <glx sop="121"/> - </function> - - <function name="GetMapiv" offset="268" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="query" type="GLenum"/> - <param name="v" type="GLint *" output="true" variable_param="target query"/> - <glx sop="122"/> - </function> - - <!--function name="GetMaterialfv" offset="269" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="123"/> - </function--> - - <function name="GetMaterialiv" offset="270" static_dispatch="false"> - <param name="face" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="124"/> - </function> - - <function name="GetPixelMapfv" offset="271" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="values" type="GLfloat *" output="true" variable_param="map"/> - <glx sop="125"/> - </function> - - <function name="GetPixelMapuiv" offset="272" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="values" type="GLuint *" output="true" variable_param="map"/> - <glx sop="126"/> - </function> - - <function name="GetPixelMapusv" offset="273" static_dispatch="false"> - <param name="map" type="GLenum"/> - <param name="values" type="GLushort *" output="true" variable_param="map"/> - <glx sop="127"/> - </function> - - <function name="GetPolygonStipple" offset="274" static_dispatch="false"> - <param name="mask" type="GLubyte *" output="true" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP"/> - <glx sop="128"/> - </function> - - <!--function name="GetString" offset="275" static_dispatch="false"> - <param name="name" type="GLenum"/> - <return type="const GLubyte *"/> - <glx sop="129" handcode="true"/> - </function> - - <function name="GetTexEnvfv" offset="276" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="130"/> - </function> - - <function name="GetTexEnviv" offset="277" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="131"/> - </function--> - - <function name="GetTexGendv" offset="278" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLdouble *" output="true" variable_param="pname"/> - <glx sop="132"/> - </function> - - <!--function name="GetTexGenfv" offset="279" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="133"/> - </function> - - <function name="GetTexGeniv" offset="280" static_dispatch="false"> - <param name="coord" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="134"/> - </function--> - - <function name="GetTexImage" offset="281" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="GLvoid *" output="true" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type"/> - <glx sop="135" dimensions_in_reply="true"/> - </function> - - <!--function name="GetTexParameterfv" offset="282" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="136"/> - </function> - - <function name="GetTexParameteriv" offset="283" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="137"/> - </function--> - - <function name="GetTexLevelParameterfv" offset="284" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="138"/> - </function> - - <function name="GetTexLevelParameteriv" offset="285" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="139"/> - </function> - - <!--function name="IsEnabled" offset="286" static_dispatch="false"> - <param name="cap" type="GLenum"/> - <return type="GLboolean"/> - <glx sop="140" handcode="client"/> - </function--> - - <function name="IsList" offset="287" static_dispatch="false"> - <param name="list" type="GLuint"/> - <return type="GLboolean"/> - <glx sop="141"/> - </function> - - <function name="DepthRange" offset="288" static_dispatch="false"> - <param name="zNear" type="GLclampd"/> - <param name="zFar" type="GLclampd"/> - <glx rop="174"/> - </function> - - <function name="Frustum" offset="289" static_dispatch="false"> - <param name="left" type="GLdouble"/> - <param name="right" type="GLdouble"/> - <param name="bottom" type="GLdouble"/> - <param name="top" type="GLdouble"/> - <param name="zNear" type="GLdouble"/> - <param name="zFar" type="GLdouble"/> - <glx rop="175"/> - </function> - - <!--function name="LoadIdentity" offset="290" static_dispatch="false"> - <glx rop="176"/> - </function> - - <function name="LoadMatrixf" offset="291" static_dispatch="false"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="177"/> - </function--> - - <function name="LoadMatrixd" offset="292" static_dispatch="false"> - <param name="m" type="const GLdouble *" count="16"/> - <glx rop="178"/> - </function> - - <!--function name="MatrixMode" offset="293" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="179"/> - </function> - - <function name="MultMatrixf" offset="294" static_dispatch="false"> - <param name="m" type="const GLfloat *" count="16"/> - <glx rop="180"/> - </function--> - - <function name="MultMatrixd" offset="295" static_dispatch="false"> - <param name="m" type="const GLdouble *" count="16"/> - <glx rop="181"/> - </function> - - <function name="Ortho" offset="296" static_dispatch="false"> - <param name="left" type="GLdouble"/> - <param name="right" type="GLdouble"/> - <param name="bottom" type="GLdouble"/> - <param name="top" type="GLdouble"/> - <param name="zNear" type="GLdouble"/> - <param name="zFar" type="GLdouble"/> - <glx rop="182"/> - </function> - - <!--function name="PopMatrix" offset="297" static_dispatch="false"> - <glx rop="183"/> - </function> - - <function name="PushMatrix" offset="298" static_dispatch="false"> - <glx rop="184"/> - </function--> - - <function name="Rotated" offset="299" static_dispatch="false"> - <param name="angle" type="GLdouble"/> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - <glx rop="185"/> - </function> - - <!--function name="Rotatef" offset="300" static_dispatch="false"> - <param name="angle" type="GLfloat"/> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="186"/> - </function--> - - <function name="Scaled" offset="301" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - <glx rop="187"/> - </function> - - <!--function name="Scalef" offset="302" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="188"/> - </function--> - - <function name="Translated" offset="303" static_dispatch="false"> - <param name="x" type="GLdouble"/> - <param name="y" type="GLdouble"/> - <param name="z" type="GLdouble"/> - <glx rop="189"/> - </function> - - <!--function name="Translatef" offset="304" static_dispatch="false"> - <param name="x" type="GLfloat"/> - <param name="y" type="GLfloat"/> - <param name="z" type="GLfloat"/> - <glx rop="190"/> - </function> - - <function name="Viewport" offset="305" static_dispatch="false"> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="191"/> - </function--> -</category> - -<category name="1.1"> - <function name="ArrayElement" offset="306" static_dispatch="false"> - <param name="i" type="GLint"/> - <glx handcode="true"/> - </function> - - <!--function name="ColorPointer" offset="308" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="DisableClientState" offset="309" static_dispatch="false"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="DrawArrays" offset="310" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <param name="first" type="GLint"/> - <param name="count" type="GLsizei"/> - <glx rop="193" handcode="true"/> - </function> - - <function name="DrawElements" offset="311" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <param name="count" type="GLsizei"/> - <param name="type" type="GLenum"/> - <param name="indices" type="const GLvoid *"/> - <glx handcode="true"/> - </function--> - - <function name="EdgeFlagPointer" offset="312" static_dispatch="false"> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <!--function name="EnableClientState" offset="313" static_dispatch="false"> - <param name="array" type="GLenum"/> - <glx handcode="true"/> - </function> - - <function name="GetPointerv" offset="329" static_dispatch="false"> - <param name="pname" type="GLenum"/> - <param name="params" type="GLvoid **" output="true"/> - <glx handcode="true"/> - </function--> - - <function name="IndexPointer" offset="314" static_dispatch="false"> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="InterleavedArrays" offset="317" static_dispatch="false"> - <param name="format" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <!--function name="NormalPointer" offset="318" static_dispatch="false"> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="TexCoordPointer" offset="320" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="VertexPointer" offset="321" static_dispatch="false"> - <param name="size" type="GLint"/> - <param name="type" type="GLenum"/> - <param name="stride" type="GLsizei"/> - <param name="pointer" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="PolygonOffset" offset="319" static_dispatch="false"> - <param name="factor" type="GLfloat"/> - <param name="units" type="GLfloat"/> - <glx rop="192"/> - </function--> - - <function name="CopyTexImage1D" offset="323" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="border" type="GLint"/> - <glx rop="4119"/> - </function> - - <!--function name="CopyTexImage2D" offset="324" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="border" type="GLint"/> - <glx rop="4120"/> - </function--> - - <function name="CopyTexSubImage1D" offset="325" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <glx rop="4121"/> - </function> - - <!--function name="CopyTexSubImage2D" offset="326" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4122"/> - </function--> - - <function name="TexSubImage1D" offset="332" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_xoff="xoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4099" large="true"/> - </function> - - <!--function name="TexSubImage2D" offset="333" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4100" large="true"/> - </function--> - - <function name="AreTexturesResident" offset="322" static_dispatch="false"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="const GLuint *" count="n"/> - <param name="residences" type="GLboolean *" output="true" count="n"/> - <return type="GLboolean"/> - <glx sop="143" handcode="client" always_array="true"/> - </function> - - <!--function name="BindTexture" offset="307" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="texture" type="GLuint"/> - <glx rop="4117"/> - </function> - - <function name="DeleteTextures" offset="327" static_dispatch="false"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="const GLuint *" count="n"/> - <glx sop="144"/> - </function> - - <function name="GenTextures" offset="328" static_dispatch="false"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="GLuint *" output="true" count="n"/> - <glx sop="145" always_array="true"/> - </function> - - <function name="IsTexture" offset="330" static_dispatch="false"> - <param name="texture" type="GLuint"/> - <return type="GLboolean"/> - <glx sop="146"/> - </function--> - - <function name="PrioritizeTextures" offset="331" static_dispatch="false"> - <param name="n" type="GLsizei" counter="true"/> - <param name="textures" type="const GLuint *" count="n"/> - <param name="priorities" type="const GLclampf *" count="n"/> - <glx rop="4118"/> - </function> - - <function name="Indexub" offset="315" vectorequiv="Indexubv" static_dispatch="false"> - <param name="c" type="GLubyte"/> - </function> - - <function name="Indexubv" offset="316" static_dispatch="false"> - <param name="c" type="const GLubyte *" count="1"/> - <glx rop="194"/> - </function> - - <function name="PopClientAttrib" offset="334" static_dispatch="false"> - <glx handcode="true"/> - </function> - - <function name="PushClientAttrib" offset="335" static_dispatch="false"> - <param name="mask" type="GLbitfield"/> - <glx handcode="true"/> - </function> -</category> - -<category name="1.2"> - <!--function name="BlendColor" offset="336" static_dispatch="false"> - <param name="red" type="GLclampf"/> - <param name="green" type="GLclampf"/> - <param name="blue" type="GLclampf"/> - <param name="alpha" type="GLclampf"/> - <glx rop="4096"/> - </function> - - <function name="BlendEquation" offset="337" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <glx rop="4097"/> - </function--> - - <function name="DrawRangeElements" offset="338" static_dispatch="false"> - <param name="mode" type="GLenum"/> - <param name="start" type="GLuint"/> - <param name="end" type="GLuint"/> - <param name="count" type="GLsizei"/> - <param name="type" type="GLenum"/> - <param name="indices" type="const GLvoid *"/> - <glx handcode="true"/> - </function> - - <function name="ColorTable" offset="339" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="table" type="const GLvoid *" img_width="width" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/> - <glx rop="2053" large="true"/> - </function> - - <function name="ColorTableParameterfv" offset="340" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="2054"/> - </function> - - <function name="ColorTableParameteriv" offset="341" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="2055"/> - </function> - - <function name="CopyColorTable" offset="342" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <glx rop="2056"/> - </function> - - <function name="GetColorTable" offset="343" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="table" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/> - <glx sop="147" dimensions_in_reply="true"/> - </function> - - <function name="GetColorTableParameterfv" offset="344" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="148"/> - </function> - - <function name="GetColorTableParameteriv" offset="345" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="149"/> - </function> - - <function name="ColorSubTable" offset="346" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="start" type="GLsizei"/> - <param name="count" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="data" type="const GLvoid *" img_width="count" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/> - <glx rop="195" large="true"/> - </function> - - <function name="CopyColorSubTable" offset="347" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="start" type="GLsizei"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <glx rop="196"/> - </function> - - <function name="ConvolutionFilter1D" offset="348" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="image" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4101" large="true"/> - </function> - - <function name="ConvolutionFilter2D" offset="349" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="image" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4102" large="true"/> - </function> - - <function name="ConvolutionParameterf" offset="350" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat"/> - <glx rop="4103"/> - </function> - - <function name="ConvolutionParameterfv" offset="351" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLfloat *" variable_param="pname"/> - <glx rop="4104"/> - </function> - - <function name="ConvolutionParameteri" offset="352" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint"/> - <glx rop="4105"/> - </function> - - <function name="ConvolutionParameteriv" offset="353" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="const GLint *" variable_param="pname"/> - <glx rop="4106"/> - </function> - - <function name="CopyConvolutionFilter1D" offset="354" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <glx rop="4107"/> - </function> - - <function name="CopyConvolutionFilter2D" offset="355" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4108"/> - </function> - - <function name="GetConvolutionFilter" offset="356" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="image" type="GLvoid *" output="true" img_width="width" img_height="height" img_format="format" img_type="type"/> - <glx sop="150" dimensions_in_reply="true"/> - </function> - - <function name="GetConvolutionParameterfv" offset="357" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="151"/> - </function> - - <function name="GetConvolutionParameteriv" offset="358" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="152"/> - </function> - - <function name="GetSeparableFilter" offset="359" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="row" type="GLvoid *" output="true"/> - <param name="column" type="GLvoid *" output="true"/> - <param name="span" type="GLvoid *" output="true"/> - <glx sop="153" handcode="true"/> - </function> - - <function name="SeparableFilter2D" offset="360" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="row" type="const GLvoid *"/> - <param name="column" type="const GLvoid *"/> - <glx rop="4109" handcode="true"/> - </function> - - <function name="GetHistogram" offset="361" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="reset" type="GLboolean"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="values" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/> - <glx sop="154" dimensions_in_reply="true" img_reset="reset"/> - </function> - - <function name="GetHistogramParameterfv" offset="362" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="155"/> - </function> - - <function name="GetHistogramParameteriv" offset="363" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="156"/> - </function> - - <function name="GetMinmax" offset="364" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="reset" type="GLboolean"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="values" type="GLvoid *" output="true" img_width="2" img_format="format" img_type="type"/> - <glx sop="157" img_reset="reset"/> - </function> - - <function name="GetMinmaxParameterfv" offset="365" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLfloat *" output="true" variable_param="pname"/> - <glx sop="158"/> - </function> - - <function name="GetMinmaxParameteriv" offset="366" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLint *" output="true" variable_param="pname"/> - <glx sop="159"/> - </function> - - <function name="Histogram" offset="367" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="width" type="GLsizei"/> - <param name="internalformat" type="GLenum"/> - <param name="sink" type="GLboolean"/> - <glx rop="4110"/> - </function> - - <function name="Minmax" offset="368" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="internalformat" type="GLenum"/> - <param name="sink" type="GLboolean"/> - <glx rop="4111"/> - </function> - - <function name="ResetHistogram" offset="369" static_dispatch="false"> - <param name="target" type="GLenum"/> - <glx rop="4112"/> - </function> - - <function name="ResetMinmax" offset="370" static_dispatch="false"> - <param name="target" type="GLenum"/> - <glx rop="4113"/> - </function> - - <!--function name="TexImage3D" offset="371" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="internalformat" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="border" type="GLint"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> - <glx rop="4114" large="true"/> - </function> - - <function name="TexSubImage3D" offset="372" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <param name="depth" type="GLsizei"/> - <param name="format" type="GLenum"/> - <param name="type" type="GLenum"/> - <param name="UNUSED" type="GLuint" padding="true"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> - <glx rop="4115" large="true"/> - </function> - - <function name="CopyTexSubImage3D" offset="373" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="level" type="GLint"/> - <param name="xoffset" type="GLint"/> - <param name="yoffset" type="GLint"/> - <param name="zoffset" type="GLint"/> - <param name="x" type="GLint"/> - <param name="y" type="GLint"/> - <param name="width" type="GLsizei"/> - <param name="height" type="GLsizei"/> - <glx rop="4123"/> - </function--> -</category> - -<category name="GL_ARB_multitexture" number="1"> - <!--function name="ActiveTextureARB" offset="374" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx rop="197"/> - </function> - - <function name="ClientActiveTextureARB" offset="375" static_dispatch="false"> - <param name="texture" type="GLenum"/> - <glx handcode="true"/> - </function--> - - <function name="MultiTexCoord1dARB" offset="376" vectorequiv="MultiTexCoord1dvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLdouble"/> - </function> - - <function name="MultiTexCoord1dvARB" offset="377" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLdouble *" count="1"/> - <glx rop="198"/> - </function> - - <function name="MultiTexCoord1fARB" offset="378" vectorequiv="MultiTexCoord1fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - </function> - - <function name="MultiTexCoord1fvARB" offset="379" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLfloat *" count="1"/> - <glx rop="199"/> - </function> - - <function name="MultiTexCoord1iARB" offset="380" vectorequiv="MultiTexCoord1ivARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLint"/> - </function> - - <function name="MultiTexCoord1ivARB" offset="381" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLint *" count="1"/> - <glx rop="200"/> - </function> - - <function name="MultiTexCoord1sARB" offset="382" vectorequiv="MultiTexCoord1svARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLshort"/> - </function> - - <function name="MultiTexCoord1svARB" offset="383" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLshort *" count="1"/> - <glx rop="201"/> - </function> - - <function name="MultiTexCoord2dARB" offset="384" vectorequiv="MultiTexCoord2dvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - </function> - - <function name="MultiTexCoord2dvARB" offset="385" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLdouble *" count="2"/> - <glx rop="202"/> - </function> - - <function name="MultiTexCoord2fARB" offset="386" vectorequiv="MultiTexCoord2fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - </function> - - <function name="MultiTexCoord2fvARB" offset="387" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLfloat *" count="2"/> - <glx rop="203"/> - </function> - - <function name="MultiTexCoord2iARB" offset="388" vectorequiv="MultiTexCoord2ivARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - </function> - - <function name="MultiTexCoord2ivARB" offset="389" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLint *" count="2"/> - <glx rop="204"/> - </function> - - <function name="MultiTexCoord2sARB" offset="390" vectorequiv="MultiTexCoord2svARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - </function> - - <function name="MultiTexCoord2svARB" offset="391" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLshort *" count="2"/> - <glx rop="205"/> - </function> - - <function name="MultiTexCoord3dARB" offset="392" vectorequiv="MultiTexCoord3dvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - <param name="r" type="GLdouble"/> - </function> - - <function name="MultiTexCoord3dvARB" offset="393" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLdouble *" count="3"/> - <glx rop="206"/> - </function> - - <function name="MultiTexCoord3fARB" offset="394" vectorequiv="MultiTexCoord3fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - </function> - - <function name="MultiTexCoord3fvARB" offset="395" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLfloat *" count="3"/> - <glx rop="207"/> - </function> - - <function name="MultiTexCoord3iARB" offset="396" vectorequiv="MultiTexCoord3ivARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - <param name="r" type="GLint"/> - </function> - - <function name="MultiTexCoord3ivARB" offset="397" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLint *" count="3"/> - <glx rop="208"/> - </function> - - <function name="MultiTexCoord3sARB" offset="398" vectorequiv="MultiTexCoord3svARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - <param name="r" type="GLshort"/> - </function> - - <function name="MultiTexCoord3svARB" offset="399" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLshort *" count="3"/> - <glx rop="209"/> - </function> - - <function name="MultiTexCoord4dARB" offset="400" vectorequiv="MultiTexCoord4dvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLdouble"/> - <param name="t" type="GLdouble"/> - <param name="r" type="GLdouble"/> - <param name="q" type="GLdouble"/> - </function> - - <function name="MultiTexCoord4dvARB" offset="401" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLdouble *" count="4"/> - <glx rop="210"/> - </function> - - <!--function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLfloat"/> - <param name="t" type="GLfloat"/> - <param name="r" type="GLfloat"/> - <param name="q" type="GLfloat"/> - </function--> - - <function name="MultiTexCoord4fvARB" offset="403" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLfloat *" count="4"/> - <glx rop="211"/> - </function> - - <function name="MultiTexCoord4iARB" offset="404" vectorequiv="MultiTexCoord4ivARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLint"/> - <param name="t" type="GLint"/> - <param name="r" type="GLint"/> - <param name="q" type="GLint"/> - </function> - - <function name="MultiTexCoord4ivARB" offset="405" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLint *" count="4"/> - <glx rop="212"/> - </function> - - <function name="MultiTexCoord4sARB" offset="406" vectorequiv="MultiTexCoord4svARB" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="s" type="GLshort"/> - <param name="t" type="GLshort"/> - <param name="r" type="GLshort"/> - <param name="q" type="GLshort"/> - </function> - - <function name="MultiTexCoord4svARB" offset="407" static_dispatch="false"> - <param name="target" type="GLenum"/> - <param name="v" type="const GLshort *" count="4"/> - <glx rop="213"/> - </function> -</category> - -<xi:include href="../gen/APPLE_vertex_array_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/es_EXT.xml b/src/mapi/glapi/gen-es/es_EXT.xml deleted file mode 100644 index 0013df87e82..00000000000 --- a/src/mapi/glapi/gen-es/es_EXT.xml +++ /dev/null @@ -1,125 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd"> - -<!-- OpenGL ES extensions --> - -<OpenGLAPI> - -<category name="GL_OES_compressed_paletted_texture" number="6"> - <enum name="PALETTE4_RGB8_OES" value="0x8B90"/> - <enum name="PALETTE4_RGBA8_OES" value="0x8B91"/> - <enum name="PALETTE4_R5_G6_B5_OES" value="0x8B92"/> - <enum name="PALETTE4_RGBA4_OES" value="0x8B93"/> - <enum name="PALETTE4_RGB5_A1_OES" value="0x8B94"/> - <enum name="PALETTE8_RGB8_OES" value="0x8B95"/> - <enum name="PALETTE8_RGBA8_OES" value="0x8B96"/> - <enum name="PALETTE8_R5_G6_B5_OES" value="0x8B97"/> - <enum name="PALETTE8_RGBA4_OES" value="0x8B98"/> - <enum name="PALETTE8_RGB5_A1_OES" value="0x8B99"/> -</category> - -<!-- 23. GL_OES_EGL_image --> -<xi:include href="../gen/OES_EGL_image.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> - -<category name="GL_OES_depth24" number="24"> - <enum name="DEPTH_COMPONENT24_OES" value="0x81A6"/> -</category> - -<category name="GL_OES_depth32" number="25"> - <enum name="DEPTH_COMPONENT32_OES" value="0x81A7"/> -</category> - -<category name="GL_OES_element_index_uint" number="26"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_OES_fbo_render_mipmap" number="27"> - <!-- No new functions, types, enums. --> -</category> - -<category name="GL_OES_mapbuffer" number="29"> - <enum name="WRITE_ONLY_OES" value="0x88B9"/> - <enum name="BUFFER_ACCESS_OES" value="0x88BB"/> - <enum name="BUFFER_MAPPED_OES" value="0x88BC"/> - <enum name="BUFFER_MAP_POINTER_OES" value="0x88BD"/> - - <function name="GetBufferPointervOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="pname" type="GLenum"/> - <param name="params" type="GLvoid **"/> - </function> - - <function name="MapBufferOES" offset="assign"> - <param name="target" type="GLenum"/> - <param name="access" type="GLenum"/> - <return type="GLvoid *"/> - </function> - - <function name="UnmapBufferOES" offset="assign"> - <param name="target" type="GLenum"/> - <return type="GLboolean"/> - </function> -</category> - -<category name="GL_OES_rgb8_rgba8" number="30"> - <enum name="RGB8_OES" value="0x8051"/> - <enum name="RGBA8_OES" value="0x8058"/> -</category> - -<category name="GL_OES_stencil1" number="31"> - <enum name="STENCIL_INDEX1_OES" value="0x8D46"/> -</category> - -<category name="GL_OES_stencil4" number="32"> - <enum name="STENCIL_INDEX4_OES" value="0x8D47"/> -</category> - -<category name="GL_OES_stencil8" number="33"> - <enum name="STENCIL_INDEX8_OES" value="0x8D48"/> -</category> - -<category name="GL_EXT_texture_filter_anisotropic" number="41"> - <enum name="TEXTURE_MAX_ANISOTROPY_EXT" value="0x84FE"/> - <enum name="MAX_TEXTURE_MAX_ANISOTROPY_EXT" value="0x84FF"/> -</category> - -<category name="GL_EXT_texture_compression_dxt1" number="49"> - <enum name="COMPRESSED_RGB_S3TC_DXT1_EXT" value="0x83F0"/> - <enum name="COMPRESSED_RGBA_S3TC_DXT1_EXT" value="0x83F1"/> -</category> - -<category name="GL_EXT_texture_format_BGRA8888" number="51"> - <enum name="BGRA_EXT" value="0x80E1"/> -</category> - -<category name="GL_EXT_blend_minmax" number="65"> - <enum name="MIN_EXT" value="0x8007"/> - <enum name="MAX_EXT" value="0x8008"/> -</category> - -<category name="GL_EXT_read_format_bgra" number="66"> - <enum name="BGRA_EXT" value="0x80E1"/> - <enum name="UNSIGNED_SHORT_4_4_4_4_REV_EXT" value="0x8365"/> - <enum name="UNSIGNED_SHORT_1_5_5_5_REV_EXT" value="0x8366"/> -</category> - -<category name="GL_EXT_multi_draw_arrays" number="69"> - <function name="MultiDrawArraysEXT" offset="assign"> - <param name="mode" type="GLenum"/> - <param name="first" type="const GLint *"/> - <param name="count" type="const GLsizei *"/> - <param name="primcount" type="GLsizei"/> - <glx handcode="true"/> - </function> - - <function name="MultiDrawElementsEXT" offset="assign"> - <param name="mode" type="GLenum"/> - <param name="count" type="const GLsizei *"/> - <param name="type" type="GLenum"/> - <param name="indices" type="const GLvoid **"/> - <param name="primcount" type="GLsizei"/> - <glx handcode="true"/> - </function> -</category> - -</OpenGLAPI> diff --git a/src/mapi/glapi/gen-es/gl_compare.py b/src/mapi/glapi/gen-es/gl_compare.py deleted file mode 100644 index 6b5e43bb98b..00000000000 --- a/src/mapi/glapi/gen-es/gl_compare.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/python -# -# Copyright (C) 2009 Chia-I Wu <[email protected]> -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -import sys -import os.path -import getopt - -GLAPI = "../../glapi/gen" -sys.path.append(GLAPI) - -import gl_XML -import glX_XML - -class ApiSet(object): - def __init__(self, api, elts=["enum", "type", "function"]): - self.api = api - self.elts = elts - - def _check_enum(self, e1, e2, strict=True): - if e1.name != e2.name: - raise ValueError("%s: name mismatch" % e1.name) - if e1.value != e2.value: - raise ValueError("%s: value 0x%04x != 0x%04x" - % (e1.name, e1.value, e2.value)) - - def _check_type(self, t1, t2, strict=True): - if t1.name != t2.name: - raise ValueError("%s: name mismatch" % t1.name) - if t1.type_expr.string() != t2.type_expr.string(): - raise ValueError("%s: type %s != %s" - % (t1.name, t1.type_expr.string(), t2.type_expr.string())) - - def _check_function(self, f1, f2, strict=True): - if f1.name != f2.name: - raise ValueError("%s: name mismatch" % f1.name) - if f1.return_type != f2.return_type: - raise ValueError("%s: return type %s != %s" - % (f1.name, f1.return_type, f2.return_type)) - # there might be padded parameters - if strict and len(f1.parameters) != len(f2.parameters): - raise ValueError("%s: parameter length %d != %d" - % (f1.name, len(f1.parameters), len(f2.parameters))) - if f1.assign_offset != f2.assign_offset: - if ((f1.assign_offset and f2.offset < 0) or - (f2.assign_offset and f1.offset < 0)): - raise ValueError("%s: assign offset %d != %d" - % (f1.name, f1.assign_offset, f2.assign_offset)) - elif not f1.assign_offset: - if f1.offset != f2.offset: - raise ValueError("%s: offset %d != %d" - % (f1.name, f1.offset, f2.offset)) - - if strict: - l1 = f1.entry_points - l2 = f2.entry_points - l1.sort() - l2.sort() - if l1 != l2: - raise ValueError("%s: entry points %s != %s" - % (f1.name, l1, l2)) - - l1 = f1.static_entry_points - l2 = f2.static_entry_points - l1.sort() - l2.sort() - if l1 != l2: - raise ValueError("%s: static entry points %s != %s" - % (f1.name, l1, l2)) - - pad = 0 - for i in xrange(len(f1.parameters)): - p1 = f1.parameters[i] - p2 = f2.parameters[i + pad] - - if not strict and p1.is_padding != p2.is_padding: - if p1.is_padding: - pad -= 1 - continue - else: - pad += 1 - p2 = f2.parameters[i + pad] - - if strict and p1.name != p2.name: - raise ValueError("%s: parameter %d name %s != %s" - % (f1.name, i, p1.name, p2.name)) - if p1.type_expr.string() != p2.type_expr.string(): - if (strict or - # special case - f1.name == "TexImage2D" and p1.name != "internalformat"): - raise ValueError("%s: parameter %s type %s != %s" - % (f1.name, p1.name, p1.type_expr.string(), - p2.type_expr.string())) - - def union(self, other): - union = gl_XML.gl_api(None) - - if "enum" in self.elts: - union.enums_by_name = other.enums_by_name.copy() - for key, val in self.api.enums_by_name.iteritems(): - if key not in union.enums_by_name: - union.enums_by_name[key] = val - else: - self._check_enum(val, other.enums_by_name[key]) - - if "type" in self.elts: - union.types_by_name = other.types_by_name.copy() - for key, val in self.api.types_by_name.iteritems(): - if key not in union.types_by_name: - union.types_by_name[key] = val - else: - self._check_type(val, other.types_by_name[key]) - - if "function" in self.elts: - union.functions_by_name = other.functions_by_name.copy() - for key, val in self.api.functions_by_name.iteritems(): - if key not in union.functions_by_name: - union.functions_by_name[key] = val - else: - self._check_function(val, other.functions_by_name[key]) - - return union - - def intersection(self, other): - intersection = gl_XML.gl_api(None) - - if "enum" in self.elts: - for key, val in self.api.enums_by_name.iteritems(): - if key in other.enums_by_name: - self._check_enum(val, other.enums_by_name[key]) - intersection.enums_by_name[key] = val - - if "type" in self.elts: - for key, val in self.api.types_by_name.iteritems(): - if key in other.types_by_name: - self._check_type(val, other.types_by_name[key]) - intersection.types_by_name[key] = val - - if "function" in self.elts: - for key, val in self.api.functions_by_name.iteritems(): - if key in other.functions_by_name: - self._check_function(val, other.functions_by_name[key]) - intersection.functions_by_name[key] = val - - return intersection - - def difference(self, other): - difference = gl_XML.gl_api(None) - - if "enum" in self.elts: - for key, val in self.api.enums_by_name.iteritems(): - if key not in other.enums_by_name: - difference.enums_by_name[key] = val - else: - self._check_enum(val, other.enums_by_name[key]) - - if "type" in self.elts: - for key, val in self.api.types_by_name.iteritems(): - if key not in other.types_by_name: - difference.types_by_name[key] = val - else: - self._check_type(val, other.types_by_name[key]) - - if "function" in self.elts: - for key, val in self.api.functions_by_name.iteritems(): - if key not in other.functions_by_name: - difference.functions_by_name[key] = val - else: - self._check_function(val, other.functions_by_name[key], False) - - return difference - -def cmp_enum(e1, e2): - if e1.value < e2.value: - return -1 - elif e1.value > e2.value: - return 1 - else: - return 0 - -def cmp_type(t1, t2): - return t1.size - t2.size - -def cmp_function(f1, f2): - if f1.name > f2.name: - return 1 - elif f1.name < f2.name: - return -1 - else: - return 0 - -def spaces(n, str=""): - spaces = n - len(str) - if spaces < 1: - spaces = 1 - return " " * spaces - -def output_enum(e, indent=0): - attrs = 'name="%s"' % e.name - if e.default_count > 0: - tab = spaces(37, attrs) - attrs += '%scount="%d"' % (tab, e.default_count) - tab = spaces(48, attrs) - val = "%04x" % e.value - val = "0x" + val.upper() - attrs += '%svalue="%s"' % (tab, val) - - # no child - if not e.functions: - print '%s<enum %s/>' % (spaces(indent), attrs) - return - - print '%s<enum %s>' % (spaces(indent), attrs) - for key, val in e.functions.iteritems(): - attrs = 'name="%s"' % key - if val[0] != e.default_count: - attrs += ' count="%d"' % val[0] - if not val[1]: - attrs += ' mode="get"' - - print '%s<size %s/>' % (spaces(indent * 2), attrs) - - print '%s</enum>' % spaces(indent) - -def output_type(t, indent=0): - tab = spaces(16, t.name) - attrs = 'name="%s"%ssize="%d"' % (t.name, tab, t.size) - ctype = t.type_expr.string() - if ctype.find("unsigned") != -1: - attrs += ' unsigned="true"' - elif ctype.find("signed") == -1: - attrs += ' float="true"' - print '%s<type %s/>' % (spaces(indent), attrs) - -def output_function(f, indent=0): - attrs = 'name="%s"' % f.name - if f.offset > 0: - if f.assign_offset: - attrs += ' offset="assign"' - else: - attrs += ' offset="%d"' % f.offset - print '%s<function %s>' % (spaces(indent), attrs) - - for p in f.parameters: - attrs = 'name="%s" type="%s"' \ - % (p.name, p.type_expr.original_string) - print '%s<param %s/>' % (spaces(indent * 2), attrs) - if f.return_type != "void": - attrs = 'type="%s"' % f.return_type - print '%s<return %s/>' % (spaces(indent * 2), attrs) - - print '%s</function>' % spaces(indent) - -def output_category(api, indent=0): - enums = api.enums_by_name.values() - enums.sort(cmp_enum) - types = api.types_by_name.values() - types.sort(cmp_type) - functions = api.functions_by_name.values() - functions.sort(cmp_function) - - for e in enums: - output_enum(e, indent) - if enums and types: - print - for t in types: - output_type(t, indent) - if enums or types: - print - for f in functions: - output_function(f, indent) - if f != functions[-1]: - print - -def is_api_empty(api): - return bool(not api.enums_by_name and - not api.types_by_name and - not api.functions_by_name) - -def show_usage(ops): - print "Usage: %s [-k elts] <%s> <file1> <file2>" % (sys.argv[0], "|".join(ops)) - print " -k elts A comma separated string of types of elements to" - print " skip. Possible types are enum, type, and function." - sys.exit(1) - -def main(): - ops = ["union", "intersection", "difference"] - elts = ["enum", "type", "function"] - - try: - options, args = getopt.getopt(sys.argv[1:], "k:") - except Exception, e: - show_usage(ops) - - if len(args) != 3: - show_usage(ops) - op, file1, file2 = args - if op not in ops: - show_usage(ops) - - skips = [] - for opt, val in options: - if opt == "-k": - skips = val.split(",") - - for elt in skips: - try: - elts.remove(elt) - except ValueError: - show_usage(ops) - - api1 = gl_XML.parse_GL_API(file1, glX_XML.glx_item_factory()) - api2 = gl_XML.parse_GL_API(file2, glX_XML.glx_item_factory()) - - set = ApiSet(api1, elts) - func = getattr(set, op) - result = func(api2) - - if not is_api_empty(result): - cat_name = "%s_of_%s_and_%s" \ - % (op, os.path.basename(file1), os.path.basename(file2)) - - print '<?xml version="1.0"?>' - print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI - print - print '<OpenGLAPI>' - print - print '<category name="%s">' % (cat_name) - output_category(result, 4) - print '</category>' - print - print '</OpenGLAPI>' - -if __name__ == "__main__": - main() diff --git a/src/mapi/glapi/gen-es/gl_parse_header.py b/src/mapi/glapi/gen-es/gl_parse_header.py deleted file mode 100644 index 5382eba35c3..00000000000 --- a/src/mapi/glapi/gen-es/gl_parse_header.py +++ /dev/null @@ -1,450 +0,0 @@ -#!/usr/bin/python -# -# Copyright (C) 2009 Chia-I Wu <[email protected]> -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -import sys -import os.path -import getopt -import re - -GLAPI = "../../glapi/gen" -sys.path.append(GLAPI) - -class HeaderParser(object): - """Parser for GL header files.""" - - def __init__(self, verbose=0): - # match #if and #ifdef - self.IFDEF = re.compile('#\s*if(n?def\s+(?P<ifdef>\w+)|\s+(?P<if>.+))') - # match #endif - self.ENDIF = re.compile('#\s*endif') - # match typedef abc def; - self.TYPEDEF = re.compile('typedef\s+(?P<from>[\w ]+)\s+(?P<to>\w+);') - # match #define XYZ VAL - self.DEFINE = re.compile('#\s*define\s+(?P<key>\w+)(?P<value>\s+[\w"]*)?') - # match GLAPI - self.GLAPI = re.compile('^GL_?API(CALL)?\s+(?P<return>[\w\s*]+[\w*])\s+(GL)?_?APIENTRY\s+(?P<name>\w+)\s*\((?P<params>[\w\s(,*\[\])]+)\)\s*;') - - self.split_params = re.compile('\s*,\s*') - self.split_ctype = re.compile('(\W)') - # ignore GL_VERSION_X_Y - self.ignore_enum = re.compile('GL(_ES)?_VERSION(_ES_C[ML])?_\d_\d') - - self.verbose = verbose - self._reset() - - def _reset(self): - """Reset to initial state.""" - self.ifdef_levels = [] - self.need_char = False - - # use typeexpr? - def _format_ctype(self, ctype, fix=True): - """Format a ctype string, optionally fix it.""" - # split the type string - tmp = self.split_ctype.split(ctype) - tmp = [s for s in tmp if s and s != " "] - - pretty = "" - for i in xrange(len(tmp)): - # add missing GL prefix - if (fix and tmp[i] != "const" and tmp[i] != "*" and - not tmp[i].startswith("GL")): - tmp[i] = "GL" + tmp[i] - - if i == 0: - pretty = tmp[i] - else: - sep = " " - if tmp[i - 1] == "*": - sep = "" - pretty += sep + tmp[i] - return pretty - - # use typeexpr? - def _get_ctype_attrs(self, ctype): - """Get the attributes of a ctype.""" - is_float = (ctype.find("float") != -1 or ctype.find("double") != -1) - is_signed = not (ctype.find("unsigned") != -1) - - size = 0 - if ctype.find("char") != -1: - size = 1 - elif ctype.find("short") != -1: - size = 2 - elif ctype.find("int") != -1: - size = 4 - elif is_float: - if ctype.find("float") != -1: - size = 4 - else: - size = 8 - - return (size, is_float, is_signed) - - def _parse_define(self, line): - """Parse a #define line for an <enum>.""" - m = self.DEFINE.search(line) - if not m: - if self.verbose and line.find("#define") >= 0: - print "ignore %s" % (line) - return None - - key = m.group("key").strip() - val = m.group("value").strip() - - # enum must begin with GL_ and be all uppercase - if ((not (key.startswith("GL_") and key.isupper())) or - (self.ignore_enum.match(key) and val == "1")): - if self.verbose: - print "ignore enum %s" % (key) - return None - - return (key, val) - - def _parse_typedef(self, line): - """Parse a typedef line for a <type>.""" - m = self.TYPEDEF.search(line) - if not m: - if self.verbose and line.find("typedef") >= 0: - print "ignore %s" % (line) - return None - - f = m.group("from").strip() - t = m.group("to").strip() - if not t.startswith("GL"): - if self.verbose: - print "ignore type %s" % (t) - return None - attrs = self._get_ctype_attrs(f) - - return (f, t, attrs) - - def _parse_gl_api(self, line): - """Parse a GLAPI line for a <function>.""" - m = self.GLAPI.search(line) - if not m: - if self.verbose and line.find("APIENTRY") >= 0: - print "ignore %s" % (line) - return None - - rettype = m.group("return") - rettype = self._format_ctype(rettype) - if rettype == "GLvoid": - rettype = "" - - name = m.group("name") - - param_str = m.group("params") - chunks = self.split_params.split(param_str) - chunks = [s.strip() for s in chunks] - if len(chunks) == 1 and (chunks[0] == "void" or chunks[0] == "GLvoid"): - chunks = [] - - params = [] - for c in chunks: - # split type and variable name - idx = c.rfind("*") - if idx < 0: - idx = c.rfind(" ") - if idx >= 0: - idx += 1 - ctype = c[:idx] - var = c[idx:] - else: - ctype = c - var = "unnamed" - - # convert array to pointer - idx = var.find("[") - if idx >= 0: - var = var[:idx] - ctype += "*" - - ctype = self._format_ctype(ctype) - var = var.strip() - - if not self.need_char and ctype.find("GLchar") >= 0: - self.need_char = True - - params.append((ctype, var)) - - return (rettype, name, params) - - def _change_level(self, line): - """Parse a #ifdef line and change level.""" - m = self.IFDEF.search(line) - if m: - ifdef = m.group("ifdef") - if not ifdef: - ifdef = m.group("if") - self.ifdef_levels.append(ifdef) - return True - m = self.ENDIF.search(line) - if m: - self.ifdef_levels.pop() - return True - return False - - def _read_header(self, header): - """Open a header file and read its contents.""" - lines = [] - try: - fp = open(header, "rb") - lines = fp.readlines() - fp.close() - except IOError, e: - print "failed to read %s: %s" % (header, e) - return lines - - def _cmp_enum(self, enum1, enum2): - """Compare two enums.""" - # sort by length of the values as strings - val1 = enum1[1] - val2 = enum2[1] - ret = len(val1) - len(val2) - # sort by the values - if not ret: - val1 = int(val1, 16) - val2 = int(val2, 16) - ret = val1 - val2 - # in case int cannot hold the result - if ret > 0: - ret = 1 - elif ret < 0: - ret = -1 - # sort by the names - if not ret: - if enum1[0] < enum2[0]: - ret = -1 - elif enum1[0] > enum2[0]: - ret = 1 - return ret - - def _cmp_type(self, type1, type2): - """Compare two types.""" - attrs1 = type1[2] - attrs2 = type2[2] - # sort by type size - ret = attrs1[0] - attrs2[0] - # float is larger - if not ret: - ret = attrs1[1] - attrs2[1] - # signed is larger - if not ret: - ret = attrs1[2] - attrs2[2] - # reverse - ret = -ret - return ret - - def _cmp_function(self, func1, func2): - """Compare two functions.""" - name1 = func1[1] - name2 = func2[1] - ret = 0 - # sort by the names - if name1 < name2: - ret = -1 - elif name1 > name2: - ret = 1 - return ret - - def _postprocess_dict(self, hdict): - """Post-process a header dict and return an ordered list.""" - hlist = [] - largest = 0 - for key, cat in hdict.iteritems(): - size = len(cat["enums"]) + len(cat["types"]) + len(cat["functions"]) - # ignore empty category - if not size: - continue - - cat["enums"].sort(self._cmp_enum) - # remove duplicates - dup = [] - for i in xrange(1, len(cat["enums"])): - if cat["enums"][i] == cat["enums"][i - 1]: - dup.insert(0, i) - for i in dup: - e = cat["enums"].pop(i) - if self.verbose: - print "remove duplicate enum %s" % e[0] - - cat["types"].sort(self._cmp_type) - cat["functions"].sort(self._cmp_function) - - # largest category comes first - if size > largest: - hlist.insert(0, (key, cat)) - largest = size - else: - hlist.append((key, cat)) - return hlist - - def parse(self, header): - """Parse a header file.""" - self._reset() - - if self.verbose: - print "Parsing %s" % (header) - - hdict = {} - lines = self._read_header(header) - for line in lines: - if self._change_level(line): - continue - - # skip until the first ifdef (i.e. __gl_h_) - if not self.ifdef_levels: - continue - - cat_name = os.path.basename(header) - # check if we are in an extension - if (len(self.ifdef_levels) > 1 and - self.ifdef_levels[-1].startswith("GL_")): - cat_name = self.ifdef_levels[-1] - - try: - cat = hdict[cat_name] - except KeyError: - cat = { - "enums": [], - "types": [], - "functions": [] - } - hdict[cat_name] = cat - - key = "enums" - elem = self._parse_define(line) - if not elem: - key = "types" - elem = self._parse_typedef(line) - if not elem: - key = "functions" - elem = self._parse_gl_api(line) - - if elem: - cat[key].append(elem) - - if self.need_char: - if self.verbose: - print "define GLchar" - elem = self._parse_typedef("typedef char GLchar;") - cat["types"].append(elem) - return self._postprocess_dict(hdict) - -def spaces(n, str=""): - spaces = n - len(str) - if spaces < 1: - spaces = 1 - return " " * spaces - -def output_xml(name, hlist): - """Output a parsed header in OpenGLAPI XML.""" - - for i in xrange(len(hlist)): - cat_name, cat = hlist[i] - - print '<category name="%s">' % (cat_name) - indent = 4 - - for enum in cat["enums"]: - name = enum[0][3:] - value = enum[1] - tab = spaces(41, name) - attrs = 'name="%s"%svalue="%s"' % (name, tab, value) - print '%s<enum %s/>' % (spaces(indent), attrs) - - if cat["enums"] and cat["types"]: - print - - for type in cat["types"]: - ctype = type[0] - size, is_float, is_signed = type[2] - - attrs = 'name="%s"' % (type[1][2:]) - attrs += spaces(16, attrs) + 'size="%d"' % (size) - if is_float: - attrs += ' float="true"' - elif not is_signed: - attrs += ' unsigned="true"' - - print '%s<type %s/>' % (spaces(indent), attrs) - - for func in cat["functions"]: - print - ret = func[0] - name = func[1][2:] - params = func[2] - - attrs = 'name="%s" offset="assign"' % name - print '%s<function %s>' % (spaces(indent), attrs) - - for param in params: - attrs = 'name="%s" type="%s"' % (param[1], param[0]) - print '%s<param %s/>' % (spaces(indent * 2), attrs) - if ret: - attrs = 'type="%s"' % ret - print '%s<return %s/>' % (spaces(indent * 2), attrs) - - print '%s</function>' % spaces(indent) - - print '</category>' - print - -def show_usage(): - print "Usage: %s [-v] <header> ..." % sys.argv[0] - sys.exit(1) - -def main(): - try: - args, headers = getopt.getopt(sys.argv[1:], "v") - except Exception, e: - show_usage() - if not headers: - show_usage() - - verbose = 0 - for arg in args: - if arg[0] == "-v": - verbose += 1 - - need_xml_header = True - parser = HeaderParser(verbose) - for h in headers: - h = os.path.abspath(h) - hlist = parser.parse(h) - - if need_xml_header: - print '<?xml version="1.0"?>' - print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI - need_xml_header = False - - print - print '<!-- %s -->' % (h) - print '<OpenGLAPI>' - print - output_xml(h, hlist) - print '</OpenGLAPI>' - -if __name__ == '__main__': - main() diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile index 3e101f3a10f..c386b8766c4 100644 --- a/src/mapi/glapi/gen/Makefile +++ b/src/mapi/glapi/gen/Makefile @@ -180,10 +180,8 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON) ###################################################################### -$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API) - $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@ +$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES) + $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@ $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@ diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 4d414e8b0f8..4dc2e8fa7fb 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -618,7 +618,7 @@ class gl_function( gl_item ): # for each entry-point. Otherwise, they may generate code # that won't compile. - self.parameter_strings = {} + self.entry_point_parameters = {} self.process_element( element ) @@ -703,12 +703,34 @@ class gl_function( gl_item ): if element.children: self.initialized = 1 - self.parameter_strings[name] = create_parameter_string(parameters, 1) + self.entry_point_parameters[name] = parameters else: - self.parameter_strings[name] = None + self.entry_point_parameters[name] = [] return + def filter_entry_points(self, entry_point_list): + """Filter out entry points not in entry_point_list.""" + if not self.initialized: + raise RuntimeError('%s is not initialized yet' % self.name) + + entry_points = [] + for ent in self.entry_points: + if ent not in entry_point_list: + if ent in self.static_entry_points: + self.static_entry_points.remove(ent) + self.entry_point_parameters.pop(ent) + else: + entry_points.append(ent) + + if not entry_points: + raise RuntimeError('%s has no entry point after filtering' % self.name) + + self.entry_points = entry_points + if self.name not in entry_points: + # use the first remaining entry point + self.name = entry_points[0] + self.parameters = self.entry_point_parameters[entry_points[0]] def get_images(self): """Return potentially empty list of input images.""" @@ -721,11 +743,11 @@ class gl_function( gl_item ): def get_parameter_string(self, entrypoint = None): if entrypoint: - s = self.parameter_strings[ entrypoint ] - if s: - return s + params = self.entry_point_parameters[ entrypoint ] + else: + params = self.parameters - return create_parameter_string( self.parameters, 1 ) + return create_parameter_string( params, 1 ) def get_called_parameter_string(self): p_string = "" @@ -791,6 +813,16 @@ class gl_api: typeexpr.create_initial_types() return + def filter_functions(self, entry_point_list): + """Filter out entry points not in entry_point_list.""" + functions_by_name = {} + for func in self.functions_by_name.itervalues(): + entry_points = [ent for ent in func.entry_points if ent in entry_point_list] + if entry_points: + func.filter_entry_points(entry_points) + functions_by_name[func.name] = func + + self.functions_by_name = functions_by_name def process_element(self, doc): element = doc.children diff --git a/src/mapi/glapi/gen/gl_and_es_API.xml b/src/mapi/glapi/gen/gl_and_es_API.xml index ac7d43ceda7..1313da0f5d6 100644 --- a/src/mapi/glapi/gen/gl_and_es_API.xml +++ b/src/mapi/glapi/gen/gl_and_es_API.xml @@ -3,6 +3,11 @@ <!-- OpenGL + OpenGL ES --> +<!-- IMPORTANT + Remember to update gles_api.py when new OpenGL ES specific entry points + are added. Otherwise, they will be filtered out. +--> + <OpenGLAPI> <xi:include href="gl_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 05979e3813f..2cbbd971a86 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -211,28 +211,28 @@ class PrintRemapTable(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0] print " -m mode Mode can be 'table' or 'remap_table'." - print " -c Enable compatibility with OpenGL ES." + print " -c ver Version can be 'es1' or 'es2'." sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c") + (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:") except Exception,e: show_usage() mode = "table" - es = False + es = None for (arg,val) in args: if arg == "-f": file_name = val elif arg == "-m": mode = val elif arg == "-c": - es = True + es = val if mode == "table": printer = PrintGlTable(es) @@ -243,4 +243,14 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer.Print( api ) diff --git a/src/mapi/glapi/gen/glapi_gen.mk b/src/mapi/glapi/gen/glapi_gen.mk new file mode 100644 index 00000000000..c7fa7c0153d --- /dev/null +++ b/src/mapi/glapi/gen/glapi_gen.mk @@ -0,0 +1,44 @@ +# Helpers for glapi header generation + +ifndef TOP +$(error TOP must be defined.) +endif + +glapi_gen_common_deps := \ + $(wildcard $(TOP)/src/mapi/glapi/gen/*.xml) \ + $(wildcard $(TOP)/src/mapi/glapi/gen/*.py) + +glapi_gen_mapi_script := $(TOP)/src/mapi/mapi/mapi_abi.py +glapi_gen_mapi_deps := \ + $(glapi_gen_mapi_script) \ + $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): name of the printer +define glapi_gen_mapi +@mkdir -p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) \ + --mode lib --printer $(2) $(1) > $@ +endef + +glapi_gen_dispatch_script := $(TOP)/src/mapi/glapi/gen/gl_table.py +glapi_gen_dispatch_deps := $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): empty, es1, or es2 for entry point filtering +define glapi_gen_dispatch +@mkdir -p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_dispatch_script) \ + -f $(1) -m remap_table $(if $(2),-c $(2),) > $@ +endef + +glapi_gen_remap_script := $(TOP)/src/mapi/glapi/gen/remap_helper.py +glapi_gen_remap_deps := $(glapi_gen_common_deps) + +# $(1): path to an XML file +# $(2): empty, es1, or es2 for entry point filtering +define glapi_gen_remap +@mkdir -p $(dir $@) +$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_remap_script) \ + -f $(1) $(if $(2),-c $(2),) > $@ +endef diff --git a/src/mapi/glapi/gen/gles_api.py b/src/mapi/glapi/gen/gles_api.py new file mode 100644 index 00000000000..4cde9e544d5 --- /dev/null +++ b/src/mapi/glapi/gen/gles_api.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python + +# Mesa 3-D graphics library +# Version: 7.12 +# +# Copyright (C) 2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Authors: +# Chia-I Wu <[email protected]> + +# These info should be part of GLAPI XML. Until that is possible, scripts have +# to use tables here to filter gl_api. + +es1_core = ( + # OpenGL ES 1.1 + 'ActiveTexture', + 'AlphaFunc', + 'AlphaFuncx', + 'BindBuffer', + 'BindTexture', + 'BlendFunc', + 'BufferData', + 'BufferSubData', + 'Clear', + 'ClearColor', + 'ClearColorx', + 'ClearDepthf', + 'ClearDepthx', + 'ClearStencil', + 'ClientActiveTexture', + 'ClipPlanef', + 'ClipPlanex', + 'Color4f', + 'Color4ub', + 'Color4x', + 'ColorMask', + 'ColorPointer', + 'CompressedTexImage2D', + 'CompressedTexSubImage2D', + 'CopyTexImage2D', + 'CopyTexSubImage2D', + 'CullFace', + 'DeleteBuffers', + 'DeleteTextures', + 'DepthFunc', + 'DepthMask', + 'DepthRangef', + 'DepthRangex', + 'Disable', + 'DisableClientState', + 'DrawArrays', + 'DrawElements', + 'Enable', + 'EnableClientState', + 'Finish', + 'Flush', + 'Fogf', + 'Fogfv', + 'Fogx', + 'Fogxv', + 'FrontFace', + 'Frustumf', + 'Frustumx', + 'GenBuffers', + 'GenTextures', + 'GetBooleanv', + 'GetBufferParameteriv', + 'GetClipPlanef', + 'GetClipPlanex', + 'GetError', + 'GetFixedv', + 'GetFloatv', + 'GetIntegerv', + 'GetLightfv', + 'GetLightxv', + 'GetMaterialfv', + 'GetMaterialxv', + 'GetPointerv', + 'GetString', + 'GetTexEnvfv', + 'GetTexEnviv', + 'GetTexEnvxv', + 'GetTexParameterfv', + 'GetTexParameteriv', + 'GetTexParameterxv', + 'Hint', + 'IsBuffer', + 'IsEnabled', + 'IsTexture', + 'Lightf', + 'Lightfv', + 'LightModelf', + 'LightModelfv', + 'LightModelx', + 'LightModelxv', + 'Lightx', + 'Lightxv', + 'LineWidth', + 'LineWidthx', + 'LoadIdentity', + 'LoadMatrixf', + 'LoadMatrixx', + 'LogicOp', + 'Materialf', + 'Materialfv', + 'Materialx', + 'Materialxv', + 'MatrixMode', + 'MultiTexCoord4f', + 'MultiTexCoord4x', + 'MultMatrixf', + 'MultMatrixx', + 'Normal3f', + 'Normal3x', + 'NormalPointer', + 'Orthof', + 'Orthox', + 'PixelStorei', + 'PointParameterf', + 'PointParameterfv', + 'PointParameterx', + 'PointParameterxv', + 'PointSize', + 'PointSizex', + 'PolygonOffset', + 'PolygonOffsetx', + 'PopMatrix', + 'PushMatrix', + 'ReadPixels', + 'Rotatef', + 'Rotatex', + 'SampleCoverage', + 'SampleCoveragex', + 'Scalef', + 'Scalex', + 'Scissor', + 'ShadeModel', + 'StencilFunc', + 'StencilMask', + 'StencilOp', + 'TexCoordPointer', + 'TexEnvf', + 'TexEnvfv', + 'TexEnvi', + 'TexEnviv', + 'TexEnvx', + 'TexEnvxv', + 'TexImage2D', + 'TexParameterf', + 'TexParameterfv', + 'TexParameteri', + 'TexParameteriv', + 'TexParameterx', + 'TexParameterxv', + 'TexSubImage2D', + 'Translatef', + 'Translatex', + 'VertexPointer', + 'Viewport', +) + +es1_api = es1_core + ( + # GL_OES_EGL_image + 'EGLImageTargetTexture2DOES', + 'EGLImageTargetRenderbufferStorageOES', + # GL_OES_mapbuffer + 'GetBufferPointervOES', + 'MapBufferOES', + 'UnmapBufferOES', + # GL_EXT_multi_draw_arrays + 'MultiDrawArraysEXT', + 'MultiDrawElementsEXT', + # GL_OES_blend_equation_separate + 'BlendEquationSeparateOES', + # GL_OES_blend_func_separate + 'BlendFuncSeparateOES', + # GL_OES_blend_subtract + 'BlendEquationOES', + # GL_OES_draw_texture + 'DrawTexiOES', + 'DrawTexivOES', + 'DrawTexfOES', + 'DrawTexfvOES', + 'DrawTexsOES', + 'DrawTexsvOES', + 'DrawTexxOES', + 'DrawTexxvOES', + # GL_OES_fixed_point + 'AlphaFuncxOES', + 'ClearColorxOES', + 'ClearDepthxOES', + 'Color4xOES', + 'DepthRangexOES', + 'FogxOES', + 'FogxvOES', + 'FrustumxOES', + 'LightModelxOES', + 'LightModelxvOES', + 'LightxOES', + 'LightxvOES', + 'LineWidthxOES', + 'LoadMatrixxOES', + 'MaterialxOES', + 'MaterialxvOES', + 'MultiTexCoord4xOES', + 'MultMatrixxOES', + 'Normal3xOES', + 'OrthoxOES', + 'PointSizexOES', + 'PolygonOffsetxOES', + 'RotatexOES', + 'SampleCoveragexOES', + 'ScalexOES', + 'TexEnvxOES', + 'TexEnvxvOES', + 'TexParameterxOES', + 'TranslatexOES', + 'ClipPlanexOES', + 'GetClipPlanexOES', + 'GetFixedvOES', + 'GetLightxvOES', + 'GetMaterialxvOES', + 'GetTexEnvxvOES', + 'GetTexParameterxvOES', + 'PointParameterxOES', + 'PointParameterxvOES', + 'TexParameterxvOES', + # GL_OES_framebuffer_object + 'BindFramebufferOES', + 'BindRenderbufferOES', + 'CheckFramebufferStatusOES', + 'DeleteFramebuffersOES', + 'DeleteRenderbuffersOES', + 'FramebufferRenderbufferOES', + 'FramebufferTexture2DOES', + 'GenerateMipmapOES', + 'GenFramebuffersOES', + 'GenRenderbuffersOES', + 'GetFramebufferAttachmentParameterivOES', + 'GetRenderbufferParameterivOES', + 'IsFramebufferOES', + 'IsRenderbufferOES', + 'RenderbufferStorageOES', + # GL_OES_point_size_array + 'PointSizePointerOES', + # GL_OES_query_matrix + 'QueryMatrixxOES', + # GL_OES_single_precision + 'ClearDepthfOES', + 'DepthRangefOES', + 'FrustumfOES', + 'OrthofOES', + 'ClipPlanefOES', + 'GetClipPlanefOES', + # GL_OES_texture_cube_map + 'GetTexGenfvOES', + 'GetTexGenivOES', + 'GetTexGenxvOES', + 'TexGenfOES', + 'TexGenfvOES', + 'TexGeniOES', + 'TexGenivOES', + 'TexGenxOES', + 'TexGenxvOES', +) + +es2_core = ( + # OpenGL ES 2.0 + "ActiveTexture", + "AttachShader", + "BindAttribLocation", + "BindBuffer", + "BindFramebuffer", + "BindRenderbuffer", + "BindTexture", + "BlendColor", + "BlendEquation", + "BlendEquationSeparate", + "BlendFunc", + "BlendFuncSeparate", + "BufferData", + "BufferSubData", + "CheckFramebufferStatus", + "Clear", + "ClearColor", + "ClearDepthf", + "ClearStencil", + "ColorMask", + "CompileShader", + "CompressedTexImage2D", + "CompressedTexSubImage2D", + "CopyTexImage2D", + "CopyTexSubImage2D", + "CreateProgram", + "CreateShader", + "CullFace", + "DeleteBuffers", + "DeleteFramebuffers", + "DeleteProgram", + "DeleteRenderbuffers", + "DeleteShader", + "DeleteTextures", + "DepthFunc", + "DepthMask", + "DepthRangef", + "DetachShader", + "Disable", + "DisableVertexAttribArray", + "DrawArrays", + "DrawElements", + "Enable", + "EnableVertexAttribArray", + "Finish", + "Flush", + "FramebufferRenderbuffer", + "FramebufferTexture2D", + "FrontFace", + "GenBuffers", + "GenerateMipmap", + "GenFramebuffers", + "GenRenderbuffers", + "GenTextures", + "GetActiveAttrib", + "GetActiveUniform", + "GetAttachedShaders", + "GetAttribLocation", + "GetBooleanv", + "GetBufferParameteriv", + "GetError", + "GetFloatv", + "GetFramebufferAttachmentParameteriv", + "GetIntegerv", + "GetProgramInfoLog", + "GetProgramiv", + "GetRenderbufferParameteriv", + "GetShaderInfoLog", + "GetShaderiv", + "GetShaderPrecisionFormat", + "GetShaderSource", + "GetString", + "GetTexParameterfv", + "GetTexParameteriv", + "GetUniformfv", + "GetUniformiv", + "GetUniformLocation", + "GetVertexAttribfv", + "GetVertexAttribiv", + "GetVertexAttribPointerv", + "Hint", + "IsBuffer", + "IsEnabled", + "IsFramebuffer", + "IsProgram", + "IsRenderbuffer", + "IsShader", + "IsTexture", + "LineWidth", + "LinkProgram", + "PixelStorei", + "PolygonOffset", + "ReadPixels", + "ReleaseShaderCompiler", + "RenderbufferStorage", + "SampleCoverage", + "Scissor", + "ShaderBinary", + "ShaderSource", + "StencilFunc", + "StencilFuncSeparate", + "StencilMask", + "StencilMaskSeparate", + "StencilOp", + "StencilOpSeparate", + "TexImage2D", + "TexParameterf", + "TexParameterfv", + "TexParameteri", + "TexParameteriv", + "TexSubImage2D", + "Uniform1f", + "Uniform1fv", + "Uniform1i", + "Uniform1iv", + "Uniform2f", + "Uniform2fv", + "Uniform2i", + "Uniform2iv", + "Uniform3f", + "Uniform3fv", + "Uniform3i", + "Uniform3iv", + "Uniform4f", + "Uniform4fv", + "Uniform4i", + "Uniform4iv", + "UniformMatrix2fv", + "UniformMatrix3fv", + "UniformMatrix4fv", + "UseProgram", + "ValidateProgram", + "VertexAttrib1f", + "VertexAttrib1fv", + "VertexAttrib2f", + "VertexAttrib2fv", + "VertexAttrib3f", + "VertexAttrib3fv", + "VertexAttrib4f", + "VertexAttrib4fv", + "VertexAttribPointer", + "Viewport", +) + +es2_api = es2_core + ( + # GL_OES_EGL_image + 'EGLImageTargetTexture2DOES', + 'EGLImageTargetRenderbufferStorageOES', + # GL_OES_mapbuffer + 'GetBufferPointervOES', + 'MapBufferOES', + 'UnmapBufferOES', + # GL_EXT_multi_draw_arrays + 'MultiDrawArraysEXT', + 'MultiDrawElementsEXT', + # GL_OES_texture_3D + 'CompressedTexImage3DOES', + 'CompressedTexSubImage3DOES', + 'CopyTexSubImage3DOES', + 'FramebufferTexture3DOES', + 'TexImage3DOES', + 'TexSubImage3DOES', + # GL_OES_get_program_binary + 'GetProgramBinaryOES', + 'ProgramBinaryOES', +) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index 69b8e5e9d02..367ae24c75c 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -197,22 +197,36 @@ class PrintGlRemap(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0] + print " -c ver Version can be 'es1' or 'es2'." sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:") + (args, trail) = getopt.getopt(sys.argv[1:], "f:c:") except Exception,e: show_usage() + es = None for (arg,val) in args: if arg == "-f": file_name = val + elif arg == "-c": + es = val api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer = PrintGlRemap() printer.Print( api ) diff --git a/src/mapi/mapi/mapi_abi.py b/src/mapi/mapi/mapi_abi.py index cb9fc0ef841..e3d3f6518ec 100644 --- a/src/mapi/mapi/mapi_abi.py +++ b/src/mapi/mapi/mapi_abi.py @@ -27,6 +27,11 @@ # Chia-I Wu <[email protected]> import sys +# make it possible to import glapi +import os +GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0])) +sys.path.append(GLAPI) + import re from optparse import OptionParser @@ -128,9 +133,6 @@ class ABIEntry(object): def abi_parse_xml(xml): """Parse a GLAPI XML file for ABI entries.""" - import os - GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0])) - sys.path.append(GLAPI) import gl_XML, glX_XML api = gl_XML.parse_GL_API(xml, glX_XML.glx_item_factory()) @@ -749,255 +751,7 @@ class ES1APIPrinter(GLAPIPrinter): """OpenGL ES 1.x API Printer""" def __init__(self, entries): - es1_api = [ - # OpenGL ES 1.1 - 'ActiveTexture', - 'AlphaFunc', - 'AlphaFuncx', - 'BindBuffer', - 'BindTexture', - 'BlendFunc', - 'BufferData', - 'BufferSubData', - 'Clear', - 'ClearColor', - 'ClearColorx', - 'ClearDepthf', - 'ClearDepthx', - 'ClearStencil', - 'ClientActiveTexture', - 'ClipPlanef', - 'ClipPlanex', - 'Color4f', - 'Color4ub', - 'Color4x', - 'ColorMask', - 'ColorPointer', - 'CompressedTexImage2D', - 'CompressedTexSubImage2D', - 'CopyTexImage2D', - 'CopyTexSubImage2D', - 'CullFace', - 'DeleteBuffers', - 'DeleteTextures', - 'DepthFunc', - 'DepthMask', - 'DepthRangef', - 'DepthRangex', - 'Disable', - 'DisableClientState', - 'DrawArrays', - 'DrawElements', - 'Enable', - 'EnableClientState', - 'Finish', - 'Flush', - 'Fogf', - 'Fogfv', - 'Fogx', - 'Fogxv', - 'FrontFace', - 'Frustumf', - 'Frustumx', - 'GenBuffers', - 'GenTextures', - 'GetBooleanv', - 'GetBufferParameteriv', - 'GetClipPlanef', - 'GetClipPlanex', - 'GetError', - 'GetFixedv', - 'GetFloatv', - 'GetIntegerv', - 'GetLightfv', - 'GetLightxv', - 'GetMaterialfv', - 'GetMaterialxv', - 'GetPointerv', - 'GetString', - 'GetTexEnvfv', - 'GetTexEnviv', - 'GetTexEnvxv', - 'GetTexParameterfv', - 'GetTexParameteriv', - 'GetTexParameterxv', - 'Hint', - 'IsBuffer', - 'IsEnabled', - 'IsTexture', - 'Lightf', - 'Lightfv', - 'LightModelf', - 'LightModelfv', - 'LightModelx', - 'LightModelxv', - 'Lightx', - 'Lightxv', - 'LineWidth', - 'LineWidthx', - 'LoadIdentity', - 'LoadMatrixf', - 'LoadMatrixx', - 'LogicOp', - 'Materialf', - 'Materialfv', - 'Materialx', - 'Materialxv', - 'MatrixMode', - 'MultiTexCoord4f', - 'MultiTexCoord4x', - 'MultMatrixf', - 'MultMatrixx', - 'Normal3f', - 'Normal3x', - 'NormalPointer', - 'Orthof', - 'Orthox', - 'PixelStorei', - 'PointParameterf', - 'PointParameterfv', - 'PointParameterx', - 'PointParameterxv', - 'PointSize', - 'PointSizex', - 'PolygonOffset', - 'PolygonOffsetx', - 'PopMatrix', - 'PushMatrix', - 'ReadPixels', - 'Rotatef', - 'Rotatex', - 'SampleCoverage', - 'SampleCoveragex', - 'Scalef', - 'Scalex', - 'Scissor', - 'ShadeModel', - 'StencilFunc', - 'StencilMask', - 'StencilOp', - 'TexCoordPointer', - 'TexEnvf', - 'TexEnvfv', - 'TexEnvi', - 'TexEnviv', - 'TexEnvx', - 'TexEnvxv', - 'TexImage2D', - 'TexParameterf', - 'TexParameterfv', - 'TexParameteri', - 'TexParameteriv', - 'TexParameterx', - 'TexParameterxv', - 'TexSubImage2D', - 'Translatef', - 'Translatex', - 'VertexPointer', - 'Viewport', - # GL_OES_EGL_image - 'EGLImageTargetTexture2DOES', - 'EGLImageTargetRenderbufferStorageOES', - # GL_OES_mapbuffer - 'GetBufferPointervOES', - 'MapBufferOES', - 'UnmapBufferOES', - # GL_EXT_multi_draw_arrays - 'MultiDrawArraysEXT', - 'MultiDrawElementsEXT', - # GL_OES_blend_equation_separate - 'BlendEquationSeparateOES', - # GL_OES_blend_func_separate - 'BlendFuncSeparateOES', - # GL_OES_blend_subtract - 'BlendEquationOES', - # GL_OES_draw_texture - 'DrawTexiOES', - 'DrawTexivOES', - 'DrawTexfOES', - 'DrawTexfvOES', - 'DrawTexsOES', - 'DrawTexsvOES', - 'DrawTexxOES', - 'DrawTexxvOES', - # GL_OES_fixed_point - 'AlphaFuncxOES', - 'ClearColorxOES', - 'ClearDepthxOES', - 'Color4xOES', - 'DepthRangexOES', - 'FogxOES', - 'FogxvOES', - 'FrustumxOES', - 'LightModelxOES', - 'LightModelxvOES', - 'LightxOES', - 'LightxvOES', - 'LineWidthxOES', - 'LoadMatrixxOES', - 'MaterialxOES', - 'MaterialxvOES', - 'MultiTexCoord4xOES', - 'MultMatrixxOES', - 'Normal3xOES', - 'OrthoxOES', - 'PointSizexOES', - 'PolygonOffsetxOES', - 'RotatexOES', - 'SampleCoveragexOES', - 'ScalexOES', - 'TexEnvxOES', - 'TexEnvxvOES', - 'TexParameterxOES', - 'TranslatexOES', - 'ClipPlanexOES', - 'GetClipPlanexOES', - 'GetFixedvOES', - 'GetLightxvOES', - 'GetMaterialxvOES', - 'GetTexEnvxvOES', - 'GetTexParameterxvOES', - 'PointParameterxOES', - 'PointParameterxvOES', - 'TexParameterxvOES', - # GL_OES_framebuffer_object - 'BindFramebufferOES', - 'BindRenderbufferOES', - 'CheckFramebufferStatusOES', - 'DeleteFramebuffersOES', - 'DeleteRenderbuffersOES', - 'FramebufferRenderbufferOES', - 'FramebufferTexture2DOES', - 'GenerateMipmapOES', - 'GenFramebuffersOES', - 'GenRenderbuffersOES', - 'GetFramebufferAttachmentParameterivOES', - 'GetRenderbufferParameterivOES', - 'IsFramebufferOES', - 'IsRenderbufferOES', - 'RenderbufferStorageOES', - # GL_OES_point_size_array - 'PointSizePointerOES', - # GL_OES_query_matrix - 'QueryMatrixxOES', - # GL_OES_single_precision - 'ClearDepthfOES', - 'DepthRangefOES', - 'FrustumfOES', - 'OrthofOES', - 'ClipPlanefOES', - 'GetClipPlanefOES', - # GL_OES_texture_cube_map - 'GetTexGenfvOES', - 'GetTexGenivOES', - 'GetTexGenxvOES', - 'TexGenfOES', - 'TexGenfvOES', - 'TexGeniOES', - 'TexGenivOES', - 'TexGenxOES', - 'TexGenxvOES', - ] + from gles_api import es1_api super(ES1APIPrinter, self).__init__(entries, es1_api) self.prefix_lib = 'gl' @@ -1016,171 +770,7 @@ class ES2APIPrinter(GLAPIPrinter): """OpenGL ES 2.x API Printer""" def __init__(self, entries): - es2_api = [ - # OpenGL ES 2.0 - "ActiveTexture", - "AttachShader", - "BindAttribLocation", - "BindBuffer", - "BindFramebuffer", - "BindRenderbuffer", - "BindTexture", - "BlendColor", - "BlendEquation", - "BlendEquationSeparate", - "BlendFunc", - "BlendFuncSeparate", - "BufferData", - "BufferSubData", - "CheckFramebufferStatus", - "Clear", - "ClearColor", - "ClearDepthf", - "ClearStencil", - "ColorMask", - "CompileShader", - "CompressedTexImage2D", - "CompressedTexSubImage2D", - "CopyTexImage2D", - "CopyTexSubImage2D", - "CreateProgram", - "CreateShader", - "CullFace", - "DeleteBuffers", - "DeleteFramebuffers", - "DeleteProgram", - "DeleteRenderbuffers", - "DeleteShader", - "DeleteTextures", - "DepthFunc", - "DepthMask", - "DepthRangef", - "DetachShader", - "Disable", - "DisableVertexAttribArray", - "DrawArrays", - "DrawElements", - "Enable", - "EnableVertexAttribArray", - "Finish", - "Flush", - "FramebufferRenderbuffer", - "FramebufferTexture2D", - "FrontFace", - "GenBuffers", - "GenerateMipmap", - "GenFramebuffers", - "GenRenderbuffers", - "GenTextures", - "GetActiveAttrib", - "GetActiveUniform", - "GetAttachedShaders", - "GetAttribLocation", - "GetBooleanv", - "GetBufferParameteriv", - "GetError", - "GetFloatv", - "GetFramebufferAttachmentParameteriv", - "GetIntegerv", - "GetProgramInfoLog", - "GetProgramiv", - "GetRenderbufferParameteriv", - "GetShaderInfoLog", - "GetShaderiv", - "GetShaderPrecisionFormat", - "GetShaderSource", - "GetString", - "GetTexParameterfv", - "GetTexParameteriv", - "GetUniformfv", - "GetUniformiv", - "GetUniformLocation", - "GetVertexAttribfv", - "GetVertexAttribiv", - "GetVertexAttribPointerv", - "Hint", - "IsBuffer", - "IsEnabled", - "IsFramebuffer", - "IsProgram", - "IsRenderbuffer", - "IsShader", - "IsTexture", - "LineWidth", - "LinkProgram", - "PixelStorei", - "PolygonOffset", - "ReadPixels", - "ReleaseShaderCompiler", - "RenderbufferStorage", - "SampleCoverage", - "Scissor", - "ShaderBinary", - "ShaderSource", - "StencilFunc", - "StencilFuncSeparate", - "StencilMask", - "StencilMaskSeparate", - "StencilOp", - "StencilOpSeparate", - "TexImage2D", - "TexParameterf", - "TexParameterfv", - "TexParameteri", - "TexParameteriv", - "TexSubImage2D", - "Uniform1f", - "Uniform1fv", - "Uniform1i", - "Uniform1iv", - "Uniform2f", - "Uniform2fv", - "Uniform2i", - "Uniform2iv", - "Uniform3f", - "Uniform3fv", - "Uniform3i", - "Uniform3iv", - "Uniform4f", - "Uniform4fv", - "Uniform4i", - "Uniform4iv", - "UniformMatrix2fv", - "UniformMatrix3fv", - "UniformMatrix4fv", - "UseProgram", - "ValidateProgram", - "VertexAttrib1f", - "VertexAttrib1fv", - "VertexAttrib2f", - "VertexAttrib2fv", - "VertexAttrib3f", - "VertexAttrib3fv", - "VertexAttrib4f", - "VertexAttrib4fv", - "VertexAttribPointer", - "Viewport", - # GL_OES_EGL_image - 'EGLImageTargetTexture2DOES', - 'EGLImageTargetRenderbufferStorageOES', - # GL_OES_mapbuffer - 'GetBufferPointervOES', - 'MapBufferOES', - 'UnmapBufferOES', - # GL_EXT_multi_draw_arrays - 'MultiDrawArraysEXT', - 'MultiDrawElementsEXT', - # GL_OES_texture_3D - 'CompressedTexImage3DOES', - 'CompressedTexSubImage3DOES', - 'CopyTexSubImage3DOES', - 'FramebufferTexture3DOES', - 'TexImage3DOES', - 'TexSubImage3DOES', - # GL_OES_get_program_binary - 'GetProgramBinaryOES', - 'ProgramBinaryOES', - ] + from gles_api import es2_api super(ES2APIPrinter, self).__init__(entries, es2_api) self.prefix_lib = 'gl' diff --git a/src/mapi/shared-glapi/Makefile b/src/mapi/shared-glapi/Makefile index c928f822c81..3de864d891c 100644 --- a/src/mapi/shared-glapi/Makefile +++ b/src/mapi/shared-glapi/Makefile @@ -34,17 +34,16 @@ $(glapi_OBJECTS): %.o: $(MAPI)/%.c $(glapi_SOURCES): glapi_mapi_tmp.h -.PHONY: glapi_mapi_tmp.h -glapi_mapi_tmp.h: - @$(MAKE) -C $(GLAPI)/gen-es shared-glapi +include $(GLAPI)/gen/glapi_gen.mk +glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) + $(call glapi_gen_mapi,$<,shared-glapi) .PHONY: clean clean: -rm -f $(TOP)/$(LIB_DIR)/$(GLAPI_LIB_NAME) -rm -f $(glapi_OBJECTS) -rm -f depend depend.bak - @# clean generated sources/headers - @$(MAKE) -C $(GLAPI)/gen-es clean-shared-glapi + -rm -f glapi_mapi_tmp.h install: $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk new file mode 100644 index 00000000000..2a08184aee6 --- /dev/null +++ b/src/mesa/Android.gen.mk @@ -0,0 +1,131 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# included by core mesa Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-intermediates-dir) + +sources := \ + main/api_exec_es1.c \ + main/api_exec_es1_dispatch.h \ + main/api_exec_es1_remap_helper.h \ + main/api_exec_es2.c \ + main/api_exec_es2_dispatch.h \ + main/api_exec_es2_remap_helper.h \ + program/lex.yy.c \ + program/program_parse.tab.c + +LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES)) + +LOCAL_C_INCLUDES += $(intermediates)/main + +ifeq ($(strip $(MESA_ENABLE_ASM)),true) +ifeq ($(TARGET_ARCH),x86) +sources += x86/matypes.h +LOCAL_C_INCLUDES += $(intermediates)/x86 +endif +endif + +sources += main/git_sha1.h + +sources := $(addprefix $(intermediates)/, $(sources)) +LOCAL_GENERATED_SOURCES += $(sources) + +glapi := $(MESA_TOP)/src/mapi/glapi/gen + +es_src_deps := \ + $(LOCAL_PATH)/main/APIspec.xml \ + $(LOCAL_PATH)/main/es_generator.py \ + $(LOCAL_PATH)/main/APIspecutil.py \ + $(LOCAL_PATH)/main/APIspec.py + +es_hdr_deps := \ + $(wildcard $(glapi)/*.py) \ + $(wildcard $(glapi)/*.xml) + +define es-gen + @mkdir -p $(dir $@) + @echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))" + $(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@ +endef + +define local-l-to-c + @mkdir -p $(dir $@) + @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" + $(hide) $(LEX) -o$@ $< +endef + +define local-y-to-c-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -o $@ $< +endef + +$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py +$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml +$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py +$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml +$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py +$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml + +$(intermediates)/main/api_exec_es1.c: $(es_src_deps) + $(call es-gen,-V GLES1.1) + +$(intermediates)/main/api_exec_es2.c: $(es_src_deps) + $(call es-gen,-V GLES2.0) + +$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps) + $(call es-gen, -c $* -m remap_table) + +$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps) + $(call es-gen, -c $*) + +$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y + $(local-y-to-c-and-h) + +$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l + $(local-l-to-c) + +$(intermediates)/main/git_sha1.h: + @mkdir -p $(dir $@) + @echo "GIT-SHA1: $(PRIVATE_MODULE) <= git" + $(hide) touch $@ + $(hide) if which git > /dev/null; then \ + git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \ + sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \ + > $@; \ + fi + +matypes_deps := \ + $(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \ + $(LOCAL_PATH)/main/mtypes.h \ + $(LOCAL_PATH)/tnl/t_context.h + +$(intermediates)/x86/matypes.h: $(matypes_deps) + @mkdir -p $(dir $@) + @echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)" + $(hide) $< > $@ diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk new file mode 100644 index 00000000000..67808d491ac --- /dev/null +++ b/src/mesa/Android.mk @@ -0,0 +1,115 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for core mesa + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/sources.mak + +common_CFLAGS := \ + -DFEATURE_ES1=1 \ + -DFEATURE_ES2=1 + +common_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/glsl + +common_ASM := + +# --------------------------------------- +# Build mesa_gen_matypes for host +# --------------------------------------- + +ifeq ($(strip $(MESA_ENABLE_ASM)),true) +ifeq ($(TARGET_ARCH),x86) +common_ASM += $(X86_SOURCES) + +include $(CLEAR_VARS) +LOCAL_SRC_FILES := x86/gen_matypes.c +LOCAL_CFLAGS := $(common_CFLAGS) +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_MODULE := mesa_gen_matypes +include $(MESA_COMMON_MK) +include $(BUILD_HOST_EXECUTABLE) + +endif # x86 +endif # MESA_ENABLE_ASM + +# --------------------------------------- +# Build libmesa_st_mesa +# --------------------------------------- + +ifeq ($(strip $(MESA_BUILD_GALLIUM)),true) +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(MESA_GALLIUM_SOURCES) \ + $(MESA_GALLIUM_CXX_SOURCES) \ + $(common_ASM) + +LOCAL_CFLAGS := $(common_CFLAGS) + +LOCAL_C_INCLUDES := \ + $(common_C_INCLUDES) \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_MODULE := libmesa_st_mesa + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) +endif # MESA_BUILD_GALLIUM + +# --------------------------------------- +# Build libmesa_glsl_utils +# +# It is used to avoid circular dependency between core mesa and glsl. +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + program/hash_table.c \ + program/symbol_table.c + +LOCAL_MODULE := libmesa_glsl_utils + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_glsl_utils for host +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + program/hash_table.c \ + program/symbol_table.c + +LOCAL_MODULE := libmesa_glsl_utils + +include $(MESA_COMMON_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a903a260ac9..0e15d61bd8d 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore include sources.mak # adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - # define preprocessor flags MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) @@ -68,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S # then convenience libs (.a) and finally the device drivers: default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs +# include glapi_gen.mk for generating glapi headers for GLES +GLAPI := $(TOP)/src/mapi/glapi/gen +include $(GLAPI)/glapi_gen.mk + +main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es1) + +main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es1) + +main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h + +main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es2) + +main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es2) + +main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h + main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ @@ -124,6 +143,8 @@ depend: $(ALL_SOURCES) @ touch depend @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null ###################################################################### # Installation rules diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 24e2155c387..b0c3334fa48 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -264,6 +264,7 @@ statetracker_sources = [ 'state_tracker/st_draw_feedback.c', 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', + 'state_tracker/st_glsl_to_tgsi.cpp', 'state_tracker/st_gen_mipmap.c', 'state_tracker/st_manager.c', 'state_tracker/st_mesa_to_tgsi.c', @@ -292,6 +293,7 @@ program_sources = [ 'program/prog_instruction.c', 'program/prog_noise.c', 'program/prog_optimize.c', + 'program/prog_opt_constant_fold.c', 'program/prog_parameter.c', 'program/prog_parameter_layout.c', 'program/prog_print.c', @@ -346,28 +348,28 @@ if env['gles']: GLAPI = '#src/mapi/glapi/' gles_headers = [] gles_headers += env.CodeGenerate( - target = 'es1api/main/dispatch.h', + target = 'main/api_exec_es1_dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es1api/main/remap_helper.h', + target = 'main/api_exec_es1_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/dispatch.h', + target = 'main/api_exec_es2_dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/remap_helper.h', + target = 'main/api_exec_es2_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', ) env.Depends(gles_sources, gles_headers) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8ab129dd73d..a6174ee2f56 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; - driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; @@ -250,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx) GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { ctx->Driver.ColorMaskIndexed(ctx, i, - ctx->Color.ColorMask[0][RCOMP], - ctx->Color.ColorMask[0][GCOMP], - ctx->Color.ColorMask[0][BCOMP], - ctx->Color.ColorMask[0][ACOMP]); + ctx->Color.ColorMask[i][RCOMP], + ctx->Color.ColorMask[i][GCOMP], + ctx->Color.ColorMask[i][BCOMP], + ctx->Color.ColorMask[i][ACOMP]); } } else { @@ -288,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0); + { + GLfloat mode = (GLfloat) ctx->Fog.Mode; + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode); + } ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..291d912121b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -62,6 +62,7 @@ #include "main/teximage.h" #include "main/texparam.h" #include "main/texstate.h" +#include "main/uniforms.h" #include "main/varray.h" #include "main/viewport.h" #include "program/program.h" @@ -72,63 +73,36 @@ /** Return offset in bytes of the field within a vertex struct */ #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD)) - -/** - * Flags passed to _mesa_meta_begin(). - */ -/*@{*/ -#define META_ALL ~0x0 -#define META_ALPHA_TEST 0x1 -#define META_BLEND 0x2 /**< includes logicop */ -#define META_COLOR_MASK 0x4 -#define META_DEPTH_TEST 0x8 -#define META_FOG 0x10 -#define META_PIXEL_STORE 0x20 -#define META_PIXEL_TRANSFER 0x40 -#define META_RASTERIZATION 0x80 -#define META_SCISSOR 0x100 -#define META_SHADER 0x200 -#define META_STENCIL_TEST 0x400 -#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */ -#define META_TEXTURE 0x1000 -#define META_VERTEX 0x2000 -#define META_VIEWPORT 0x4000 -#define META_CLAMP_FRAGMENT_COLOR 0x8000 -#define META_CLAMP_VERTEX_COLOR 0x10000 -#define META_CONDITIONAL_RENDER 0x20000 -/*@}*/ - - /** * State which we may save/restore across meta ops. * XXX this may be incomplete... */ struct save_state { - GLbitfield SavedState; /**< bitmask of META_* flags */ + GLbitfield SavedState; /**< bitmask of MESA_META_* flags */ - /** META_ALPHA_TEST */ + /** MESA_META_ALPHA_TEST */ GLboolean AlphaEnabled; GLenum AlphaFunc; GLclampf AlphaRef; - /** META_BLEND */ + /** MESA_META_BLEND */ GLbitfield BlendEnabled; GLboolean ColorLogicOpEnabled; - /** META_COLOR_MASK */ + /** MESA_META_COLOR_MASK */ GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; - /** META_DEPTH_TEST */ + /** MESA_META_DEPTH_TEST */ struct gl_depthbuffer_attrib Depth; - /** META_FOG */ + /** MESA_META_FOG */ GLboolean Fog; - /** META_PIXEL_STORE */ + /** MESA_META_PIXEL_STORE */ struct gl_pixelstore_attrib Pack, Unpack; - /** META_PIXEL_TRANSFER */ + /** MESA_META_PIXEL_TRANSFER */ GLfloat RedBias, RedScale; GLfloat GreenBias, GreenScale; GLfloat BlueBias, BlueScale; @@ -136,17 +110,17 @@ struct save_state GLfloat DepthBias, DepthScale; GLboolean MapColorFlag; - /** META_RASTERIZATION */ + /** MESA_META_RASTERIZATION */ GLenum FrontPolygonMode, BackPolygonMode; GLboolean PolygonOffset; GLboolean PolygonSmooth; GLboolean PolygonStipple; GLboolean PolygonCull; - /** META_SCISSOR */ + /** MESA_META_SCISSOR */ struct gl_scissor_attrib Scissor; - /** META_SHADER */ + /** MESA_META_SHADER */ GLboolean VertexProgramEnabled; struct gl_vertex_program *VertexProgram; GLboolean FragmentProgramEnabled; @@ -156,17 +130,19 @@ struct save_state struct gl_shader_program *FragmentShader; struct gl_shader_program *ActiveShader; - /** META_STENCIL_TEST */ + /** MESA_META_STENCIL_TEST */ struct gl_stencil_attrib Stencil; - /** META_TRANSFORM */ + /** MESA_META_TRANSFORM */ GLenum MatrixMode; GLfloat ModelviewMatrix[16]; GLfloat ProjectionMatrix[16]; GLfloat TextureMatrix[16]; + + /** MESA_META_CLIP */ GLbitfield ClipPlanesEnabled; - /** META_TEXTURE */ + /** MESA_META_TEXTURE */ GLuint ActiveUnit; GLuint ClientActiveUnit; /** for unit[0] only */ @@ -176,21 +152,21 @@ struct save_state GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS]; GLuint EnvMode; /* unit[0] only */ - /** META_VERTEX */ + /** MESA_META_VERTEX */ struct gl_array_object *ArrayObj; struct gl_buffer_object *ArrayBufferObj; - /** META_VIEWPORT */ + /** MESA_META_VIEWPORT */ GLint ViewportX, ViewportY, ViewportW, ViewportH; GLclampd DepthNear, DepthFar; - /** META_CLAMP_FRAGMENT_COLOR */ + /** MESA_META_CLAMP_FRAGMENT_COLOR */ GLenum ClampFragmentColor; - /** META_CLAMP_VERTEX_COLOR */ + /** MESA_META_CLAMP_VERTEX_COLOR */ GLenum ClampVertexColor; - /** META_CONDITIONAL_RENDER */ + /** MESA_META_CONDITIONAL_RENDER */ struct gl_query_object *CondRenderQuery; GLenum CondRenderMode; @@ -235,6 +211,8 @@ struct clear_state { GLuint ArrayObj; GLuint VBO; + GLuint ShaderProg; + GLint ColorLocation; }; @@ -336,10 +314,10 @@ _mesa_meta_free(struct gl_context *ctx) * Enter meta state. This is like a light-weight version of glPushAttrib * but it also resets most GL state back to default values. * - * \param state bitmask of META_* flags indicating which attribute groups + * \param state bitmask of MESA_META_* flags indicating which attribute groups * to save and reset to their defaults */ -static void +void _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) { struct save_state *save; @@ -351,7 +329,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) memset(save, 0, sizeof(*save)); save->SavedState = state; - if (state & META_ALPHA_TEST) { + if (state & MESA_META_ALPHA_TEST) { save->AlphaEnabled = ctx->Color.AlphaEnabled; save->AlphaFunc = ctx->Color.AlphaFunc; save->AlphaRef = ctx->Color.AlphaRef; @@ -359,7 +337,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE); } - if (state & META_BLEND) { + if (state & MESA_META_BLEND) { save->BlendEnabled = ctx->Color.BlendEnabled; if (ctx->Color.BlendEnabled) { if (ctx->Extensions.EXT_draw_buffers2) { @@ -377,7 +355,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE); } - if (state & META_COLOR_MASK) { + if (state & MESA_META_COLOR_MASK) { memcpy(save->ColorMask, ctx->Color.ColorMask, sizeof(ctx->Color.ColorMask)); if (!ctx->Color.ColorMask[0][0] || @@ -387,26 +365,26 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } - if (state & META_DEPTH_TEST) { + if (state & MESA_META_DEPTH_TEST) { save->Depth = ctx->Depth; /* struct copy */ if (ctx->Depth.Test) _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE); } - if (state & META_FOG) { + if (state & MESA_META_FOG) { save->Fog = ctx->Fog.Enabled; if (ctx->Fog.Enabled) _mesa_set_enable(ctx, GL_FOG, GL_FALSE); } - if (state & META_PIXEL_STORE) { + if (state & MESA_META_PIXEL_STORE) { save->Pack = ctx->Pack; save->Unpack = ctx->Unpack; ctx->Pack = ctx->DefaultPacking; ctx->Unpack = ctx->DefaultPacking; } - if (state & META_PIXEL_TRANSFER) { + if (state & MESA_META_PIXEL_TRANSFER) { save->RedScale = ctx->Pixel.RedScale; save->RedBias = ctx->Pixel.RedBias; save->GreenScale = ctx->Pixel.GreenScale; @@ -429,7 +407,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) ctx->NewState |=_NEW_PIXEL; } - if (state & META_RASTERIZATION) { + if (state & MESA_META_RASTERIZATION) { save->FrontPolygonMode = ctx->Polygon.FrontMode; save->BackPolygonMode = ctx->Polygon.BackMode; save->PolygonOffset = ctx->Polygon.OffsetFill; @@ -443,12 +421,12 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE); } - if (state & META_SCISSOR) { + if (state & MESA_META_SCISSOR) { save->Scissor = ctx->Scissor; /* struct copy */ _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE); } - if (state & META_SHADER) { + if (state & MESA_META_SHADER) { if (ctx->Extensions.ARB_vertex_program) { save->VertexProgramEnabled = ctx->VertexProgram.Enabled; _mesa_reference_vertprog(ctx, &save->VertexProgram, @@ -477,14 +455,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) } } - if (state & META_STENCIL_TEST) { + if (state & MESA_META_STENCIL_TEST) { save->Stencil = ctx->Stencil; /* struct copy */ if (ctx->Stencil.Enabled) _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE); /* NOTE: other stencil state not reset */ } - if (state & META_TEXTURE) { + if (state & MESA_META_TEXTURE) { GLuint u, tgt; save->ActiveUnit = ctx->Texture.CurrentUnit; @@ -523,7 +501,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); } - if (state & META_TRANSFORM) { + if (state & MESA_META_TRANSFORM) { GLuint activeTexture = ctx->Texture.CurrentUnit; memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m, 16 * sizeof(GLfloat)); @@ -544,6 +522,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_Ortho(0.0, ctx->DrawBuffer->Width, 0.0, ctx->DrawBuffer->Height, -1.0, 1.0); + } + + if (state & MESA_META_CLIP) { save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; if (ctx->Transform.ClipPlanesEnabled) { GLuint i; @@ -553,7 +534,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) } } - if (state & META_VERTEX) { + if (state & MESA_META_VERTEX) { /* save vertex array object state */ _mesa_reference_array_object(ctx, &save->ArrayObj, ctx->Array.ArrayObj); @@ -562,7 +543,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) /* set some default state? */ } - if (state & META_VIEWPORT) { + if (state & MESA_META_VIEWPORT) { /* save viewport state */ save->ViewportX = ctx->Viewport.X; save->ViewportY = ctx->Viewport.Y; @@ -583,7 +564,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_DepthRange(0.0, 1.0); } - if (state & META_CLAMP_FRAGMENT_COLOR) { + if (state & MESA_META_CLAMP_FRAGMENT_COLOR) { save->ClampFragmentColor = ctx->Color.ClampFragmentColor; /* Generally in here we want to do clamping according to whether @@ -594,7 +575,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); } - if (state & META_CLAMP_VERTEX_COLOR) { + if (state & MESA_META_CLAMP_VERTEX_COLOR) { save->ClampVertexColor = ctx->Light.ClampVertexColor; /* Generally in here we never want vertex color clamping -- @@ -603,7 +584,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE); } - if (state & META_CONDITIONAL_RENDER) { + if (state & MESA_META_CONDITIONAL_RENDER) { save->CondRenderQuery = ctx->Query.CondRenderQuery; save->CondRenderMode = ctx->Query.CondRenderMode; @@ -623,19 +604,19 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) /** * Leave meta state. This is like a light-weight version of glPopAttrib(). */ -static void +void _mesa_meta_end(struct gl_context *ctx) { struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth]; const GLbitfield state = save->SavedState; - if (state & META_ALPHA_TEST) { + if (state & MESA_META_ALPHA_TEST) { if (ctx->Color.AlphaEnabled != save->AlphaEnabled) _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled); _mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef); } - if (state & META_BLEND) { + if (state & MESA_META_BLEND) { if (ctx->Color.BlendEnabled != save->BlendEnabled) { if (ctx->Extensions.EXT_draw_buffers2) { GLuint i; @@ -651,7 +632,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled); } - if (state & META_COLOR_MASK) { + if (state & MESA_META_COLOR_MASK) { GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) { @@ -670,23 +651,23 @@ _mesa_meta_end(struct gl_context *ctx) } } - if (state & META_DEPTH_TEST) { + if (state & MESA_META_DEPTH_TEST) { if (ctx->Depth.Test != save->Depth.Test) _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test); _mesa_DepthFunc(save->Depth.Func); _mesa_DepthMask(save->Depth.Mask); } - if (state & META_FOG) { + if (state & MESA_META_FOG) { _mesa_set_enable(ctx, GL_FOG, save->Fog); } - if (state & META_PIXEL_STORE) { + if (state & MESA_META_PIXEL_STORE) { ctx->Pack = save->Pack; ctx->Unpack = save->Unpack; } - if (state & META_PIXEL_TRANSFER) { + if (state & MESA_META_PIXEL_TRANSFER) { ctx->Pixel.RedScale = save->RedScale; ctx->Pixel.RedBias = save->RedBias; ctx->Pixel.GreenScale = save->GreenScale; @@ -700,7 +681,7 @@ _mesa_meta_end(struct gl_context *ctx) ctx->NewState |=_NEW_PIXEL; } - if (state & META_RASTERIZATION) { + if (state & MESA_META_RASTERIZATION) { _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode); _mesa_PolygonMode(GL_BACK, save->BackPolygonMode); _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple); @@ -709,13 +690,13 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull); } - if (state & META_SCISSOR) { + if (state & MESA_META_SCISSOR) { _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled); _mesa_Scissor(save->Scissor.X, save->Scissor.Y, save->Scissor.Width, save->Scissor.Height); } - if (state & META_SHADER) { + if (state & MESA_META_SHADER) { if (ctx->Extensions.ARB_vertex_program) { _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, save->VertexProgramEnabled); @@ -747,7 +728,7 @@ _mesa_meta_end(struct gl_context *ctx) save->ActiveShader); } - if (state & META_STENCIL_TEST) { + if (state & MESA_META_STENCIL_TEST) { const struct gl_stencil_attrib *stencil = &save->Stencil; _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled); @@ -778,7 +759,7 @@ _mesa_meta_end(struct gl_context *ctx) stencil->ZPassFunc[1]); } - if (state & META_TEXTURE) { + if (state & MESA_META_TEXTURE) { GLuint u, tgt; ASSERT(ctx->Texture.CurrentUnit == 0); @@ -829,7 +810,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit); } - if (state & META_TRANSFORM) { + if (state & MESA_META_TRANSFORM) { GLuint activeTexture = ctx->Texture.CurrentUnit; _mesa_ActiveTextureARB(GL_TEXTURE0); _mesa_MatrixMode(GL_TEXTURE); @@ -843,7 +824,9 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_LoadMatrixf(save->ProjectionMatrix); _mesa_MatrixMode(save->MatrixMode); + } + if (state & MESA_META_CLIP) { if (save->ClipPlanesEnabled) { GLuint i; for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { @@ -854,7 +837,7 @@ _mesa_meta_end(struct gl_context *ctx) } } - if (state & META_VERTEX) { + if (state & MESA_META_VERTEX) { /* restore vertex buffer object */ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name); _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL); @@ -864,7 +847,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_reference_array_object(ctx, &save->ArrayObj, NULL); } - if (state & META_VIEWPORT) { + if (state & MESA_META_VIEWPORT) { if (save->ViewportX != ctx->Viewport.X || save->ViewportY != ctx->Viewport.Y || save->ViewportW != ctx->Viewport.Width || @@ -875,15 +858,15 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_DepthRange(save->DepthNear, save->DepthFar); } - if (state & META_CLAMP_FRAGMENT_COLOR) { + if (state & MESA_META_CLAMP_FRAGMENT_COLOR) { _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor); } - if (state & META_CLAMP_VERTEX_COLOR) { + if (state & MESA_META_CLAMP_VERTEX_COLOR) { _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor); } - if (state & META_CONDITIONAL_RENDER) { + if (state & MESA_META_CONDITIONAL_RENDER) { if (save->CondRenderQuery) _mesa_BeginConditionalRender(save->CondRenderQuery->Id, save->CondRenderMode); @@ -1349,7 +1332,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, } /* only scissor effects blit so save/clear all other relevant state */ - _mesa_meta_begin(ctx, ~META_SCISSOR); + _mesa_meta_begin(ctx, ~MESA_META_SCISSOR); if (blit->ArrayObj == 0) { /* one-time setup */ @@ -1478,15 +1461,15 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) }; struct vertex verts[4]; /* save all state but scissor, pixel pack/unpack */ - GLbitfield metaSave = (META_ALL - - META_SCISSOR - - META_PIXEL_STORE - - META_CONDITIONAL_RENDER); + GLbitfield metaSave = (MESA_META_ALL - + MESA_META_SCISSOR - + MESA_META_PIXEL_STORE - + MESA_META_CONDITIONAL_RENDER); const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; if (buffers & BUFFER_BITS_COLOR) { /* if clearing color buffers, don't save/restore colormask */ - metaSave -= META_COLOR_MASK; + metaSave -= MESA_META_COLOR_MASK; } _mesa_meta_begin(ctx, metaSave); @@ -1521,7 +1504,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); } else { - ASSERT(metaSave & META_COLOR_MASK); + ASSERT(metaSave & MESA_META_COLOR_MASK); _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); } @@ -1589,10 +1572,166 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_meta_end(ctx); } +static void +meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) +{ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + const char *fs_source = + "uniform vec4 color;\n" + "void main()\n" + "{\n" + " gl_FragColor = color;\n" + "}\n"; + GLuint vs, fs; + + if (clear->ArrayObj != 0) + return; + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* setup vertex arrays */ + _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0); + _mesa_EnableVertexAttribArrayARB(0); + + vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + clear->ShaderProg = _mesa_CreateProgramObjectARB(); + _mesa_AttachShader(clear->ShaderProg, fs); + _mesa_AttachShader(clear->ShaderProg, vs); + _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position"); + _mesa_LinkProgramARB(clear->ShaderProg); + + clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg, + "color"); +} + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLbitfield metaSave; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + struct gl_framebuffer *fb = ctx->DrawBuffer; + const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f; + const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f; + const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f; + const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f; + const float z = -invert_z(ctx->Depth.Clear); + struct vertex { + GLfloat x, y, z; + } verts[4]; + + metaSave = (MESA_META_ALPHA_TEST | + MESA_META_BLEND | + MESA_META_DEPTH_TEST | + MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_STENCIL_TEST | + MESA_META_VERTEX | + MESA_META_VIEWPORT | + MESA_META_CLIP | + MESA_META_CLAMP_FRAGMENT_COLOR); + + if (!(buffers & BUFFER_BITS_COLOR)) { + /* We'll use colormask to disable color writes. Otherwise, + * respect color mask + */ + metaSave |= MESA_META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + meta_glsl_clear_init(ctx, clear); + + _mesa_UseProgramObjectARB(clear->ShaderProg); + _mesa_Uniform4fvARB(clear->ColorLocation, 1, + ctx->Color.ClearColorUnclamped); + + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + + /* Clears never have the color clamped. */ + _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + } + else { + ASSERT(metaSave & MESA_META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & stencilMax, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* upload new vertex data */ + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, + GL_DYNAMIC_DRAW_ARB); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} /** * Meta implementation of ctx->Driver.CopyPixels() in terms - * of texture mapping and polygon rendering. + * of texture mapping and polygon rendering and GLSL shaders. */ void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, @@ -1621,12 +1760,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, /* Most GL state applies to glCopyPixels, but a there's a few things * we need to override: */ - _mesa_meta_begin(ctx, (META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT)); + _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT)); if (copypix->ArrayObj == 0) { /* one-time setup */ @@ -1901,10 +2041,10 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, * in [0,1]. */ texIntFormat = GL_ALPHA; - metaExtraSave = (META_COLOR_MASK | - META_DEPTH_TEST | - META_SHADER | - META_STENCIL_TEST); + metaExtraSave = (MESA_META_COLOR_MASK | + MESA_META_DEPTH_TEST | + MESA_META_SHADER | + MESA_META_STENCIL_TEST); } else { fallback = GL_TRUE; @@ -1914,7 +2054,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, if (ctx->Extensions.ARB_depth_texture && ctx->Extensions.ARB_fragment_program) { texIntFormat = GL_DEPTH_COMPONENT; - metaExtraSave = (META_SHADER); + metaExtraSave = (MESA_META_SHADER); } else { fallback = GL_TRUE; @@ -1942,13 +2082,14 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, /* Most GL state applies to glDrawPixels (like blending, stencil, etc), * but a there's a few things we need to override: */ - _mesa_meta_begin(ctx, (META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT | - META_CLAMP_FRAGMENT_COLOR | + _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT | + MESA_META_CLAMP_FRAGMENT_COLOR | metaExtraSave)); newTex = alloc_texture(tex, width, height, texIntFormat); @@ -2149,14 +2290,15 @@ _mesa_meta_Bitmap(struct gl_context *ctx, /* Most GL state applies to glBitmap (like blending, stencil, etc), * but a there's a few things we need to override: */ - _mesa_meta_begin(ctx, (META_ALPHA_TEST | - META_PIXEL_STORE | - META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT)); + _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST | + MESA_META_PIXEL_STORE | + MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT)); if (bitmap->ArrayObj == 0) { /* one-time setup */ @@ -2282,7 +2424,9 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target, /* check for fallbacks */ if (!ctx->Extensions.EXT_framebuffer_object || - target == GL_TEXTURE_3D) { + target == GL_TEXTURE_3D || + target == GL_TEXTURE_1D_ARRAY || + target == GL_TEXTURE_2D_ARRAY) { return GL_TRUE; } @@ -2334,7 +2478,8 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target, /** * Called via ctx->Driver.GenerateMipmap() - * Note: texture borders and 3D texture support not yet complete. + * Note: We don't yet support 3D textures, 1D/2D array textures or texture + * borders. */ void _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, @@ -2374,7 +2519,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, faceTarget = target; } - _mesa_meta_begin(ctx, META_ALL); + _mesa_meta_begin(ctx, MESA_META_ALL); if (original_active_unit != 0) _mesa_BindTexture(target, texObj->Name); @@ -2678,119 +2823,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat) /** - * Helper for _mesa_meta_CopyTexImage1/2D() functions. - * Have to be careful with locking and meta state for pixel transfer. - */ -static void -copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum format, type; - GLint bpp; - void *buf; - struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; - - texObj = _mesa_get_current_tex_object(ctx, target); - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - /* Choose format/type for temporary image buffer */ - format = _mesa_base_tex_format(ctx, internalFormat); - - if (format == GL_LUMINANCE && - _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { - /* The glReadPixels() path will convert RGB to luminance by - * summing R+G+B. glCopyTexImage() is supposed to behave as - * glCopyPixels, which doesn't do that change, and instead - * leaves it up to glTexImage which converts RGB to luminance by - * just taking the R channel. To avoid glReadPixels() trashing - * our data, use RGBA for our temporary image. - */ - format = GL_RGBA; - } - - type = get_temp_image_type(ctx, format); - bpp = _mesa_bytes_per_pixel(format, type); - if (bpp <= 0) { - _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); - return; - } - - /* - * Alloc image buffer (XXX could use a PBO) - */ - buf = malloc(width * height * bpp); - if (!buf) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); - return; - } - - _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ - - /* - * Read image from framebuffer (disable pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); - ctx->Driver.ReadPixels(ctx, x, y, width, height, - format, type, &ctx->Pack, buf); - _mesa_meta_end(ctx); - - if (texImage->Data) { - ctx->Driver.FreeTexImageData(ctx, texImage); - } - - /* The texture's format was already chosen in _mesa_CopyTexImage() */ - ASSERT(texImage->TexFormat != MESA_FORMAT_NONE); - - /* - * Store texture data (with pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); - - _mesa_update_state(ctx); /* to update pixel transfer state */ - - if (target == GL_TEXTURE_1D) { - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - else { - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - _mesa_meta_end(ctx); - - _mesa_lock_texture(ctx, texObj); /* re-lock */ - - free(buf); -} - - -void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border) -{ - copy_tex_image(ctx, 1, target, level, internalFormat, x, y, - width, 1, border); -} - - -void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - copy_tex_image(ctx, 2, target, level, internalFormat, x, y, - width, height, border); -} - - - -/** * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. * Have to be careful with locking and meta state for pixel transfer. */ @@ -2812,6 +2844,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { @@ -2833,7 +2875,7 @@ copy_tex_sub_image(struct gl_context *ctx, /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, height, format, type, &ctx->Pack, buf); _mesa_meta_end(ctx); @@ -2843,7 +2885,7 @@ copy_tex_sub_image(struct gl_context *ctx, /* * Store texture data (with pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE); if (target == GL_TEXTURE_1D) { ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width, format, type, buf, @@ -2915,7 +2957,7 @@ _mesa_meta_CopyColorTable(struct gl_context *ctx, /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, 1, GL_RGBA, GL_FLOAT, &ctx->Pack, buf); @@ -2942,7 +2984,7 @@ _mesa_meta_CopyColorSubTable(struct gl_context *ctx,GLenum target, GLsizei start /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, 1, GL_RGBA, GL_FLOAT, &ctx->Pack, buf); diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..ac20e370eb8 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -26,6 +26,33 @@ #ifndef META_H #define META_H +/** + * \name Flags for meta operations + * \{ + * + * These flags are passed to _mesa_meta_begin(). + */ +#define MESA_META_ALL ~0x0 +#define MESA_META_ALPHA_TEST 0x1 +#define MESA_META_BLEND 0x2 /**< includes logicop */ +#define MESA_META_COLOR_MASK 0x4 +#define MESA_META_DEPTH_TEST 0x8 +#define MESA_META_FOG 0x10 +#define MESA_META_PIXEL_STORE 0x20 +#define MESA_META_PIXEL_TRANSFER 0x40 +#define MESA_META_RASTERIZATION 0x80 +#define MESA_META_SCISSOR 0x100 +#define MESA_META_SHADER 0x200 +#define MESA_META_STENCIL_TEST 0x400 +#define MESA_META_TRANSFORM 0x800 /**< modelview/projection matrix state */ +#define MESA_META_TEXTURE 0x1000 +#define MESA_META_VERTEX 0x2000 +#define MESA_META_VIEWPORT 0x4000 +#define MESA_META_CLAMP_FRAGMENT_COLOR 0x8000 +#define MESA_META_CLAMP_VERTEX_COLOR 0x10000 +#define MESA_META_CONDITIONAL_RENDER 0x20000 +#define MESA_META_CLIP 0x40000 +/**\}*/ extern void _mesa_meta_init(struct gl_context *ctx); @@ -34,6 +61,12 @@ extern void _mesa_meta_free(struct gl_context *ctx); extern void +_mesa_meta_begin(struct gl_context *ctx, GLbitfield state); + +extern void +_mesa_meta_end(struct gl_context *ctx); + +extern void _mesa_meta_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -43,6 +76,9 @@ extern void _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers); extern void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers); + +extern void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type); @@ -69,16 +105,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); extern void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border); - -extern void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border); - -extern void _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 77967ac2a43..12dd31bb162 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { } else defaultVal = attrVal[OA_DEFAULT]; if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal)) - XML_FATAL ("illegal default value: %s.", defaultVal); + XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal); if (attrVal[OA_VALID]) { if (cache->info[opt].type == DRI_BOOL) diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 587517ea10a..ffea430024d 100644 --- a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,7 +60,7 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n" -/** \brief Begin an option definition with qouted default value */ +/** \brief Begin an option definition with quoted default value */ #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default="#def">\n" diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h index d76595578c7..1e584ba086a 100644 --- a/src/mesa/drivers/dri/common/xmlpool/options.h +++ b/src/mesa/drivers/dri/common/xmlpool/options.h @@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,"Number of texture units used") \ diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 5fd6ec65bf8..2427aa77f5b 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,gettext("Number of texture units used")) \ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 6d43726beb1..ed5286fd7d9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state) i830_update_provoking_vertex(&intel->ctx); } +static bool +i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) +{ + return false; +} + void i830InitVtbl(struct i830_context *i830) { @@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; + i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 6e1d7092237..d155b85ffca 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - src = - i915_emit_param4fv(p, - program->Base.Parameters->ParameterValues[source-> - Index]); + src = i915_emit_param4fv(p, + &program->Base.Parameters->ParameterValues[source->Index][0].f); break; default: @@ -303,7 +301,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -317,6 +315,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -327,6 +328,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -340,6 +344,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -394,7 +400,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223e..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd9d15..d9c885da65b 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,11 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..df63fe1d52c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -212,6 +212,7 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ @@ -247,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -310,12 +312,20 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -528,7 +538,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. */ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; @@ -662,6 +672,7 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; @@ -674,6 +685,23 @@ struct brw_context uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { @@ -726,7 +754,6 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; @@ -747,6 +774,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; @@ -827,6 +877,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ @@ -874,7 +928,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -884,21 +938,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..d1799c0ab4f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,93 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. + */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 @@ -734,7 +769,6 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 @@ -747,6 +781,7 @@ #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 /* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index af41c848308..927b0b4acc9 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -309,6 +309,35 @@ char *target_function[16] = { [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" }; +char *target_function_gen6[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler", + [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render", + [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + char *math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", @@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - target, &space); + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } switch (target) { case BRW_MESSAGE_TARGET_MATH: @@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_read.msg_type); } break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { - format (file, " (%d, %d, %d, %d, %d, %d)", + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, @@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_write.send_commit_msg); } break; + case BRW_MESSAGE_TARGET_URB: if (gen >= 5) { format (file, " %d", inst->bits3.urb_gen5.offset); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 56a46ced6e3..7bc69c612e3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -689,17 +689,17 @@ static void brw_prepare_indices(struct brw_context *brw) * rebase it into a temporary. */ if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - bufferobj); - map += offset; + GLubyte *map = ctx->Driver.MapBufferRange(ctx, + offset, + ib_size, + GL_MAP_WRITE_BIT, + bufferobj); intel_upload_data(&brw->intel, map, ib_size, ib_type_size, &bo, &offset); brw->ib.start_vertex_offset = offset / ib_type_size; - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + ctx->Driver.UnmapBuffer(ctx, bufferobj); } else { /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50eadbce..af50305fc2b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) @@ -798,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + /* Helpers for regular instructions: */ @@ -852,6 +861,27 @@ ROUND(RNDE) /* Helpers for SEND instruction: */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg); + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e7370f36064..c5013de7ec1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) } -static void brw_set_dest(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg dest) +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. */ } -static void brw_set_src0(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p, } } -static void brw_set_dp_write_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p, } } -static void +void brw_set_dp_read_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, @@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, @@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -2246,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p, if (intel->gen == 7) { /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xff00)); + brw_pop_insn_state(p); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -2311,7 +2312,7 @@ brw_find_loop_end(struct brw_compile *p, int start) if (insn->header.opcode == BRW_OPCODE_WHILE) { int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; - if (ip + jip / br < start) + if (ip + jip / br <= start) return ip; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b5ea943387d..0b0445ea142 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,20 +143,21 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: return 1; case FS_OPCODE_FB_WRITE: return 2; @@ -181,29 +182,26 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -242,11 +240,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -281,23 +280,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) assert(param < ARRAY_SIZE(c->prog_data.param)); - switch (type->base_type) { - case GLSL_TYPE_FLOAT: + if (ctx->Const.NativeIntegers) { c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; - case GLSL_TYPE_UINT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; - break; - case GLSL_TYPE_INT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; - break; - case GLSL_TYPE_BOOL: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; - break; - default: - assert(!"not reached"); - c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; + } else { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + } } this->param_index[param] = loc; this->param_offset[param] = i; @@ -463,9 +466,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. + */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } @@ -512,16 +527,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -555,12 +570,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. @@ -605,7 +620,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. */ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } @@ -621,12 +636,12 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -684,8 +699,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +754,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. */ if (inst->is_tex()) { @@ -763,8 +778,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -786,6 +801,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. + */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. @@ -815,14 +910,14 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -871,8 +966,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -892,7 +987,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -908,7 +1003,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -945,8 +1040,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +1060,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. */ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1046,6 +1139,24 @@ fs_visitor::propagate_constants() progress = true; } break; + + case SHADER_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; + progress = true; + } + break; + + default: + break; } } @@ -1063,6 +1174,49 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Attempts to move immediate constants into the immediate + * constant slot of following instructions. + * + * Immediate constants are a bit tricky -- they have to be in the last + * operand slot, you can't do abs/negate on them, + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + default: + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1077,8 +1231,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1255,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1123,6 +1277,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; @@ -1130,7 +1286,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1141,11 +1298,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1153,7 +1309,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1161,10 +1318,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. */ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1176,19 +1336,17 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. */ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } @@ -1212,8 +1370,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1228,9 +1386,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1297,7 +1455,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1334,10 +1492,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1392,8 +1550,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1409,7 +1567,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1419,7 +1577,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. */ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1445,7 +1603,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } @@ -1527,8 +1685,8 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1550,11 +1708,14 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1563,7 +1724,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } @@ -1625,7 +1786,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, fs_visitor v(c, prog, shader); if (!v.run()) { prog->LinkStatus = GL_FALSE; - prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } @@ -1633,7 +1794,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2bf850e5dea..10f45f30fe9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include <sys/types.h> @@ -51,37 +53,10 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; -enum fs_opcodes { - FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's @@ -99,7 +74,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +120,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +129,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +140,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). + */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; @@ -224,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -240,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -253,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -269,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -313,22 +291,23 @@ public: return (opcode == FS_OPCODE_TEX || opcode == FS_OPCODE_TXB || opcode == FS_OPCODE_TXD || - opcode == FS_OPCODE_TXL); + opcode == FS_OPCODE_TXL || + opcode == FS_OPCODE_TXS); } bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -402,7 +381,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; @@ -421,7 +400,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -445,27 +424,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -485,9 +465,11 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -524,8 +506,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); @@ -565,6 +547,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 1d89b8f1d11..28efbd3605f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } @@ -145,43 +146,12 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case FS_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case FS_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case FS_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case FS_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case FS_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case FS_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case FS_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case FS_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -272,10 +242,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; + case FS_OPCODE_TXS: + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + break; case FS_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -316,6 +292,14 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + case FS_OPCODE_TXS: + assert(inst->mlen == 3); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); @@ -537,11 +521,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) @@ -608,8 +590,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { @@ -656,6 +638,11 @@ fs_visitor::generate_code() case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); @@ -770,14 +757,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: @@ -796,6 +783,7 @@ fs_visitor::generate_code() case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b4689d2c293..7c5414ac26c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -50,45 +33,115 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. */ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); + } + +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) +{ + struct intel_context *intel = &brw->intel; + + /* Compute the total number of registers across all classes. */ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); + + /* Save this off for the aligned pair class at the end. */ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); + + brw->wm.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + /* Add a special class for aligned pairs, which we'll put delta_x/y + * in on gen5 so that we can do PLN. + */ + if (brw->has_pln && reg_width == 1 && intel->gen < 6) { + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); + + for (int i = 0; i < pairs_reg_count; i++) { + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, + pairs_base_reg + i); + } + } + class_count++; } - this->grf_used = last_grf + reg_width; + ra_set_finalize(brw->wm.regs); } bool @@ -101,12 +154,11 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pair_class = -1; calculate_live_intervals(); @@ -125,7 +177,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -141,94 +193,26 @@ fs_visitor::assign_regs() } } - int ra_reg_count = 0; - int class_base_reg[class_count]; - int class_reg_count[class_count]; - int classes[class_count + 1]; - - for (int i = 0; i < class_count; i++) { - class_base_reg[i] = ra_reg_count; - class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); - ra_reg_count += class_reg_count[i]; - } - - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - for (int i = 0; i < class_count; i++) { - classes[i] = ra_alloc_reg_class(regs); - - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); - } + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - /* Add conflicts between our contiguous registers aliasing - * base regs and other register classes' contiguous registers - * that alias base regs, or the base regs themselves for classes[0]. - */ - for (int c = 0; c <= i; c++) { - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); - c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); - c_r++) { - - if (0) { - printf("%d/%d conflicts %d/%d\n", - class_sizes[i], first_assigned_grf + i_r, - class_sizes[c], first_assigned_grf + c_r); - } - - ra_add_reg_conflict(regs, - class_base_reg[i] + i_r, - class_base_reg[c] + c_r); - } - } - } - } - - /* Add a special class for aligned pairs, which we'll put delta_x/y - * in on gen5 so that we can do PLN. - */ - if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - int reg_count = (base_reg_count - 1) / 2; - int unaligned_pair_class = 1; - assert(class_sizes[unaligned_pair_class] == 2); - - aligned_pair_class = class_count; - classes[aligned_pair_class] = ra_alloc_reg_class(regs); - class_sizes[aligned_pair_class] = 2; - class_base_reg[aligned_pair_class] = 0; - class_reg_count[aligned_pair_class] = 0; - int start = (first_assigned_grf & 1) ? 1 : 0; - - for (int i = 0; i < reg_count; i++) { - ra_class_add_reg(regs, classes[aligned_pair_class], - class_base_reg[unaligned_pair_class] + i * 2 + start); - } - class_count++; - } - - ra_set_finalize(regs); - - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. - */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pair_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, classes[aligned_pair_class]); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -253,7 +237,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -263,28 +246,18 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - int hw_reg = -1; - - for (int c = 0; c < class_count; c++) { - if (reg >= class_base_reg[c] && - reg < class_base_reg[c] + class_reg_count[c]) { - hw_reg = reg - class_base_reg[c]; - break; - } - } - assert(hw_reg >= 0); - hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -292,7 +265,6 @@ fs_visitor::assign_regs() } ralloc_free(g); - ralloc_free(regs); return true; } @@ -336,8 +308,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -370,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } @@ -394,8 +369,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -84,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. */ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; @@ -283,8 +268,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. */ @@ -321,12 +306,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +386,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -437,8 +422,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +432,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +475,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cbe5cf428c5..cdaf543c88b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); @@ -252,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -267,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -289,7 +287,23 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); + emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + } break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -342,11 +356,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -425,7 +439,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; case ir_unop_bit_not: @@ -496,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, void fs_visitor::visit(ir_assignment *ir) { - struct fs_reg l, r; + fs_reg l, r; fs_inst *inst; /* FINISHME: arrays on the lhs */ @@ -603,9 +617,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -620,6 +636,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx = dudx, dvdx, drdx * dPdy = dudy, dvdy, drdy * + * 1-arg: Does not exist. + * * 2-arg: dudx dvdx dudy dvdy * dPdx.x dPdx.y dPdy.x dPdy.y * m4 m5 m6 m7 @@ -631,18 +649,26 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); dPdx.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); dPdy.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); + } else if (ir->op == ir_txs) { + /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ + simd16 = true; + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. */ + simd16 = true; assert(ir->op == ir_txb || ir->op == ir_txl); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { @@ -671,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* The unused upper half. */ mlen++; + } + if (simd16) { /* Now, since we're doing simd16, the return is 2 interleaved * vec4s where the odd-indexed ones are junk. We'll need to move * this weirdness around to the expected layout. */ - simd16 = true; orig_dst = dst; - dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, - 2)); - dst.type = BRW_REGISTER_TYPE_F; + const glsl_type *vec_type = + glsl_type::get_instance(ir->type->base_type, 4, 1); + dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); + dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) + : BRW_REGISTER_TYPE_F; } fs_inst *inst = NULL; @@ -697,6 +726,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; + case ir_txs: + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -732,6 +764,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, int base_mrf = 2; int reg_width = c->dispatch_width / 8; bool header_present = false; + const int vector_elements = + ir->coordinate ? ir->coordinate->type->vector_elements : 0; if (ir->offset) { /* The offsets set up by the ir_texture visitor are in the @@ -742,7 +776,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), coordinate); @@ -750,7 +784,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->saturate = true; coordinate.reg_offset++; } - mlen += ir->coordinate->type->vector_elements * reg_width; + mlen += vector_elements * reg_width; if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); @@ -786,9 +820,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -816,6 +852,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXD, dst); break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -850,6 +893,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -860,11 +904,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -873,9 +919,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (c->dispatch_width == 16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -900,13 +948,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } /* Set up the coordinate (except for TXD where it was done earlier) */ - if (ir->op != ir_txd) { + if (ir->op != ir_txd && ir->op != ir_txs) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); @@ -924,7 +978,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; - case ir_txf: assert(!"TXF unsupported."); + case ir_txf: assert(!"TXF unsupported."); break; + case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -959,7 +1014,8 @@ fs_visitor::visit(ir_texture *ir) } this->result = reg_undef; - ir->coordinate->accept(this); + if (ir->coordinate) + ir->coordinate->accept(this); fs_reg coordinate = this->result; if (ir->offset != NULL) { @@ -1000,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir) * texture coordinates. We use the program parameter state * tracking to get the scaling factor. */ - if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { + if (intel->gen < 6 && + ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; int tokens[STATE_LENGTH] = { STATE_INTERNAL, @@ -1046,7 +1103,7 @@ fs_visitor::visit(ir_texture *ir) /* Writemasking doesn't eliminate channels on SIMD8 texture * samples, so don't worry about them. */ - fs_reg dst = fs_reg(this, glsl_type::vec4_type); + fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); if (intel->gen >= 7) { inst = emit_texture_gen7(ir, dst, coordinate, sampler); @@ -1070,6 +1127,7 @@ fs_visitor::visit(ir_texture *ir) if (hw_compare_supported) { inst->shadow_compare = true; } else { + this->result = reg_undef; ir->shadow_comparitor->accept(this); fs_reg ref = this->result; @@ -1465,8 +1523,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1475,8 +1533,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1526,8 +1584,8 @@ fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1583,8 +1641,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1684,7 +1742,7 @@ fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1721,7 +1779,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); @@ -1733,6 +1791,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1751,8 +1810,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1770,16 +1831,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 03cebbb824b..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6674f1640c8..09b5be4c96e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, return GL_TRUE; } +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +int +brw_get_scratch_size(int size) +{ + int i; + + for (i = 1024; i < size; i *= 2) + ; + + return i; +} + +void +brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size) +{ + drm_intel_bo *old_bo = *scratch_bo; + + if (old_bo && old_bo->size < size) { + drm_intel_bo_unreference(old_bo); + old_bo = NULL; + } + + if (!old_bo) { + *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9471883fb2b..3ff6bbaed47 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ extern "C" { #include "main/macros.h" #include "brw_context.h" +#include "brw_vs.h" } #include "brw_fs.h" #include "../glsl/ir_optimization.h" @@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (!brw_fs_precompile(ctx, prog)) return false; + if (!brw_vs_precompile(ctx, prog)) + return false; + return true; } @@ -75,10 +79,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) { + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[stage]; + + if (!shader) + continue; - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { void *mem_ctx = ralloc_context(NULL); bool progress; @@ -106,18 +115,22 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) brw_do_cubemap_normalize(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + + bool input = true; + bool output = stage == MESA_SHADER_FRAGMENT; + bool temp = stage == MESA_SHADER_FRAGMENT; + bool uniform = true; + lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); + input, output, temp, uniform); do { progress = false; - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); + if (stage == MESA_SHADER_FRAGMENT) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } progress = do_lower_jumps(shader->ir, true, true, true, /* main return */ @@ -192,3 +205,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..1054d7a589e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,11 @@ * IN THE SOFTWARE. */ +#include <stdint.h> +#include "brw_defines.h" + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b9e5cc1a534..cb7a3ef73d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) } } +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_VS_CONSTANTS: dump_vs_constants(brw, offset, size); break; + case AUB_TRACE_WM_CONSTANTS: + dump_wm_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp new file mode 100644 index 00000000000..760bc1f7acd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -0,0 +1,161 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +#define MAX_INSTRUCTION (1 << 30) + +namespace brw { + +void +vec4_visitor::calculate_live_intervals() +{ + int *def = ralloc_array(mem_ctx, int, virtual_grf_count); + int *use = ralloc_array(mem_ctx, int, virtual_grf_count); + int loop_depth = 0; + int loop_start = 0; + + if (this->live_intervals_valid) + return; + + for (int i = 0; i < virtual_grf_count; i++) { + def[i] = MAX_INSTRUCTION; + use[i] = -1; + } + + int ip = 0; + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->opcode == BRW_OPCODE_DO) { + if (loop_depth++ == 0) + loop_start = ip; + } else if (inst->opcode == BRW_OPCODE_WHILE) { + loop_depth--; + + if (loop_depth == 0) { + /* Patches up the use of vars marked for being live across + * the whole loop. + */ + for (int i = 0; i < virtual_grf_count; i++) { + if (use[i] == loop_start) { + use[i] = ip; + } + } + } + } else { + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + int reg = inst->src[i].reg; + + if (!loop_depth) { + use[reg] = ip; + } else { + def[reg] = MIN2(loop_start, def[reg]); + use[reg] = loop_start; + + /* Nobody else is going to go smash our start to + * later in the loop now, because def[reg] now + * points before the bb header. + */ + } + } + } + if (inst->dst.file == GRF) { + int reg = inst->dst.reg; + + if (!loop_depth) { + def[reg] = MIN2(def[reg], ip); + } else { + def[reg] = MIN2(def[reg], loop_start); + } + } + } + + ip++; + } + + ralloc_free(this->virtual_grf_def); + ralloc_free(this->virtual_grf_use); + this->virtual_grf_def = def; + this->virtual_grf_use = use; + + this->live_intervals_valid = true; +} + +bool +vec4_visitor::virtual_grf_interferes(int a, int b) +{ + int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); + int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); + + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); + + return start < end; +} + +/** + * Must be called after calculate_live_intervales() to remove unused + * writes to registers -- register allocation will fail otherwise + * because something deffed but not used won't be considered to + * interfere with other regs. + */ +bool +vec4_visitor::dead_code_eliminate() +{ + bool progress = false; + int pc = 0; + + calculate_live_intervals(); + + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { + inst->remove(); + progress = true; + } + + pc++; + } + + if (progress) + live_intervals_valid = false; + + return progress; +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 00000000000..1db910e2b99 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,489 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include <stdint.h> +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. + */ +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + HW_REG, /* a struct brw_reg */ + ATTR, + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +class reg +{ +public: + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** virtual register number. 0 = fixed hw reg */ + int reg; + /** Offset within the virtual register. */ + int reg_offset; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + bool sechalf; + struct brw_reg fixed_hw_reg; + int smear; /* -1, or a channel of the reg to smear to all channels. */ + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +class src_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + + this->file = BAD_FILE; + } + + src_reg(register_file file, int reg, const glsl_type *type) + { + init(); + + this->file = file; + this->reg = reg; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + } + + /** Generic unset register constructor. */ + src_reg() + { + init(); + } + + src_reg(float f) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + src_reg(uint32_t u) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.f = u; + } + + src_reg(int32_t i) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + src_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit src_reg(dst_reg reg); + + GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ + bool negate; + bool abs; + + src_reg *reladdr; +}; + +class dst_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + this->file = BAD_FILE; + this->writemask = WRITEMASK_XYZW; + } + + dst_reg() + { + init(); + } + + dst_reg(register_file file, int reg) + { + init(); + + this->file = file; + this->reg = reg; + } + + dst_reg(struct brw_reg reg) + { + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + } + + dst_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit dst_reg(src_reg reg); + + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + + src_reg *reladdr; +}; + +class vec4_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset */ + /** @{ + * Annotation for the generated IR. One of the two can be set. + */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes. + */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + int *virtual_grf_def; + int *virtual_grf_use; + bool live_intervals_valid; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. + */ + dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); + int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + void move_grf_array_access_to_scratch(); + void calculate_live_intervals(); + bool dead_code_eliminate(); + bool virtual_grf_interferes(int a, int b); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated); + + void emit_constant_values(dst_reg *dst, ir_constant *value); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + src_reg get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset); + void emit_scratch_read(vec4_instruction *inst, + dst_reg dst, + src_reg orig_src, + int base_offset); + void emit_scratch_write(vec4_instruction *inst, + src_reg temp, + dst_reg orig_dst, + int base_offset); + + GLboolean try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + + void generate_urb_write(vec4_instruction *inst); + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 00000000000..65ac7d9dc09 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,854 @@ +/* Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +extern "C" { +#include "brw_eu.h" +}; + +using namespace brw; + +namespace brw { + +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED + * attributes come in as floating point conversions of the + * integer values. + */ + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0); + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } + } + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = reg; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + return payload_reg + nr_attributes; +} + +int +vec4_visitor::setup_uniforms(int reg) +{ + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + if (intel->gen >= 6) { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } + } + + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (intel->gen < 6 && this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + reg++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. + */ + c->prog_data.nr_params = this->uniforms * 4; + + c->prog_data.curb_read_length = reg - 1; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +struct brw_reg +vec4_instruction::get_dst(void) +{ + struct brw_reg brw_reg; + + switch (dst.file) { + case GRF: + brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + brw_reg = retype(brw_reg, dst.type); + brw_reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + brw_reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + brw_reg = brw_null_reg(); + break; + + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + return brw_reg; +} + +struct brw_reg +vec4_instruction::get_src(int i) +{ + struct brw_reg brw_reg; + + switch (src[i].file) { + case GRF: + brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case IMM: + switch (src[i].type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(src[i].imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case HW_REG: + brw_reg = src[i].fixed_hw_reg; + break; + + case BAD_FILE: + /* Probably unused. */ + brw_reg = brw_null_reg(); + break; + case ATTR: + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + + return brw_reg; +} + +void +vec4_visitor::generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +static void +check_gen6_math_src_arg(struct brw_reg src) +{ + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); +} + +void +vec4_visitor::generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + check_gen6_math_src_arg(src); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + check_gen6_math_src_arg(src0); + check_gen6_math_src_arg(src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_urb_write(vec4_instruction *inst) +{ + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + inst->base_mrf, /* starting mrf reg nr */ + brw_vec8_grf(0, 0), /* src */ + false, /* allocate */ + true, /* used */ + inst->mlen, + 0, /* response len */ + inst->eot, /* eot */ + inst->eot, /* writes complete */ + inst->offset, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); +} + +void +vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index) +{ + int second_vertex_offset; + + if (intel->gen >= 6) + second_vertex_offset = 1; + else + second_vertex_offset = 16; + + m1 = retype(m1, BRW_REGISTER_TYPE_D); + + /* Set up M1 (message payload). Only the block offsets in M1.0 and + * M1.4 are used, and the rest are ignored. + */ + struct brw_reg m1_0 = suboffset(vec1(m1), 0); + struct brw_reg m1_4 = suboffset(vec1(m1), 4); + struct brw_reg index_0 = suboffset(vec1(index), 0); + struct brw_reg index_4 = suboffset(vec1(index), 4); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, m1_0, index_0); + + brw_set_predicate_inverse(p, true); + if (index.file == BRW_IMMEDIATE_VALUE) { + index_4.dw1.ud++; + brw_MOV(p, m1_4, index_4); + } else { + brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); + } + + brw_pop_insn_state(p); +} + +void +vec4_visitor::generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) +{ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_read_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 2, /* mlen */ + 1 /* rlen */); +} + +void +vec4_visitor::generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) +{ + /* If the instruction is predicated, we'll predicate the send, not + * the header setup. + */ + brw_set_predicate_control(p, false); + + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), + retype(src, BRW_REGISTER_TYPE_D)); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + + brw_set_predicate_control(p, inst->predicate); + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_write_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 3, /* mlen */ + true, /* header present */ + false, /* pixel scoreboard */ + 0, /* rlen */ + false, /* eot */ + false /* commit */); +} + +void +vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, + struct brw_reg dst, + struct brw_reg *src) +{ + vec4_instruction *inst = (vec4_instruction *)instruction; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; + + case SHADER_OPCODE_POW: + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } + break; + + case VS_OPCODE_URB_WRITE: + generate_urb_write(inst); + break; + + case VS_OPCODE_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; + + case VS_OPCODE_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; + + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + fail("unsupported opcode in `%s' in VS\n", + brw_opcodes[inst->opcode].name); + } else { + fail("Unsupported opcode %d in VS", inst->opcode); + } + } +} + +bool +vec4_visitor::run() +{ + /* Generate VS IR for main(). (the visitor only descends into + * functions called "main"). + */ + visit_instructions(shader->ir); + + emit_urb_writes(); + + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. + */ + move_grf_array_access_to_scratch(); + + bool progress; + do { + progress = false; + progress = dead_code_eliminate() || progress; + } while (progress); + + if (failed) + return false; + + setup_payload(); + reg_allocate(); + + if (failed) + return false; + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + +void +vec4_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("Native code for vertex shader %d:\n", prog->Name); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(i); + } + dst = inst->get_dst(); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicate); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst->header.predicate_control = inst->predicate; + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + default: + generate_vs_instruction(inst, dst, src); + break; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} + +extern "C" { + +bool +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) +{ + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + prog->LinkStatus = GL_FALSE; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + return false; + } + + return true; +} + +} /* extern "C" */ + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp new file mode 100644 index 00000000000..3f052ff64cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -0,0 +1,234 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +extern "C" { +#include "main/macros.h" +#include "program/register_allocate.h" +} /* extern "C" */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int hw_reg_mapping[this->virtual_grf_count]; + bool virtual_grf_used[this->virtual_grf_count]; + int i; + int next; + + /* Calculate which virtual GRFs are actually in use after whatever + * optimization passes have occurred. + */ + for (int i = 0; i < this->virtual_grf_count; i++) { + virtual_grf_used[i] = false; + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + if (inst->dst.file == GRF) + virtual_grf_used[inst->dst.reg] = true; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + virtual_grf_used[inst->src[i].reg] = true; + } + } + + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + if (virtual_grf_used[i]) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (prog_data->total_grf > BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + prog_data->total_grf, BRW_MAX_GRF); + } +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int base_reg_count) +{ + /* Compute the total number of registers across all classes. */ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->vs.ra_reg_to_grf); + brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->vs.regs); + brw->vs.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->vs.classes); + brw->vs.classes = ralloc_array(brw, int, class_count + 1); + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs); + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg); + + brw->vs.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + ra_set_finalize(brw->vs.regs); +} + +void +vec4_visitor::reg_allocate() +{ + int hw_reg_mapping[virtual_grf_count]; + int first_assigned_grf = this->first_non_payload_grf; + int base_reg_count = BRW_MAX_GRF - first_assigned_grf; + int class_sizes[base_reg_count]; + int class_count = 0; + + /* Using the trivial allocator can be useful in debugging undefined + * register access as a result of broken optimization passes. + */ + if (0) { + reg_allocate_trivial(); + return; + } + + calculate_live_intervals(); + + /* Set up the register classes. + * + * The base registers store a vec4. However, we'll need larger + * storage for arrays, structures, and matrices, which will be sets + * of contiguous registers. + */ + class_sizes[class_count++] = 1; + + for (int r = 0; r < virtual_grf_count; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + if (this->virtual_grf_sizes[r] >= base_reg_count) { + fail("Object too large to register allocate.\n"); + } + + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count); + + struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs, + virtual_grf_count); + + for (int i = 0; i < virtual_grf_count; i++) { + for (int c = 0; c < class_count; c++) { + if (class_sizes[c] == this->virtual_grf_sizes[i]) { + ra_set_node_class(g, i, brw->vs.classes[c]); + break; + } + } + + for (int j = 0; j < i; j++) { + if (virtual_grf_interferes(i, j)) { + ra_add_node_interference(g, i, j); + } + } + } + + if (!ra_allocate_no_spills(g)) { + ralloc_free(g); + fail("No register spilling support yet\n"); + } + + /* Get the chosen virtual registers for each node, and map virtual + * regs in the register classes back down to real hardware reg + * numbers. + */ + prog_data->total_grf = first_assigned_grf; + for (int i = 0; i < virtual_grf_count; i++) { + int reg = ra_get_node_reg(g, i); + + hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; + prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + ralloc_free(g); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 00000000000..b3a07bd0539 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,2156 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void +vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +{ + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + emit(dot_opcodes[elements - 2], dst, src0, src1); +} + +void +vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) +{ + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. + */ + src_reg temp_src = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, temp_src); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, temp_src); + } +} + +void +vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) +{ + vec4_instruction *inst = emit(opcode, dst, src); + inst->base_mrf = 1; + inst->mlen = 1; +} + +void +vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) +{ + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + break; + default: + assert(!"not reached: bad math opcode"); + return; + } + + if (intel->gen >= 6) { + return emit_math1_gen6(opcode, dst, src); + } else { + return emit_math1_gen4(opcode, dst, src); + } +} + +void +vec4_visitor::emit_math2_gen6(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + src_reg expanded; + + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the sources to temporaries to make it + * generally work. + */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); + src1 = expanded; + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, src0, src1); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +/* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. + */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. */ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. + */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + if (swiz <= last_swiz) + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. + */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + + case ir_binop_all_equal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + return; + + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; + } + break; + + case ir_var_auto: + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + break; + + case ir_var_uniform: + reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } + break; + + default: + assert(!"not reached"); + } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + dst_reg counter; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. + */ + this->base_ir = NULL; + + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); + + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); + + emit(BRW_OPCODE_MOV, counter, this->result); + } + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + this->base_ir = ir->to; + ir->to->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), + src_reg(counter), this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); + + inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->body_instructions); + + + if (ir->increment) { + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]); + emit(BRW_OPCODE_MOV, result_dst, src_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + } + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + for (i = 0; i < ir->type->vector_elements; i++) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + const struct glsl_type *type = ir->type; + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); + + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + this->result.swizzle = swizzle_for_size(type->vector_elements); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg(element_size)); + } + + if (src.reladdr) { + src_reg temp = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + + index_reg = temp; + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + src.type = brw_type_for_base_type(ir->type); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + this->result.type = brw_type_for_base_type(ir->type); + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return dst_reg(v->result); +} + +void +vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated) +{ + if (type->base_type == GLSL_TYPE_STRUCT) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.structure[i].type, predicated); + } + return; + } + + if (type->is_array()) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.array, predicated); + } + return; + } + + if (type->is_matrix()) { + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, 1); + + for (int i = 0; i < type->matrix_columns; i++) { + emit_block_move(dst, src, vec_type, predicated); + } + return; + } + + assert(type->is_scalar() || type->is_vector()); + + dst->type = brw_type_for_base_type(type); + src->type = dst->type; + + dst->writemask = (1 << type->vector_elements) - 1; + + /* Do we need to worry about swizzling a swizzle? */ + assert(src->swizzle = BRW_SWIZZLE_NOOP); + src->swizzle = swizzle_for_size(type->vector_elements); + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); + if (predicated) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst->reg_offset++; + src->reg_offset++; +} + + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. */ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. */ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + ir->rhs->accept(this); + src_reg src = this->result; + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + src_reg src = this->result; + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector() || + ir->lhs->type->is_scalar()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + foreach_list(node, &ir->components) { + ir_constant *field_value = (ir_constant *)node; + + emit_constant_values(dst, field_value); + } + return; + } + + if (ir->type->is_array()) { + for (unsigned int i = 0; i < ir->type->length; i++) { + emit_constant_values(dst, ir->array_elements[i]); + } + return; + } + + if (ir->type->is_matrix()) { + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst->writemask = 1 << j; + dst->type = BRW_REGISTER_TYPE_F; + + emit(BRW_OPCODE_MOV, *dst, + src_reg(ir->value.f[i * ir->type->vector_elements + j])); + } + dst->reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + dst->writemask = 1 << i; + dst->type = brw_type_for_base_type(ir->type); + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } + dst->reg_offset++; +} + +void +vec4_visitor::visit(ir_constant *ir) +{ + dst_reg dst = dst_reg(this, ir->type); + this->result = src_reg(dst); + + emit_constant_values(&dst, ir); +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + /* FINISHME: Implement vertex texturing. + * + * With 0 vertex samplers available, the linker will reject + * programs that do vertex texturing, but after our visitor has + * run. + */ +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + this->base_ir = ir->condition; + + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + vec4_instruction *inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + + /* user clip distance. */ + header_mrf += 2; + + /* Pad so that vertex element data is aligned. */ + header_mrf++; + } else { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + } + + return header_mrf; +} + +int +vec4_visitor::emit_vue_header_gen6(int header_mrf) +{ + struct brw_reg reg; + + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m2) of the header is indices, point width, clip flags. + * dword 4-7 (m3) is the 4D space position + * dword 8-15 (m4,m5) of the vertex header is the user clip distance if + * enabled. + * + * m4 or 6 is the first vertex element data we fill. + */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 3))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + uint64_t outputs_remaining = c->prog_data.outputs_written; + /* In the process of generating our URB write message contents, we + * may need to unspill a register or load from an array. Those + * reads would use MRFs 14-15. + */ + int max_usable_mrf = 13; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. + */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + /* Set up the VUE data for the first URB write */ + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + outputs_remaining &= ~BITFIELD64_BIT(attr); + + /* This is set up in the VUE header. */ + if (attr == VERT_RESULT_HPOS) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), + src_reg(output_reg[attr])); + + if ((attr == VERT_RESULT_COL0 || + attr == VERT_RESULT_COL1 || + attr == VERT_RESULT_BFC0 || + attr == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } + + /* If this was MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf > max_usable_mrf) { + attr++; + break; + } + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = !outputs_remaining; + + urb_entry_size = mrf - base_mrf; + + /* Optional second URB write */ + if (outputs_remaining) { + mrf = base_mrf + 1; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + assert(mrf < max_usable_mrf); + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + } + + inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + /* URB destination offset. In the previous write, we got MRFs + * 2-13 minus the one header MRF, so 12 regs. URB offset is in + * URB row increments, and each of our MRFs is half of one of + * those, since we're doing interleaved writes. + */ + inst->offset = (max_usable_mrf - base_mrf) / 2; + + urb_entry_size += mrf - base_mrf; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +src_reg +vec4_visitor::get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset) +{ + /* Because we store the values to scratch interleaved like our + * vertex data, we need to scale the vec4 index by 2. + */ + int message_header_scale = 2; + + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. + */ + if (intel->gen < 6) + message_header_scale *= 16; + + if (reladdr) { + src_reg index = src_reg(this, glsl_type::int_type); + + vec4_instruction *add = emit(BRW_OPCODE_ADD, + dst_reg(index), + *reladdr, + src_reg(reg_offset)); + /* Move our new instruction from the tail to its correct place. */ + add->remove(); + inst->insert_before(add); + + vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index), + index, src_reg(message_header_scale)); + mul->remove(); + inst->insert_before(mul); + + return index; + } else { + return src_reg(reg_offset * message_header_scale); + } +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. + */ +void +vec4_visitor::emit_scratch_read(vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset) +{ + int reg_offset = base_offset + orig_src.reg_offset; + src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset); + + vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ, + temp, index); + + scratch_read_inst->base_mrf = 14; + scratch_read_inst->mlen = 1; + /* Move our instruction from the tail to its correct place. */ + scratch_read_inst->remove(); + inst->insert_before(scratch_read_inst); +} + +/** + * Emits an instruction after @inst to store the value to be written + * to @orig_dst to scratch space at @base_offset, from @temp. + */ +void +vec4_visitor::emit_scratch_write(vec4_instruction *inst, + src_reg temp, dst_reg orig_dst, + int base_offset) +{ + int reg_offset = base_offset + orig_dst.reg_offset; + src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); + + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + orig_dst.writemask)); + vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, + dst, temp, index); + scratch_write_inst->base_mrf = 13; + scratch_write_inst->mlen = 2; + scratch_write_inst->predicate = inst->predicate; + /* Move our instruction from the tail to its correct place. */ + scratch_write_inst->remove(); + inst->insert_after(scratch_write_inst); +} + +/** + * We can't generally support array access in GRF space, because a + * single instruction's destination can only span 2 contiguous + * registers. So, we send all GRF arrays that get variable index + * access to scratch space. + */ +void +vec4_visitor::move_grf_array_access_to_scratch() +{ + int scratch_loc[this->virtual_grf_count]; + + for (int i = 0; i < this->virtual_grf_count; i++) { + scratch_loc[i] = -1; + } + + /* First, calculate the set of virtual GRFs that need to be punted + * to scratch due to having any array access on them, and where in + * scratch. + */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && inst->dst.reladdr && + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + } + + for (int i = 0 ; i < 3; i++) { + src_reg *src = &inst->src[i]; + + if (src->file == GRF && src->reladdr && + scratch_loc[src->reg] == -1) { + scratch_loc[src->reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + } + } + } + + /* Now, for anything that will be accessed through scratch, rewrite + * it to load/store. Note that this is a _safe list walk, because + * we may generate a new scratch_write instruction after the one + * we're processing. + */ + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* Set up the annotation tracking for new generated instructions. */ + base_ir = inst->ir; + current_annotation = inst->annotation; + + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + src_reg temp = src_reg(this, glsl_type::vec4_type); + + emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); + + inst->dst.file = temp.file; + inst->dst.reg = temp.reg; + inst->dst.reg_offset = temp.reg_offset; + inst->dst.reladdr = NULL; + } + + for (int i = 0 ; i < 3; i++) { + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + + emit_scratch_read(inst, temp, inst->src[i], + scratch_loc[inst->src[i].reg]); + + inst->src[i].file = temp.file; + inst->src[i].reg = temp.reg; + inst->src[i].reg_offset = temp.reg_offset; + inst->src[i].reladdr = NULL; + } + } +} + + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->vp = prog->VertexProgram; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_def = NULL; + this->virtual_grf_use = NULL; + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; + this->live_intervals_valid = false; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); +} + +vec4_visitor::~vec4_visitor() +{ + ralloc_free(this->mem_ctx); + hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) +{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad5311fe3..3373e707d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -39,17 +40,21 @@ #include "../glsl/ralloc.h" -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) +static bool +do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +90,25 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. */ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs && prog) { + if (!brw_vs_emit(prog, &c)) { + ralloc_free(mem_ctx); + return false; + } + } else { + brw_old_vs_emit(&c); + } + + /* Scratch space is used for register spilling */ + if (c.last_scratch) { + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c.prog_data.total_scratch * brw->vs_max_threads); + } /* get the program */ @@ -111,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); + + return true; } @@ -155,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), &brw->vs.prog_offset, &brw->vs.prog_data)) { - do_vs_prog(brw, vp, &key); + bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram, + vp, &key); + + assert(success); } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } - /* See brw_vs.c: */ const struct brw_tracked_state brw_vs_prog = { @@ -174,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = { }, .prepare = brw_upload_vs_prog }; + +bool +brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_vs_prog_key key; + struct gl_vertex_program *vp = prog->VertexProgram; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + uint32_t old_prog_offset = brw->vs.prog_offset; + struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data; + bool success; + + if (!vp) + return true; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = bvp->id; + key.clamp_vertex_color = true; + + success = do_vs_prog(brw, prog, bvp, &key); + + brw->vs.prog_offset = old_prog_offset; + brw->vs.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a8534..beccb381ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -66,6 +66,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint last_scratch; GLuint first_tmp; GLuint last_tmp; @@ -92,6 +93,8 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); +bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. - */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9d733344a26..bfee811e13d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c, brw_MAC(p, dst, arg0, arg1); } -/** 3 or 4-component vector normalization */ -static void emit_nrm( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - int num_comps) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - - /* tmp = dot(arg0, arg0) */ - if (num_comps == 3) - brw_DP3(p, tmp, arg0, arg0); - else - brw_DP4(p, tmp, arg0, arg0); - - /* tmp = 1 / sqrt(tmp) */ - emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL); - - /* dst = arg0 * tmp */ - brw_MUL(p, dst, arg0, tmp); - - release_tmp(c, tmp); -} - - static struct brw_reg get_constant(struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1359,7 +1334,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); @@ -1821,6 +1796,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: @@ -1900,7 +1878,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. */ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 @@ -1980,9 +1958,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 1 : 0; + } else args[i] = get_arg(c, inst, i); } @@ -1993,7 +1984,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); @@ -2025,12 +2020,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: - emit_nrm(c, dst, args[0], 3); - break; - case OPCODE_NRM4: - emit_nrm(c, dst, args[0], 4); - break; case OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index fc4373ab311..29b3e47ab0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw) else vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + if (brw->vs.prog_data->total_scratch != 0) { + vs->thread2.scratch_space_base_pointer = + brw->vs.scratch_bo->offset >> 10; /* reloc */ + vs->thread2.per_thread_scratch_space = + ffs(brw->vs.prog_data->total_scratch) - 11; + } else { + vs->thread2.scratch_space_base_pointer = 0; + vs->thread2.per_thread_scratch_space = 0; + } + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 55dbd4fa8b0..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; /* Mark that the current program cache BO has been used by the GPU. * It will be reallocated if we need to put new programs in for the diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b0dfdd536aa..e76832515fe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw, */ return false; } - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw, if (!brw_wm_fs_emit(brw, c, prog)) return false; } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); @@ -241,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { - uint32_t total_scratch; - - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); - } - } - else { - c->prog_data.total_scratch = 0; + brw_get_scratch_bo(intel, &brw->wm.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. */ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7cd3edad235..bd46bd8de43 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, GLfloat s3) { struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; + gl_constant_value values[4]; GLuint idx; GLuint swizzle; struct prog_src_register reg; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); reg = src_reg(PROGRAM_STATE_VAR, idx); @@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c, static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct prog_src_register coord; struct prog_dst_register tmpcoord = { 0 }; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; @@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp0); release_temp(c, tmp1); } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index f78bdc31866..ccf9dc2bc18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); break; case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); break; default: diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 98146136703..6834ebad780 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6); + /* On Gen6+, the sampler can handle non-normalized texture + * rectangle coordinates natively + */ + if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb4cdbaadf9..b94121e8437 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } @@ -151,7 +160,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); @@ -160,6 +177,32 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 185da9c355f..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,14 +54,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index e787c21f4d1..aee67c87472 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8); + /* The sampler can handle non-normalized texture rectangle coordinates + * natively + */ + if (texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0fad3d2fb68..f3cd5d15bf0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..55a603e887a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { @@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b61a2ffef19..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -308,12 +308,29 @@ emit: * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * - * XXX: There is also a workaround that would appear to apply to this - * workaround, but it doesn't appear to be necessary so far: + * And the workaround for these two requires this workaround first: * - * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. */ void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) @@ -323,9 +340,17 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); OUT_RELOC(intel->batch.workaround_bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); @@ -365,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 30be1b9382f..b18dd2922d9 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, 0, &image_x, &image_y); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 439d6fc8247..d908975fc87 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -41,8 +41,7 @@ #include "intel_regions.h" static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj); +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); /** Allocates a new drm_intel_bo to store the data for the buffer object. */ static void @@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) * (though it does if you call glDeleteBuffers) */ if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + intel_bufferobj_unmap(ctx, obj); free(intel_obj->sys_buffer); if (intel_obj->region) { @@ -203,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx, */ static void intel_bufferobj_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object *obj) @@ -276,82 +274,28 @@ intel_bufferobj_subdata(struct gl_context * ctx, */ static void intel_bufferobj_get_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); assert(intel_obj); if (intel_obj->sys_buffer) memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else - drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); -} - - - -/** - * Called via glMapBufferARB(). - */ -static void * -intel_bufferobj_map(struct gl_context * ctx, - GLenum target, - GLenum access, struct gl_buffer_object *obj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); - GLboolean write_only = (access == GL_WRITE_ONLY_ARB); - - assert(intel_obj); - - if (intel_obj->sys_buffer) { - if (!read_only && intel_obj->source) { - release_buffer(intel_obj); - } - - if (!intel_obj->buffer || intel_obj->source) { - obj->Pointer = intel_obj->sys_buffer; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); } - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - } - - /* Flush any existing batchbuffer that might reference this data. */ - if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) - intel_flush(ctx); - - if (intel_obj->region) - intel_bufferobj_cow(intel, intel_obj); - - if (intel_obj->buffer == NULL) { - obj->Pointer = NULL; - return NULL; - } - - if (write_only) { - drm_intel_gem_bo_map_gtt(intel_obj->buffer); - intel_obj->mapped_gtt = GL_TRUE; - } else { - drm_intel_bo_map(intel_obj->buffer, !read_only); - intel_obj->mapped_gtt = GL_FALSE; + drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); } +} - obj->Pointer = intel_obj->buffer->virtual; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; -} /** - * Called via glMapBufferRange(). + * Called via glMapBufferRange and glMapBuffer * * The goal of this extension is to allow apps to accumulate their rendering * at the same time as they accumulate their buffer object. Without it, @@ -368,12 +312,11 @@ intel_bufferobj_map(struct gl_context * ctx, */ static void * intel_bufferobj_map_range(struct gl_context * ctx, - GLenum target, GLintptr offset, GLsizeiptr length, + GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); assert(intel_obj); @@ -385,6 +328,9 @@ intel_bufferobj_map_range(struct gl_context * ctx, obj->AccessFlags = access; if (intel_obj->sys_buffer) { + const bool read_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; + if (!read_only && intel_obj->source) release_buffer(intel_obj); @@ -468,7 +414,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, * would defeat the point. */ static void -intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, +intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj) { @@ -502,8 +448,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, * Called via glUnmapBuffer(). */ static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -758,23 +703,23 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, * not overlap. */ if (src == dst) { - char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_READ_WRITE, dst); + char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_READ_BIT, dst); memmove(ptr + write_offset, ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, dst); } else { const char *src_ptr; char *dst_ptr; - src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER, - GL_READ_ONLY, src); - dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_WRITE_ONLY, dst); + src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, src); + intel_bufferobj_unmap(ctx, dst); } return; } @@ -924,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = intel_bufferobj_data; functions->BufferSubData = intel_bufferobj_subdata; functions->GetBufferSubData = intel_bufferobj_get_subdata; - functions->MapBuffer = intel_bufferobj_map; functions->MapBufferRange = intel_bufferobj_map_range; functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; functions->UnmapBuffer = intel_bufferobj_unmap; diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index dfca03c14bf..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; @@ -182,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2ba13632569..14342ef6246 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. + */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1449,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * a combined depth/stencil buffer. Discard the hiz buffer too. */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE; + if (intel->must_use_separate_stencil) { + _mesa_problem(&intel->ctx, + "intel_context requires separate stencil, but the " + "DRIscreen does not support it. You may need to " + "upgrade the Intel X driver to 2.16.0"); + abort(); + } /* 1. Discard depth and stencil renderbuffers. */ _mesa_remove_renderbuffer(fb, BUFFER_DEPTH); @@ -1527,7 +1539,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 55bcc757873..754f9f202d1 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. * - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. */ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); if (!irb->region) return false; @@ -594,17 +596,15 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } @@ -645,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -698,8 +714,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile @@ -713,8 +728,8 @@ intel_render_texture(struct gl_context * ctx, new_mt = intel_miptree_create(intel, image->TexObject->Target, intel_image->base.TexFormat, - intel_image->level, - intel_image->level, + intel_image->base.Level, + intel_image->base.Level, intel_image->base.Width, intel_image->base.Height, intel_image->base.Depth, @@ -722,8 +737,8 @@ intel_render_texture(struct gl_context * ctx, intel_miptree_image_copy(intel, new_mt, - intel_image->face, - intel_image->level, + intel_image->base.Face, + intel_image->base.Level, old_mt); intel_miptree_release(intel, &intel_image->mt); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f7f99a4f00c..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -58,7 +58,6 @@ struct intel_renderbuffer /** \} */ - GLuint draw_offset; /**< Offset of drawing address within the region */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 4e711de1ce1..f36240d7f1d 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, struct gl_texture_image *image) { struct intel_texture_image *intelImage = intel_texture_image(image); - GLuint level = intelImage->level; + GLuint level = intelImage->base.Level; /* Images with borders are never pulled into mipmap trees. */ if (image->Border) diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 86d0ef2d748..d9873a303ee 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx, return NULL; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); return NULL; @@ -292,8 +292,7 @@ out: if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } intel_check_front_buffer_rendering(intel); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5aa629150cf..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -75,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 153803fba09..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. \ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. */ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 1 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 21c4a1dddba..ee0cd252375 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, if (!_mesa_is_format_compressed(first_image->TexFormat)) { GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; GLuint face, i; - /* Update the level information in our private data in the new images, - * since it didn't get set as part of a normal TexImage path. - */ for (face = 0; face < nr_faces; face++) { for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(texObj->Image[face][i]); if (!intelImage) break; - intelImage->level = i; - intelImage->face = face; /* Unreference the miptree to signal that the new Data is a * bare pointer from mesa. */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1a3643da593..600bd1251e0 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, - intelImage->level, - intelImage->face, + intelImage->base.Level, + intelImage->base.Face, 0, &image_x, &image_y); @@ -164,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel, static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - -static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 1f8b885bbec..4ee66847255 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, if (intelImage->base.Border) return NULL; - if (intelImage->level > intelObj->base.BaseLevel && + if (intelImage->base.Level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || @@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel, * likely base level width/height/depth for a full mipmap stack * from this info, so just allocate this one level. */ - firstLevel = intelImage->level; - lastLevel = intelImage->level; + firstLevel = intelImage->base.Level; + lastLevel = intelImage->base.Level; } else { /* If this image disrespects BaseLevel, allocate from level zero. * Usually BaseLevel == 0, so it's unlikely to happen. */ - if (intelImage->level < intelObj->base.BaseLevel) + if (intelImage->base.Level < intelObj->base.BaseLevel) firstLevel = 0; else firstLevel = intelObj->base.BaseLevel; /* Figure out image dimensions at start level. */ - for (i = intelImage->level; i > firstLevel; i--) { + for (i = intelImage->base.Level; i > firstLevel; i--) { width <<= 1; if (height != 1) height <<= 1; @@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, */ if ((intelObj->base.Sampler.MinFilter == GL_NEAREST || intelObj->base.Sampler.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && + intelImage->base.Level == firstLevel && (intel->gen < 4 || firstLevel == 0)) { lastLevel = firstLevel; } else { @@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; - if (_mesa_is_format_compressed(texImage->TexFormat)) { texelBytes = 0; } @@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx, } texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, intelImage->base.ImageOffsets); } @@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); intelImage->base.RowStride /= intelImage->mt->cpp; @@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index a9ae2ec5429..e7a4318b8d8 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -52,11 +52,6 @@ struct intel_texture_image { struct gl_texture_image base; - /* These aren't stored in gl_texture_image - */ - GLuint level; - GLuint face; - /* If intelImage->mt != NULL, image data is stored here. * Else if intelImage->base.Data != NULL, image is stored there. * Else there is no image data. diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 8b43c406cf9..5fd2cc36234 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx, dstRowStride = pitch; intel_miptree_get_image_offset(intelImage->mt, level, - intelImage->face, 0, + intelImage->base.Face, 0, &blit_x, &blit_y); blit_x += xoffset; blit_y += yoffset; @@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx, } else { texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, texImage->ImageOffsets); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 7135a6276fe..31ac689ad77 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_copy(intel, intelObj->mt, - intelImage->face, - intelImage->level, intelImage->mt); + intelImage->base.Face, + intelImage->base.Level, intelImage->mt); intel_miptree_release(intel, &intelImage->mt); } @@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_data(intel, intelObj->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, intelImage->base.Data, intelImage->base.RowStride, intelImage->base.RowStride * @@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); /* convert stride to texels, not bytes */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c index e60b91f64be..433590c4181 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c @@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size } static void -nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj) { @@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off } static void -nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj) { @@ -123,23 +123,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB } static void * -nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - unsigned flags = 0; - - if (access == GL_READ_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_READ_BIT; - if (access == GL_WRITE_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_WRITE_BIT; - - return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags, - obj); -} - -static void * nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) @@ -169,7 +152,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs } static GLboolean -nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj) { assert(obj->Pointer); @@ -189,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions) functions->BufferData = nouveau_bufferobj_data; functions->BufferSubData = nouveau_bufferobj_subdata; functions->GetBufferSubData = nouveau_bufferobj_get_subdata; - functions->MapBuffer = nouveau_bufferobj_map; functions->MapBufferRange = nouveau_bufferobj_map_range; functions->UnmapBuffer = nouveau_bufferobj_unmap; } diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index b24274259f4..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -561,28 +561,29 @@ static int peephole_add_presub_add( struct rc_instruction * inst_add) { unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* XXX This isn't fully implemented, is it? */ - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */ - if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } - } + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - if (!src1) + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -615,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 0c4d8537c61..5587c16dd44 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -138,7 +139,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, + mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, + mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -184,7 +188,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else { r300FixupIndexBuffer(ctx, mesa_ind_buf); @@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -286,7 +291,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ radeon_bo_map(attr->bo, 1); if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -321,7 +327,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ } if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } radeon_bo_unmap(attr->bo); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c index 4507be29d86..74563caf47c 100644 --- a/src/mesa/drivers/dri/r600/evergreen_render.c +++ b/src/mesa/drivers/dri/r600/evergreen_render.c @@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -456,7 +457,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -531,7 +533,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -629,7 +632,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else @@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj->obj); mapped_named_bo = GL_TRUE; } @@ -675,7 +679,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; @@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 0f7a7a46b71..a565c9f2087 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -543,7 +544,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -584,7 +586,7 @@ static void r700AlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; @@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -788,7 +791,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -836,7 +840,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 607b7470d4b..a74c6c7a575 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) static inline void *radeon_bo_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } @@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy) static inline void *radeon_cs_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 0d1af726c07..7b59c0377f8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx, */ static void radeonBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, @@ -155,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx, */ static void radeonGetBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, @@ -171,17 +169,18 @@ radeonGetBufferSubData(struct gl_context * ctx, } /** - * Called via glMapBufferARB() + * Called via glMapBuffer() and glMapBufferRange() */ static void * -radeonMapBuffer(struct gl_context * ctx, - GLenum target, - GLenum access, - struct gl_buffer_object *obj) +radeonMapBufferRange(struct gl_context * ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + const GLboolean write_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT; - if (access == GL_WRITE_ONLY_ARB) { + if (write_only) { ctx->Driver.Flush(ctx); } @@ -190,12 +189,13 @@ radeonMapBuffer(struct gl_context * ctx, return NULL; } - radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; - obj->Pointer = radeon_obj->bo->ptr; - obj->Length = obj->Size; - obj->Offset = 0; + radeon_bo_map(radeon_obj->bo, write_only); + obj->Pointer = radeon_obj->bo->ptr + offset; return obj->Pointer; } @@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx, */ static GLboolean radeonUnmapBuffer(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); @@ -229,6 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = radeonBufferData; functions->BufferSubData = radeonBufferSubData; functions->GetBufferSubData = radeonGetBufferSubData; - functions->MapBuffer = radeonMapBuffer; + functions->MapBufferRange = radeonMapBufferRange; functions->UnmapBuffer = radeonUnmapBuffer; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx, } void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - -void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); @@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ce0df32bfe4..ad7e4c146a4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -787,18 +787,6 @@ static void radeon_teximage( radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s %dd: texObj %p, texImage %p, face %d, level %d\n", __func__, dims, texObj, texImage, face, level); - { - struct radeon_bo *bo; - bo = !image->mt ? image->bo : image->mt->bo; - if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { - radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, - "%s Calling teximage for texture that is " - "queued for GPU processing.\n", - __func__); - radeon_firevertices(rmesa); - } - } - t->validated = GL_FALSE; @@ -820,6 +808,18 @@ static void radeon_teximage( } } + { + struct radeon_bo *bo; + bo = !image->mt ? image->bo : image->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Calling teximage for texture that is " + "queued for GPU processing.\n", + __func__); + radeon_firevertices(rmesa); + } + } + /* Upload texture image; note that the spec allows pixels to be NULL */ if (compressed) { pixels = _mesa_validate_pbo_compressed_teximage( diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 81f48f9d95a..81d000b3952 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -454,10 +454,10 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, "glDrawPixels(invalid PBO access)"); return; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, - GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { /* buffer is already mapped - that's an error */ _mesa_error(ctx, GL_INVALID_OPERATION, @@ -508,8 +508,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, } if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } else { @@ -589,10 +588,10 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx, "glDrawPixels(invalid PBO access)"); return; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, - GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { /* buffer is already mapped - that's an error */ _mesa_error(ctx, GL_INVALID_OPERATION, @@ -642,8 +641,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx, } if (unpack->BufferObj->Name) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } else { diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore index 2575f44df4a..d0744e3f0d7 100644 --- a/src/mesa/main/.gitignore +++ b/src/mesa/main/.gitignore @@ -4,3 +4,7 @@ get_es1.c get_es2.c git_sha1.h git_sha1.h.tmp +api_exec_es1_dispatch.h +api_exec_es1_remap_helper.h +api_exec_es2_dispatch.h +api_exec_es2_remap_helper.h diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index f88da845853..b93a057e68b 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -1602,10 +1602,10 @@ void _ae_map_vbos( struct gl_context *ctx ) _ae_update_state(ctx); for (i = 0; i < actx->nr_vbos; i++) - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - actx->vbo[i]); + ctx->Driver.MapBufferRange(ctx, 0, + actx->vbo[i]->Size, + GL_MAP_READ_BIT, + actx->vbo[i]); if (actx->nr_vbos) actx->mapped_vbos = GL_TRUE; @@ -1622,9 +1622,7 @@ void _ae_unmap_vbos( struct gl_context *ctx ) assert (!actx->NewState); for (i = 0; i < actx->nr_vbos; i++) - ctx->Driver.UnmapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - actx->vbo[i]); + ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]); actx->mapped_vbos = GL_FALSE; } diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 2981d42297a..699b414f502 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -65,8 +65,8 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type, if (_mesa_is_bufferobj(elementBuf)) { /* elements are in a user-defined buffer object. need to map it */ - map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY, elementBuf); + map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size, + GL_MAP_READ_BIT, elementBuf); /* Actual address is the sum of pointers */ indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices); } @@ -89,7 +89,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type, } if (map) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf); + ctx->Driver.UnmapBuffer(ctx, elementBuf); } return max; diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index c52358ecb04..c453f9c8554 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -386,11 +386,11 @@ _mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size, * \sa glBufferSubDataARB, dd_function_table::BufferSubData. */ static void -_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset, +_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object * bufObj ) { - (void) ctx; (void) target; + (void) ctx; /* this should have been caught in _mesa_BufferSubData() */ ASSERT(size + offset <= bufObj->Size); @@ -419,12 +419,11 @@ _mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset, * \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData. */ static void -_mesa_buffer_get_subdata( struct gl_context *ctx, - GLenum target, GLintptrARB offset, +_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object * bufObj ) { - (void) ctx; (void) target; + (void) ctx; if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) { memcpy( data, (GLubyte *) bufObj->Data + offset, size ); @@ -433,49 +432,15 @@ _mesa_buffer_get_subdata( struct gl_context *ctx, /** - * Default callback for \c dd_function_tabel::MapBuffer(). - * - * The function parameters will have been already tested for errors. - * - * \param ctx GL context. - * \param target Buffer object target on which to operate. - * \param access Information about how the buffer will be accessed. - * \param bufObj Object to be mapped. - * \return A pointer to the object's internal data store that can be accessed - * by the processor - * - * \sa glMapBufferARB, dd_function_table::MapBuffer - */ -static void * -_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *bufObj ) -{ - (void) ctx; - (void) target; - (void) access; - /* Just return a direct pointer to the data */ - if (_mesa_bufferobj_mapped(bufObj)) { - /* already mapped! */ - return NULL; - } - bufObj->Pointer = bufObj->Data; - bufObj->Length = bufObj->Size; - bufObj->Offset = 0; - return bufObj->Pointer; -} - - -/** * Default fallback for \c dd_function_table::MapBufferRange(). * Called via glMapBufferRange(). */ static void * -_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset, +_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *bufObj ) { (void) ctx; - (void) target; assert(!_mesa_bufferobj_mapped(bufObj)); /* Just return a direct pointer to the data */ bufObj->Pointer = bufObj->Data + offset; @@ -491,12 +456,11 @@ _mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset, * Called via glFlushMappedBufferRange(). */ static void -_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, +_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj ) { (void) ctx; - (void) target; (void) offset; (void) length; (void) obj; @@ -512,11 +476,9 @@ _mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer */ static GLboolean -_mesa_buffer_unmap( struct gl_context *ctx, GLenum target, - struct gl_buffer_object *bufObj ) +_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj ) { (void) ctx; - (void) target; /* XXX we might assert here that bufObj->Pointer is non-null */ bufObj->Pointer = NULL; bufObj->Length = 0; @@ -543,16 +505,16 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx, assert(!_mesa_bufferobj_mapped(src)); assert(!_mesa_bufferobj_mapped(dst)); - srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER, - GL_READ_ONLY, src); - dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER, - GL_WRITE_ONLY, dst); + srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); if (srcPtr && dstPtr) memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); - ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src); - ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst); + ctx->Driver.UnmapBuffer(ctx, src); + ctx->Driver.UnmapBuffer(ctx, dst); } @@ -712,7 +674,6 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver) driver->BufferData = _mesa_buffer_data; driver->BufferSubData = _mesa_buffer_subdata; driver->GetBufferSubData = _mesa_buffer_get_subdata; - driver->MapBuffer = _mesa_buffer_map; driver->UnmapBuffer = _mesa_buffer_unmap; /* GL_ARB_map_buffer_range */ @@ -774,7 +735,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids) if (_mesa_bufferobj_mapped(bufObj)) { /* if mapped, unmap it now */ - ctx->Driver.UnmapBuffer(ctx, 0, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->AccessFlags = DEFAULT_ACCESS; bufObj->Pointer = NULL; } @@ -934,7 +895,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size, if (_mesa_bufferobj_mapped(bufObj)) { /* Unmap the existing buffer. We'll replace it now. Not an error. */ - ctx->Driver.UnmapBuffer(ctx, target, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->AccessFlags = DEFAULT_ACCESS; ASSERT(bufObj->Pointer == NULL); } @@ -980,7 +941,7 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset, bufObj->Written = GL_TRUE; ASSERT(ctx->Driver.BufferSubData); - ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj ); + ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj ); } @@ -1000,7 +961,7 @@ _mesa_GetBufferSubDataARB(GLenum target, GLintptrARB offset, } ASSERT(ctx->Driver.GetBufferSubData); - ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj ); + ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj ); } @@ -1043,8 +1004,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access) return NULL; } - ASSERT(ctx->Driver.MapBuffer); - map = ctx->Driver.MapBuffer( ctx, target, access, bufObj ); + ASSERT(ctx->Driver.MapBufferRange); + map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj); if (!map) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); return NULL; @@ -1147,7 +1108,7 @@ _mesa_UnmapBufferARB(GLenum target) } #endif - status = ctx->Driver.UnmapBuffer( ctx, target, bufObj ); + status = ctx->Driver.UnmapBuffer( ctx, bufObj ); bufObj->AccessFlags = DEFAULT_ACCESS; ASSERT(bufObj->Pointer == NULL); ASSERT(bufObj->Offset == 0); @@ -1451,8 +1412,7 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, } ASSERT(ctx->Driver.MapBufferRange); - map = ctx->Driver.MapBufferRange(ctx, target, offset, length, - access, bufObj); + map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj); if (!map) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); } @@ -1535,7 +1495,7 @@ _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length) ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT); if (ctx->Driver.FlushMappedBufferRange) - ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj); + ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj); } diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 743841be4ef..8ed1c6fa61f 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -45,9 +45,6 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> -#if defined(__linux__) && defined(__i386__) -#include <fpu_control.h> -#endif #include <float.h> #include <stdarg.h> @@ -60,29 +57,7 @@ extern "C" { /** * Get standard integer types */ -#if defined(_MSC_VER) - typedef __int8 int8_t; - typedef unsigned __int8 uint8_t; - typedef __int16 int16_t; - typedef unsigned __int16 uint16_t; - typedef __int32 int32_t; - typedef unsigned __int32 uint32_t; - typedef __int64 int64_t; - typedef unsigned __int64 uint64_t; - -# if defined(_WIN64) - typedef __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -# else - typedef __int32 intptr_t; - typedef unsigned __int32 uintptr_t; -# endif - -# define INT64_C(__val) __val##i64 -# define UINT64_C(__val) __val##ui64 -#else -# include <stdint.h> -#endif +#include <stdint.h> /** @@ -139,26 +114,28 @@ extern "C" { /** * Function inlining */ -#if defined(__GNUC__) -# define INLINE __inline__ -#elif defined(__MSC__) -# define INLINE __inline -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#elif defined(__SUNPRO_C) && defined(__C99FEATURES__) -# define INLINE inline -# define __inline inline -# define __inline__ inline -#elif (__STDC_VERSION__ >= 199901L) /* C99 */ -# define INLINE inline -#else -# define INLINE +#ifndef INLINE +# if defined(__GNUC__) +# define INLINE __inline__ +# elif defined(__MSC__) +# define INLINE __inline +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +# define INLINE inline +# define __inline inline +# define __inline__ inline +# elif (__STDC_VERSION__ >= 199901L) /* C99 */ +# define INLINE inline +# else +# define INLINE +# endif #endif diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9fe6d527f92..fcf40ecf102 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -189,31 +189,22 @@ struct dd_function_table { /*@{*/ /** - * Choose texture format. - * - * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback - * functions. The driver should examine \p internalFormat and return a - * gl_format value. + * Choose actual hardware texture format given the user-provided source + * image format and type and the desired internal format. In some + * cases, srcFormat and srcType can be GL_NONE. + * Called by glTexImage(), etc. */ - GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, + gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** - * Called by glTexImage1D(). - * - * \param target user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. + * Called by glTexImage1D(). Simply copy the source texture data into the + * destination texture memory. The gl_texture_image fields, etc. will be + * fully initialized. + * The parameters are the same as glTexImage1D(), plus: + * \param packing describes how to unpack the source data. * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p depth, \p border and \p internalFormat information. - * - * \p retainInternalCopy is returned by this function and indicates whether - * core Mesa should keep an internal copy of the texture image. - * - * Drivers should call a fallback routine from texstore.c if needed. + * \param texImage is the target texture image. */ void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint internalFormat, @@ -250,25 +241,9 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glTexSubImage1D(). - * - * \param target user specified. - * \param level user specified. - * \param xoffset user specified. - * \param yoffset user specified. - * \param zoffset user specified. - * \param width user specified. - * \param height user specified. - * \param depth user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. - * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p border and \p internalFormat information. - * - * The driver should use a fallback routine from texstore.c if needed. + * Called by glTexSubImage1D(). Replace a subset of the target texture + * with new texel data. + * \sa dd_function_table::TexImage1D. */ void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, @@ -315,24 +290,6 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glCopyTexImage1D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border ); - - /** - * Called by glCopyTexImage2D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border ); - - /** * Called by glCopyTexSubImage1D(). * * Drivers should use a fallback routine from texstore.c if needed. @@ -741,17 +698,14 @@ struct dd_function_table { const GLvoid *data, GLenum usage, struct gl_buffer_object *obj ); - void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset, + void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj ); - void (*GetBufferSubData)( struct gl_context *ctx, GLenum target, + void (*GetBufferSubData)( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj ); - void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj ); - void (*CopyBufferSubData)( struct gl_context *ctx, struct gl_buffer_object *src, struct gl_buffer_object *dst, @@ -760,15 +714,15 @@ struct dd_function_table { /* May return NULL if MESA_MAP_NOWAIT_BIT is set in access: */ - void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset, + void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj); - void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target, + void (*FlushMappedBufferRange)(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj); - GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target, + GLboolean (*UnmapBuffer)( struct gl_context *ctx, struct gl_buffer_object *obj ); /*@}*/ diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index e7f6be99481..b1fc096f296 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug ) if (strstr(debug, "flush")) MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH; -#if defined(_FPU_GETCW) && defined(_FPU_SETCW) - if (strstr(debug, "fpexceptions")) { - /* raise FP exceptions */ - fpu_control_t mask; - _FPU_GETCW(mask); - mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM - | _FPU_MASK_OM | _FPU_MASK_UM); - _FPU_SETCW(mask); - } -#endif - #else (void) debug; #endif diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index f9282398c21..6e075b4e54b 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -894,8 +894,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions, GLvoid *image; map = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, unpack->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, unpack->BufferObj); if (!map) { /* unable to map src buffer! */ _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO"); @@ -906,8 +906,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions, image = _mesa_unpack_image(dimensions, width, height, depth, format, type, src, unpack); - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); if (!image) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction"); diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c index 2089cdfcef9..83485a928d8 100644 --- a/src/mesa/main/drawtex.c +++ b/src/mesa/main/drawtex.c @@ -45,11 +45,15 @@ draw_texture(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, return; } + _mesa_set_vp_override(ctx, GL_TRUE); + if (ctx->NewState) _mesa_update_state(ctx); ASSERT(ctx->Driver.DrawTex); ctx->Driver.DrawTex(ctx, x, y, z, width, height); + + _mesa_set_vp_override(ctx, GL_FALSE); } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index aac8b9c5eaf..3ba4df6342f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -5,7 +5,6 @@ /* * Mesa 3-D graphics library - * Version: 7.0.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) ctx->Polygon.OffsetLine = state; break; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ if (ctx->Polygon.OffsetFill == state) return; FLUSH_VERTICES(ctx, _NEW_POLYGON); @@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: case GL_NORMAL_ARRAY: case GL_COLOR_ARRAY: @@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap ) case GL_POLYGON_OFFSET_LINE: return ctx->Polygon.OffsetLine; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ return ctx->Polygon.OffsetFill; case GL_RESCALE_NORMAL_EXT: return ctx->Transform.RescaleNormals; @@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap ) } #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: return (ctx->Array.ArrayObj->Vertex.Enabled != 0); case GL_NORMAL_ARRAY: diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py index c0b0a445806..cad3deaef94 100644 --- a/src/mesa/main/es_generator.py +++ b/src/mesa/main/es_generator.py @@ -681,10 +681,10 @@ print """ #if FEATURE_remap_table /* define esLocalRemapTable */ -#include "%sapi/main/dispatch.h" +#include "main/api_exec_%s_dispatch.h" #define need_MESA_remap_table -#include "%sapi/main/remap_helper.h" +#include "main/api_exec_%s_remap_helper.h" static void init_remap_table(void) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index bc61c50a90f..14b0cf9acbd 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -81,6 +81,7 @@ static const struct extension extension_table[] = { { "GL_ARB_blend_func_extended", o(ARB_blend_func_extended), GL, 2009 }, { "GL_ARB_color_buffer_float", o(ARB_color_buffer_float), GL, 2004 }, { "GL_ARB_copy_buffer", o(ARB_copy_buffer), GL, 2008 }, + { "GL_ARB_conservative_depth", o(AMD_conservative_depth), GL, 2011 }, { "GL_ARB_depth_buffer_float", o(ARB_depth_buffer_float), GL, 2008 }, { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", o(ARB_depth_texture), GL, 2001 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 84969360d92..0b48fc7eab0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1984,10 +1984,26 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment, { GET_CURRENT_CONTEXT(ctx); - if ((texture != 0) && (textarget != GL_TEXTURE_1D)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture1DEXT(textarget)"); - return; + if (texture != 0) { + GLboolean error; + + switch (textarget) { + case GL_TEXTURE_1D: + error = GL_FALSE; + break; + case GL_TEXTURE_1D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; + break; + default: + error = GL_TRUE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferTexture1DEXT(textarget=%s)", + _mesa_lookup_enum_by_nr(textarget)); + return; + } } framebuffer_texture(ctx, "1D", target, attachment, textarget, texture, @@ -2001,13 +2017,37 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment, { GET_CURRENT_CONTEXT(ctx); - if ((texture != 0) && - (textarget != GL_TEXTURE_2D) && - (textarget != GL_TEXTURE_RECTANGLE_ARB) && - (!is_cube_face(textarget))) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture2DEXT(textarget=0x%x)", textarget); - return; + if (texture != 0) { + GLboolean error; + + switch (textarget) { + case GL_TEXTURE_2D: + error = GL_FALSE; + break; + case GL_TEXTURE_RECTANGLE: + error = !ctx->Extensions.NV_texture_rectangle; + break; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + error = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_2D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; + break; + default: + error = GL_FALSE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferTexture2DEXT(textarget=%s)", + _mesa_lookup_enum_by_nr(textarget)); + return; + } } framebuffer_texture(ctx, "2D", target, attachment, textarget, texture, @@ -2023,7 +2063,7 @@ _mesa_FramebufferTexture3DEXT(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); if ((texture != 0) && (textarget != GL_TEXTURE_3D)) { - _mesa_error(ctx, GL_INVALID_ENUM, + _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture3DEXT(textarget)"); return; } @@ -2134,10 +2174,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, { const struct gl_renderbuffer_attachment *att; struct gl_framebuffer *buffer; + GLenum err; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); + /* The error differs in GL andd GLES. */ + err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM; + buffer = get_framebuffer_target(ctx, target); if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -2188,7 +2232,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } else { assert(att->Type == GL_NONE); - *params = 0; + if (ctx->API == API_OPENGL) { + *params = 0; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameterivEXT(pname)"); + } } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2196,7 +2245,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2214,7 +2263,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2232,7 +2281,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2246,7 +2295,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2267,7 +2316,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2301,7 +2350,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Texture) { @@ -2337,6 +2386,8 @@ void GLAPIENTRY _mesa_GenerateMipmapEXT(GLenum target) { struct gl_texture_object *texObj; + GLboolean error; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -2346,12 +2397,22 @@ _mesa_GenerateMipmapEXT(GLenum target) case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: + error = GL_FALSE; + break; case GL_TEXTURE_CUBE_MAP: - /* OK, legal value */ + error = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; break; default: - /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */ - _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)"); + error = GL_TRUE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)", + _mesa_lookup_enum_by_nr(target)); return; } diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..7cc17216884 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; @@ -875,7 +874,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, + (gl_constant_value *) values, 4, &swizzle ); r = make_ureg(PROGRAM_CONSTANT, idx); r.swz = swizzle; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index b8e49a3757f..2d2485c9e06 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p, GLfloat s2, GLfloat s3) { - GLfloat values[4]; + gl_constant_value values[4]; GLint idx; GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); ASSERT(swizzle == SWIZZLE_NOOP); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index e27569a6fac..23fa1b2c11e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 0492e1585c3..d32c68a53a4 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB: - v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE); + v->value_int = _mesa_get_compressed_formats(ctx, NULL); break; case GL_COMPRESSED_TEXTURE_FORMATS_ARB: v->value_int_n.n = - _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE); + _mesa_get_compressed_formats(ctx, v->value_int_n.ints); ASSERT(v->value_int_n.n <= 100); break; diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 0a572ec225d..8f097195922 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -753,7 +753,8 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ + !defined(ANDROID) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 3fa1db02aee..70defdc4327 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -134,7 +134,13 @@ typedef union { GLfloat f; GLint i; } fi_type; #define exp2f(f) ((float) exp2(f)) #define floorf(f) ((float) floor(f)) #define logf(f) ((float) log(f)) + +#ifdef ANDROID +#define log2f(f) (logf(f) * (float) (1.0 / M_LN2)) +#else #define log2f(f) ((float) log2(f)) +#endif + #define powf(x,y) ((float) pow(x,y)) #define sinf(f) ((float) sin(f)) #define sinhf(f) ((float) sinh(f)) @@ -562,7 +568,7 @@ _mesa_init_sqrt_table(void); #ifdef __GNUC__ -#ifdef __MINGW32__ +#if defined(__MINGW32__) || defined(ANDROID) #define ffs __builtin_ffs #define ffsll __builtin_ffsll #endif diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index b88118366b2..f2eb889feb4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1279,6 +1279,9 @@ struct gl_texture_image GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */ struct gl_texture_object *TexObject; /**< Pointer back to parent object */ + GLuint Level; /**< Which mipmap level am I? */ + /** Cube map face: index into gl_texture_object::Image[] array */ + GLuint Face; FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */ FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */ @@ -2252,8 +2255,6 @@ struct gl_shader_state */ struct gl_shader_program *ActiveProgram; - void *MemPool; - GLbitfield Flags; /**< Mask of GLSL_x flags */ }; @@ -2719,6 +2720,12 @@ struct gl_constants GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */ + /** + * Does the driver support real 32-bit integers? (Otherwise, integers are + * simulated via floats.) + */ + GLboolean NativeIntegers; + /** Which texture units support GL_ATI_envmap_bumpmap as targets */ GLbitfield SupportedBumpUnits; diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c index dd198b8141a..7ff7645b7b7 100644 --- a/src/mesa/main/nvprogram.c +++ b/src/mesa/main/nvprogram.c @@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - GLfloat *v; + gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; + v[0].f = x; + v[1].f = y; + v[2].f = z; + v[3].f = w; return; } @@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - const GLfloat *v; + const gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); @@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - params[0] = v[0]; - params[1] = v[1]; - params[2] = v[2]; - params[3] = v[3]; + params[0] = v[0].f; + params[1] = v[1].f; + params[2] = v[2].f; + params[3] = v[3].f; return; } diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c index 15e0480e9f1..4e7e6f925cc 100644 --- a/src/mesa/main/pbo.c +++ b/src/mesa/main/pbo.c @@ -128,9 +128,10 @@ _mesa_map_pbo_source(struct gl_context *ctx, if (_mesa_is_bufferobj(unpack->BufferObj)) { /* unpack from PBO */ - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) return NULL; @@ -201,8 +202,7 @@ _mesa_unmap_pbo_source(struct gl_context *ctx, { ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */ if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } @@ -224,9 +224,10 @@ _mesa_map_pbo_dest(struct gl_context *ctx, if (_mesa_is_bufferobj(pack->BufferObj)) { /* pack into PBO */ - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, - pack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + pack->BufferObj->Size, + GL_MAP_WRITE_BIT, + pack->BufferObj); if (!buf) return NULL; @@ -297,7 +298,7 @@ _mesa_unmap_pbo_dest(struct gl_context *ctx, { ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */ if (_mesa_is_bufferobj(pack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, pack->BufferObj); } } @@ -327,8 +328,9 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions, return NULL; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)"); return NULL; @@ -364,8 +366,10 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx, return NULL; } - buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, packing->BufferObj); + buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0, + packing->BufferObj->Size, + GL_MAP_READ_BIT, + packing->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped"); return NULL; @@ -384,8 +388,7 @@ _mesa_unmap_teximage_pbo(struct gl_context *ctx, const struct gl_pixelstore_attrib *unpack) { if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 944ad435f7a..eaedf7cd238 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -73,7 +73,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \ - (defined(__sun) && defined(__GNUC__)) + (defined(__sun) && defined(__GNUC__)) || defined(ANDROID) /* fpclassify is available. */ diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 8df25c3f988..74997eaaa77 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1125,7 +1125,7 @@ static void validate_program(struct gl_context *ctx, GLuint program) { struct gl_shader_program *shProg; - char errMsg[100]; + char errMsg[100] = ""; shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram"); if (!shProg) { diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 33d91ad594d..f128648f477 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog prog->Geom.InputType = GL_TRIANGLES; prog->Geom.OutputType = GL_TRIANGLE_STRIP; #endif + + prog->InfoLog = ralloc_strdup(prog, ""); } /** @@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx, _mesa_free_parameter_list(shProg->Varying); shProg->Varying = NULL; } + + assert(shProg->InfoLog != NULL); + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); } @@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->Shaders = NULL; } - if (shProg->InfoLog) { - ralloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - /* Transform feedback varying vars */ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { free(shProg->TransformFeedback.VaryingNames[i]); diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index d84f59690c5..8b7159db09c 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -200,7 +200,7 @@ delete_bufferobj_cb(GLuint id, void *data, void *userData) struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data; struct gl_context *ctx = (struct gl_context *) userData; if (_mesa_bufferobj_mapped(bufObj)) { - ctx->Driver.UnmapBuffer(ctx, 0, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->Pointer = NULL; } _mesa_reference_buffer_object(ctx, &bufObj, NULL); diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index d820ae92747..42bd1eee5ca 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -40,19 +40,192 @@ /** + * Get the GL base format of a specified GL compressed texture format + * + * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec: + * + * "Compressed Internal Format Base Internal Format Type + * --------------------------- -------------------- --------- + * COMPRESSED_ALPHA ALPHA Generic + * COMPRESSED_LUMINANCE LUMINANCE Generic + * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_INTENSITY INTENSITY Generic + * COMPRESSED_RED RED Generic + * COMPRESSED_RG RG Generic + * COMPRESSED_RGB RGB Generic + * COMPRESSED_RGBA RGBA Generic + * COMPRESSED_SRGB RGB Generic + * COMPRESSED_SRGB_ALPHA RGBA Generic + * COMPRESSED_SLUMINANCE LUMINANCE Generic + * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_RED_RGTC1 RED Specific + * COMPRESSED_SIGNED_RED_RGTC1 RED Specific + * COMPRESSED_RG_RGTC2 RG Specific + * COMPRESSED_SIGNED_RG_RGTC2 RG Specific" + * + * \return + * The base format of \c format if \c format is a compressed format (either + * generic or specific. Otherwise 0 is returned. + */ +GLenum +_mesa_gl_compressed_format_base_format(GLenum format) +{ + switch (format) { + case GL_COMPRESSED_RED: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return GL_RED; + + case GL_COMPRESSED_RG: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return GL_RG; + + case GL_COMPRESSED_RGB: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return GL_RGB; + + case GL_COMPRESSED_RGBA: + case GL_COMPRESSED_SRGB_ALPHA: + case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return GL_RGBA; + + case GL_COMPRESSED_ALPHA: + return GL_ALPHA; + + case GL_COMPRESSED_LUMINANCE: + case GL_COMPRESSED_SLUMINANCE: + case GL_COMPRESSED_LUMINANCE_LATC1_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return GL_LUMINANCE; + + case GL_COMPRESSED_LUMINANCE_ALPHA: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: + return GL_LUMINANCE_ALPHA; + + case GL_COMPRESSED_INTENSITY: + return GL_INTENSITY; + + default: + return 0; + } +} + +/** * Return list of (and count of) all specific texture compression * formats that are supported. * + * Some formats are \b not returned by this function. The + * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are + * "suitable for general-purpose usage." All texture compression extensions + * have taken this to mean either linear RGB or linear RGBA. + * + * The GL_ARB_texture_compress_rgtc spec says: + * + * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Compressed textures with just red or red-green components are + * not general-purpose so should not be returned by these queries + * because they have restrictions. + * + * Applications that seek to use the RGTC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is nearly identical wording in the GL_EXT_texture_compression_rgtc + * spec. + * + * The GL_EXT_texture_rRGB spec says: + * + * "22) Should the new COMPRESSED_SRGB_* formats be listed in an + * implementation's GL_COMPRESSED_TEXTURE_FORMATS list? + * + * RESOLVED: No. Section 3.8.1 says formats listed by + * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose + * usage." The non-linear distribution of red, green, and + * blue for these sRGB compressed formats makes them not really + * general-purpose." + * + * The GL_EXT_texture_compression_latc spec says: + * + * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Historically, OpenGL implementation have advertised the RGB and + * RGBA versions of the S3TC extensions compressed format tokens + * through this mechanism. + * + * The specification is not sufficiently clear about what "suitable + * for general-purpose usage" means. Historically that seems to mean + * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha + * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at + * least for NVIDIA drivers) because the alpha is always 1.0 expect + * when it is 0.0 when RGB is required to be black. NVIDIA's even + * limits itself to true linear RGB or RGBA formats, specifically + * not including EXT_texture_sRGB's sRGB S3TC compressed formats. + * + * Adding luminance and luminance-alpha texture formats (and + * certainly signed versions of luminance and luminance-alpha + * formats!) invites potential comptaibility problems with old + * applications using this mechanism since old applications are + * unlikely to expect non-RGB or non-RGBA formats to be advertised + * through this mechanism. However no specific misinteractions + * with old applications is known. + * + * Applications that seek to use the LATC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is no formal spec for GL_ATI_texture_compression_3dc. Since the + * formats added by this extension are luminance-alpha formats, it is + * reasonable to expect them to follow the same rules as + * GL_EXT_texture_compression_latc. At the very least, Catalyst 11.6 does not + * expose the 3dc formats through this mechanism. + * * \param ctx the GL context * \param formats the resulting format list (may be NULL). - * \param all if true return all formats, even those with some kind - * of restrictions/limitations (See GL_ARB_texture_compression - * spec for more info). * * \return number of formats. */ GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all) +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; if (ctx->Extensions.TDFX_texture_compression_FXT1) { @@ -64,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } - /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ + if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - /* This format has some restrictions/limitations and so should - * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query. - * Specifically, all transparent pixels become black. NVIDIA - * omits this format too. - */ - if (all) - formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; } else { n += 3; - if (all) - n += 1; } } if (ctx->Extensions.S3_s3tc) { @@ -95,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 4; } } -#if FEATURE_EXT_texture_sRGB - if (ctx->Extensions.EXT_texture_sRGB) { - if (formats) { - formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; - } - else { - n += 4; - } - } -#endif /* FEATURE_EXT_texture_sRGB */ return n; #if FEATURE_ES1 || FEATURE_ES2 diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 19b08bbadf6..375cf90c8a2 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -33,8 +33,11 @@ struct gl_context; #if _HAVE_FULL_GL +extern GLenum +_mesa_gl_compressed_format_base_format(GLenum format); + extern GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats); extern gl_format _mesa_glenum_to_compressed_format(GLenum format); diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h index c8bf082a158..48bbd374e08 100644 --- a/src/mesa/main/texcompress_rgtc_tmp.h +++ b/src/mesa/main/texcompress_rgtc_tmp.h @@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4], fprintf(stderr, "%d ", alphaenc1[i]); } fprintf(stderr, "cutVals "); - for (i = 0; i < 8; i++) { + for (i = 0; i < 7; i++) { fprintf(stderr, "%d ", acutValues[i]); } fprintf(stderr, "srcVals "); diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 26c2ff98ba1..b2ebb0de475 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -441,8 +441,8 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level, * texture data to the PBO if the PBO is in VRAM along with the texture. */ GLubyte *buf = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size, + GL_MAP_WRITE_BIT, ctx->Pack.BufferObj); if (!buf) { /* out of memory or other unexpected error */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)"); @@ -474,8 +474,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level, } if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - ctx->Pack.BufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj); } } @@ -500,8 +499,8 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { /* pack texture image into a PBO */ GLubyte *buf = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size, + GL_MAP_WRITE_BIT, ctx->Pack.BufferObj); if (!buf) { /* out of memory or other unexpected error */ _mesa_error(ctx, GL_OUT_OF_MEMORY, @@ -531,8 +530,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level } if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - ctx->Pack.BufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj); } } diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 6f53686e7ff..a005d2935fa 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1,6 +1,5 @@ /* - * mesa 3-D graphics library - * Version: 7.6 + * Mesa 3-D graphics library * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target) * \param target texture target. * \param level image level. * \param texImage texture image. - * - * This was basically prompted by the introduction of cube maps. */ void _mesa_set_tex_image(struct gl_texture_object *tObj, @@ -574,6 +571,8 @@ _mesa_set_tex_image(struct gl_texture_object *tObj, /* Set the 'back' pointer */ texImage->TexObject = tObj; + texImage->Level = level; + texImage->Face = face; } @@ -709,15 +708,13 @@ get_proxy_target(GLenum target) /** * Get the texture object that corresponds to the target of the given - * texture unit. + * texture unit. The target should have already been checked for validity. * * \param ctx GL context. * \param texUnit texture unit. * \param target texture target. * * \return pointer to the texture object on success, or NULL on failure. - * - * \sa gl_texture_unit. */ struct gl_texture_object * _mesa_select_tex_object(struct gl_context *ctx, @@ -2797,29 +2794,43 @@ copyteximage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } + /* choose actual hw format */ + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, + internalFormat, + GL_NONE, GL_NONE); - ASSERT(texImage->Data == NULL); + if (legal_texture_size(ctx, texFormat, width, height, 1)) { + GLint srcX = x, srcY = y, dstX = 0, dstY = 0; - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + /* Free old texture image */ + ctx->Driver.FreeTexImageData(ctx, texImage); - if (legal_texture_size(ctx, texFormat, width, height, 1)) { _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage2D); - if (dims == 1) - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); - else - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); + /* Allocate texture memory (no pixel data yet) */ + if (dims == 1) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, GL_NONE, GL_NONE, NULL, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, GL_NONE, GL_NONE, + NULL, &ctx->Unpack, texObj, texImage); + } + + if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, + &width, &height)) { + if (dims == 1) + ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX, + srcX, srcY, width); + + else + ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY, + srcX, srcY, width, height); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2830,6 +2841,7 @@ copyteximage(struct gl_context *ctx, GLuint dims, ctx->NewState |= _NEW_TEXTURE; } else { + /* probably too large of image */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 3021716a0b6..078a43ab153 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures ) struct gl_texture_object *texObj; GLuint name = first + i; GLenum target = 0; - texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target); + texObj = ctx->Driver.NewTextureObject(ctx, name, target); if (!texObj) { _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex); _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures"); @@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) } else { /* if this is a new texture id, allocate a texture object now */ - newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target); + newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target); if (!newTexObj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture"); return; @@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) /* Pass BindTexture call to device driver */ if (ctx->Driver.BindTexture) - (*ctx->Driver.BindTexture)( ctx, target, newTexObj ); + ctx->Driver.BindTexture(ctx, target, newTexObj); } diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 4b9dcb5d3b5..bbbb306b2d9 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; @@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = _mesa_compressed_format_to_glenum(ctx, texFormat); } else { - /* return the user's requested internal format */ - *params = img->InternalFormat; - } + /* If the true internal format is not compressed but the user + * requested a generic compressed format, we have to return the + * generic base format that matches. + * + * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec: + * + * "If no specific compressed format is available, + * internalformat is instead replaced by the corresponding base + * internal format." + * + * Otherwise just return the user's requested internal format + */ + const GLenum f = + _mesa_gl_compressed_format_base_format(img->InternalFormat); + + *params = (f != 0) ? f : img->InternalFormat; + } break; case GL_TEXTURE_BORDER: *params = img->Border; @@ -980,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = 0; break; case GL_TEXTURE_DEPTH_SIZE_ARB: - if (ctx->Extensions.ARB_depth_texture) - *params = _mesa_get_format_bits(texFormat, pname); - else + if (!ctx->Extensions.ARB_depth_texture) goto invalid_pname; + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_STENCIL_SIZE_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil || - ctx->Extensions.ARB_framebuffer_object) { - *params = _mesa_get_format_bits(texFormat, pname); - } - else { + if (!ctx->Extensions.EXT_packed_depth_stencil && + !ctx->Extensions.ARB_framebuffer_object) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_SHARED_SIZE: - if (ctx->VersionMajor >= 3 || - ctx->Extensions.EXT_texture_shared_exponent) { - *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; - } - else { + if (ctx->VersionMajor < 3 && + !ctx->Extensions.EXT_texture_shared_exponent) goto invalid_pname; - } + *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; break; /* GL_ARB_texture_compression */ @@ -1022,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, /* GL_ARB_texture_float */ case GL_TEXTURE_RED_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_GREEN_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_BLUE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_ALPHA_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_LUMINANCE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_INTENSITY_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_DEPTH_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; default: @@ -1104,7 +1090,6 @@ void GLAPIENTRY _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1130,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) + if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if(ctx->Color._ClampFragmentColor) - { + if (ctx->Color._ClampFragmentColor) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F); } - else - { + else { params[0] = obj->Sampler.BorderColor.f[0]; params[1] = obj->Sampler.BorderColor.f[1]; params[2] = obj->Sampler.BorderColor.f[2]; @@ -1148,14 +1131,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) } break; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = ENUM_TO_FLOAT(resident); - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1.0F; break; case GL_TEXTURE_PRIORITY: *params = obj->Priority; @@ -1173,49 +1150,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = (GLfloat) obj->MaxLevel; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = obj->Sampler.MaxAnisotropy; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = obj->Sampler.CompareFailValue; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = obj->Sampler.CompareFailValue; break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLfloat) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareFunc; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLfloat) obj->Sampler.DepthMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = obj->Sampler.LodBias; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1230,45 +1195,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = (GLfloat) obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { + if (!ctx->Extensions.EXT_texture_swizzle) { + goto invalid_pname; + } + else { GLuint comp; for (comp = 0; comp < 4; comp++) { params[comp] = (GLfloat) obj->Swizzle[comp]; } } - else { - error = GL_TRUE; - } break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + break; default: - error = GL_TRUE; - break; + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname); } @@ -1276,13 +1236,12 @@ void GLAPIENTRY _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - obj = get_texobj(ctx, target, GL_TRUE); - if (!obj) - return; + obj = get_texobj(ctx, target, GL_TRUE); + if (!obj) + return; _mesa_lock_texture(ctx, obj); switch (pname) { @@ -1315,14 +1274,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) } break;; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = (GLint) resident; - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1; break;; case GL_TEXTURE_PRIORITY: *params = FLOAT_TO_INT(obj->Priority); @@ -1340,55 +1293,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) *params = obj->MaxLevel; break;; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = (GLint) obj->Sampler.MaxAnisotropy; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = (GLint) obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLint) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareFunc; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLint) obj->Sampler.DepthMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = (GLint) obj->Sampler.LodBias; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = (GLint) obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1402,41 +1337,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - COPY_4V(params, obj->Swizzle); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + COPY_4V(params, obj->Swizzle); break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLint) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.CubeMapSeamless; + break; default: - ; /* silence warnings */ + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname); } @@ -1449,6 +1377,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: @@ -1469,6 +1399,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6e1e63bdfb0..c4aeaa8f16d 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage) /** - * This is the software fallback for Driver.TexImage1D() - * and Driver.CopyTexImage1D(). + * This is the software fallback for Driver.TexImage1D(). * \sa _mesa_store_teximage2d() */ void @@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage2D() - * and Driver.CopyTexImage2D(). + * This is the software fallback for Driver.TexImage2D(). * * This function is oriented toward storing images in main memory, rather * than VRAM. Device driver's can easily plug in their own replacement. @@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage3D() - * and Driver.CopyTexImage3D(). + * This is the software fallback for Driver.TexImage3D(). * \sa _mesa_store_teximage2d() */ void diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index dd069a3a4d1..cda840fe2d2 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = prog->Parameters->ParameterValues[base][j]; + params[k++] = prog->Parameters->ParameterValues[base][j].f; } } } @@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLdouble) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j]; + params[k++] = ctx->Const.NativeIntegers ? + prog->Parameters->ParameterValues[base][j].i : + (GLint) prog->Parameters->ParameterValues[base][j].f; } } } @@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j]; + params[k++] = ctx->Const.NativeIntegers ? + prog->Parameters->ParameterValues[base][j].u : + (GLuint) prog->Parameters->ParameterValues[base][j].f; } } } @@ -670,7 +672,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of samplers to change */ for (i = 0; i < count; i++) { GLuint sampler = (GLuint) - program->Parameters->ParameterValues[index + offset + i][0]; + program->Parameters->ParameterValues[index+offset + i][0].f; GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ @@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of array elements */ for (k = 0; k < count; k++) { - GLfloat *uniformVal; + gl_constant_value *uniformVal; if (offset + k >= slots) { /* Extra array data is ignored */ break; } - /* uniformVal (the destination) is always float[4] */ + /* uniformVal (the destination) is always gl_constant_value[4] */ uniformVal = program->Parameters->ParameterValues[index + offset + k]; if (basicType == GL_INT) { - /* convert user's ints to floats */ const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = (GLfloat) iValues[i]; + else + uniformVal[i].i = iValues[i]; } } else if (basicType == GL_UNSIGNED_INT) { - /* convert user's uints to floats */ const GLuint *iValues = ((const GLuint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = (GLfloat)(GLuint) iValues[i]; + else + uniformVal[i].u = iValues[i]; } } else { const GLfloat *fValues = ((const GLfloat *) values) + k * elems; assert(basicType == GL_FLOAT); for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + uniformVal[i].f = fValues[i]; } } - /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ + /* if the uniform is bool-valued, convert to 1 or 0 */ if (isUniformBool) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; + if (basicType == GL_FLOAT) + uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; + else + uniformVal[i].b = uniformVal[i].u ? 1 : 0; + + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f; } } } @@ -936,7 +948,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program, /* Ignore writes beyond the end of (the used part of) an array */ return; } - v = program->Parameters->ParameterValues[index + offset]; + v = (GLfloat *) program->Parameters->ParameterValues[index + offset]; for (row = 0; row < rows; row++) { if (transpose) { v[row] = values[src + row * cols + col]; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d8e5a3a9772..6820e4c6ba7 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } dst_reg::dst_reg(src_reg reg) @@ -297,11 +297,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); + ir_to_mesa_instruction * emit_dp(ir_instruction *ir, + dst_reg dst, + src_reg src0, + src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); @@ -312,9 +312,11 @@ public: void emit_scs(ir_instruction *ir, enum prog_opcode op, dst_reg dst, const src_reg &src); - GLboolean try_emit_mad(ir_expression *ir, + bool try_emit_mad(ir_expression *ir, int mul_operand); - GLboolean try_emit_sat(ir_expression *ir); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -331,20 +333,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); - va_end(args); - - prog->LinkStatus = GL_FALSE; -} - static int swizzle_for_size(int size) { @@ -422,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } -void +ir_to_mesa_instruction * ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) @@ -431,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -593,13 +581,13 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, } } -struct src_reg +src_reg ir_to_mesa_visitor::src_reg_for_float(float val) { src_reg src(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + (const gl_constant_value *)&val, 1, &src.swizzle); return src; } @@ -655,8 +643,6 @@ src_reg ir_to_mesa_visitor::get_temp(const glsl_type *type) { src_reg src; - int swizzle[4]; - int i; src.file = PROGRAM_TEMPORARY; src.index = next_temp; @@ -666,12 +652,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; @@ -744,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir) } } - struct variable_storage *storage; + variable_storage *storage; dst_reg dst; if (i == ir->num_state_slots) { /* We'll set the index later. */ @@ -789,10 +770,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir) if (storage->file == PROGRAM_TEMPORARY && dst.index != storage->index + (int) ir->num_state_slots) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); + linker_error(this->shader_program, + "failed to load builtin uniform `%s' " + "(%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); } } } @@ -889,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir) } } -GLboolean +bool ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; @@ -912,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } -GLboolean +/** + * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); + + return true; +} + +bool ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) { /* Saturates were only introduced to vertex programs in @@ -928,10 +950,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); src_reg src = this->result; - this->result = get_temp(ir->type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + ir_to_mesa_instruction *new_inst; + new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + ir_to_mesa_instruction *inst; + inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + } return true; } @@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; @@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0)); + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; @@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0)); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. This + * achieved using SGE. + */ + src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); } else { emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); } @@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } } else { emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_xor: emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *add = + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ @@ -1496,6 +1619,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) this->result, src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_ADD, dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } @@ -1796,7 +1931,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) src = src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, OPCODE_MOV, mat_column, src); @@ -1834,7 +1969,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -1969,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir) ir_to_mesa_instruction *inst = NULL; prog_opcode opcode = OPCODE_NOP; - ir->coordinate->accept(this); + if (ir->op == ir_txs) + this->result = src_reg_for_float(0.0); + else + ir->coordinate->accept(this); /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if @@ -1993,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: + case ir_txs: opcode = OPCODE_TEX; break; case ir_txb: @@ -2401,29 +2540,32 @@ check_resources(const struct gl_context *ctx, case GL_VERTEX_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); + linker_error(shader_program, + "Too many vertex shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); + linker_error(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); + linker_error(shader_program, + "Too many geometry shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); + linker_error(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); + linker_error(shader_program, + "Too many fragment shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); + linker_error(shader_program, "Too many fragment shader constants"); } break; default: @@ -2531,16 +2673,17 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned * from _mesa_add_uniform) has to match what the linker chose. */ if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); + linker_error(shader_program, + "Allocation of uniform `%s' to target failed " + "(%d vs %d)\n", + uniform->Name, index, parameter_index); } } } @@ -2573,8 +2716,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, int loc = _mesa_get_uniform_location(ctx, shader_program, name); if (loc == -1) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); + linker_error(shader_program, + "Couldn't find uniform for initializer %s\n", name); return; } @@ -2974,11 +3117,31 @@ get_mesa_program(struct gl_context *ctx, if (mesa_inst->SrcReg[src].RelAddr) prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - switch (mesa_inst->Opcode) { + case OPCODE_IF: + if (options->EmitNoIfs) { + linker_warning(shader_program, + "Couldn't flatten if-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_BGNLOOP: + if (options->EmitNoLoops) { + linker_warning(shader_program, + "Couldn't unroll loop. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_CONT: + if (options->EmitNoCont) { + linker_warning(shader_program, + "Couldn't lower continue-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; case OPCODE_BGNSUB: inst->function->inst = i; mesa_inst->Comment = strdup(inst->function->sig->function_name()); @@ -3246,7 +3409,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (i = 0; i < prog->NumShaders; i++) { if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); + linker_error(prog, "linking with uncompiled shader"); prog->LinkStatus = GL_FALSE; } } diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c index 8516b5fc1ff..ce72c610d89 100644 --- a/src/mesa/program/nvfragparse.c +++ b/src/mesa/program/nvfragparse.c @@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number) const GLfloat *constant; if (!Parse_Identifier(parseState, ident)) RETURN_ERROR1("Expected an identifier"); - constant = _mesa_lookup_parameter_value(parseState->parameters, - -1, (const char *) ident); + constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters, + -1, + (const char *) ident); /* XXX Check that it's a constant and not a parameter */ if (!constant) { RETURN_ERROR1("Undefined symbol"); @@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->Index = paramIndex; srcReg->File = PROGRAM_NAMED_PARAM; needSuffix = GL_FALSE; @@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already defined"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "DECLARE")) { GLubyte id[100]; @@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already declared"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "END")) { inst->Opcode = OPCODE_END; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e7553c69dbe..77f842a1630 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source, case PROGRAM_NAMED_PARAM: if (reg >= (GLint) prog->Parameters->NumParameters) return ZeroVec; - return prog->Parameters->ParameterValues[reg]; + return (GLfloat *) prog->Parameters->ParameterValues[reg]; case PROGRAM_SYSTEM_VALUE: assert(reg < Elements(machine->SystemValues)); @@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx, struct gl_program_machine *machine) { const GLuint numInst = program->NumInstructions; - const GLuint maxExec = 10000; + const GLuint maxExec = 65536; GLuint pc, numExec = 0; machine->CurProgram = program; @@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx, GLfloat texcoord[4], color[4]; fetch_vector4(&inst->SrcReg[0], machine, texcoord); + /* For TEX, texcoord.Q should not be used and its value should not + * matter (at most, we pass coord.xyz to texture3D() in GLSL). + * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value + * which is effectively what happens when the texcoord swizzle + * is .xyzz + */ + texcoord[3] = 1.0f; + fetch_texel(ctx, machine, inst, texcoord, 0.0, color); if (DEBUG_PROG) { diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 00000000000..e2418b55451 --- /dev/null +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include <stdbool.h> + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ + unsigned i; + + for (i = 0; i < num_srcs; i++) { + if (inst->SrcReg[i].File != PROGRAM_CONSTANT) + return false; + } + + return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) &val, 1, &swiz); + src.Swizzle = swiz; + return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) val, 4, &swiz); + src.Swizzle = swiz; + return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, + const struct prog_src_register *b) +{ + return (a->File == b->File) + && (a->Index == b->Index) + && (a->Swizzle == b->Swizzle) + && (a->Abs == b->Abs) + && (a->Negate == b->Negate) + && (a->RelAddr == 0) + && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ + const gl_constant_value *const value = + prog->Parameters->ParameterValues[r->Index]; + + data[0] = value[GET_SWZ(r->Swizzle, 0)].f; + data[1] = value[GET_SWZ(r->Swizzle, 1)].f; + data[2] = value[GET_SWZ(r->Swizzle, 2)].f; + data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + + if (r->Abs) { + data[0] = fabsf(data[0]); + data[1] = fabsf(data[1]); + data[2] = fabsf(data[2]); + data[3] = fabsf(data[3]); + } + + if (r->Negate & 0x01) { + data[0] = -data[0]; + } + + if (r->Negate & 0x02) { + data[1] = -data[1]; + } + + if (r->Negate & 0x04) { + data[2] = -data[2]; + } + + if (r->Negate & 0x08) { + data[3] = -data[3]; + } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ + bool progress = false; + unsigned i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *const inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ADD: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_CMP: + /* FINISHME: We could also optimize CMP instructions where the first + * FINISHME: source is a constant that is either all < 0.0 or all + * FINISHME: >= 0.0. + */ + if (src_regs_are_constant(inst, 3)) { + float a[4]; + float b[4]; + float c[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + get_value(prog, &inst->SrcReg[2], c); + + result[0] = a[0] < 0.0f ? b[0] : c[0]; + result[1] = a[1] < 0.0f ? b[1] : c[1]; + result[2] = a[2] < 0.0f ? b[2] : c[2]; + result[3] = a[3] < 0.0f ? b[3] : c[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + /* It seems like a loop could be used here, but we cleverly put + * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from + * the opcode results in various failures of the loop control. + */ + result = (a[0] * b[0]) + (a[1] * b[1]); + + if (inst->Opcode >= OPCODE_DP3) + result += a[2] * b[2]; + + if (inst->Opcode == OPCODE_DP4) + result += a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_MUL: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SEQ: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SNE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 8a40fa69eca..25d9684b137 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) mov->SrcReg[0].HasIndex2 == 0 && mov->SrcReg[0].RelAddr2 == 0 && mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - mov->SaturateMode == SATURATE_OFF; + mov->DstReg.CondMask == COND_TR; } @@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) { return can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; + mov->DstReg.File == PROGRAM_TEMPORARY && + mov->SaturateMode == SATURATE_OFF; } @@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; + inst->SaturateMode |= mov->SaturateMode; + /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations @@ -1319,6 +1321,15 @@ _mesa_simplify_cmp(struct gl_program * program) inst->Opcode = OPCODE_MOV; inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. + */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; } } if (dbg) { @@ -1347,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) any_change = GL_TRUE; if (_mesa_remove_dead_code_local(program)) any_change = GL_TRUE; + + any_change = _mesa_constant_fold(program) || any_change; _mesa_reallocate_registers(program); } while (any_change); } diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 463f5fc51c4..9854fb7a491 100644 --- a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions, extern void _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program); +extern GLboolean +_mesa_constant_fold(struct gl_program *prog); + #endif diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 3570cab118b..49b3ffbdd5c 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size) p->Parameters = (struct gl_program_parameter *) calloc(1, size * sizeof(struct gl_program_parameter)); - p->ParameterValues = (GLfloat (*)[4]) - _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + p->ParameterValues = (gl_constant_value (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16); if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { @@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) * \param name the parameter name, will be duplicated/copied! * \param size number of elements in 'values' vector (1..4, or more) * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. - * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param values initial parameter value, up to 4 gl_constant_values, or NULL * \param state state indexes, or NULL * \return index of new parameter in the list, or -1 if error (out of mem) */ GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags) { @@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, oldNum * sizeof(struct gl_program_parameter), paramList->Size * sizeof(struct gl_program_parameter)); - paramList->ParameterValues = (GLfloat (*)[4]) + paramList->ParameterValues = (gl_constant_value (*)[4]) _mesa_align_realloc(paramList->ParameterValues, /* old buf */ - oldNum * 4 * sizeof(GLfloat), /* old size */ - paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + oldNum * 4 * sizeof(gl_constant_value),/* old sz */ + paramList->Size*4*sizeof(gl_constant_value),/*new*/ 16); } @@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, return -1; } else { - GLuint i; + GLuint i, j; paramList->NumParameters = oldNum + sz4; @@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, } else { /* silence valgrind */ - ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } size -= 4; } @@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]) + const char *name, const gl_constant_value values[4]) { return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, 4, GL_NONE, values, NULL, 0x0); @@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size) { /* first check if this is a duplicate constant */ GLint pos; for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const GLfloat *pvals = paramList->ParameterValues[pos]; - if (pvals[0] == values[0] && - pvals[1] == values[1] && - pvals[2] == values[2] && - pvals[3] == values[3] && + const gl_constant_value *pvals = paramList->ParameterValues[pos]; + if (pvals[0].u == values[0].u && + pvals[1].u == values[1].u && + pvals[2].u == values[2].u && + pvals[3].u == values[3].u && strcmp(paramList->Parameters[pos].Name, name) == 0) { /* Same name and value is already in the param list - reuse it */ return pos; @@ -239,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, * \return index/position of the new parameter in the parameter list. */ GLint -_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, - GLuint *swizzleOut) +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut) { GLint pos; ASSERT(size >= 1); @@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, struct gl_program_parameter *p = paramList->Parameters + pos; if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { /* ok, found room */ - GLfloat *pVal = paramList->ParameterValues[pos]; + gl_constant_value *pVal = paramList->ParameterValues[pos]; GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ pVal[p->Size] = values[0]; p->Size++; @@ -274,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, /* add a new parameter to store this constant */ pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, - size, GL_NONE, values, NULL, 0x0); + size, datatype, values, NULL, 0x0); if (pos >= 0 && swizzleOut) { if (size == 1) *swizzleOut = SWIZZLE_XXXX; @@ -285,6 +287,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, } /** + * Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. + * \sa _mesa_add_typed_unnamed_constant + */ +GLint +_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLuint *swizzleOut) +{ + return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE, + swizzleOut); +} + +/** * Add parameter representing a varying variable. */ GLint @@ -401,7 +425,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, * Lookup a parameter value by name in the given parameter list. * \return pointer to the float[4] values. */ -GLfloat * +gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name) { @@ -465,7 +489,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, */ GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut) { GLuint i; @@ -484,7 +508,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* swizzle not allowed */ GLuint j, match = 0; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) + if (v[j].u == list->ParameterValues[i][j].u) match++; } if (match == vSize) { @@ -498,7 +522,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* look for v[0] anywhere within float[4] value */ GLuint j; for (j = 0; j < list->Parameters[i].Size; j++) { - if (list->ParameterValues[i][j] == v[0]) { + if (list->ParameterValues[i][j].u == v[0].u) { /* found it */ *posOut = i; *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); @@ -511,13 +535,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, GLuint swz[4]; GLuint match = 0, j, k; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) { + if (v[j].u == list->ParameterValues[i][j].u) { swz[j] = j; match++; } else { for (k = 0; k < list->Parameters[i].Size; k++) { - if (v[j] == list->ParameterValues[i][k]) { + if (v[j].u == list->ParameterValues[i][k].u) { swz[j] = k; match++; break; diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 10cbbe57a6c..1a5ed343937 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -47,6 +47,17 @@ /*@}*/ +/** + * Actual data for constant values of parameters. + */ +typedef union gl_constant_value +{ + GLfloat f; + GLboolean b; + GLint i; + GLuint u; +} gl_constant_value; + /** * Program parameter. @@ -81,7 +92,7 @@ struct gl_program_parameter_list GLuint Size; /**< allocated size of Parameters, ParameterValues */ GLuint NumParameters; /**< number of parameters in arrays */ struct gl_program_parameter *Parameters; /**< Array [Size] */ - GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */ + gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */ GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes might invalidate ParameterValues[] */ }; @@ -112,22 +123,28 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list) extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags); extern GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]); + const char *name, const gl_constant_value values[4]); extern GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size); extern GLint +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut); + +extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut); extern GLint @@ -143,7 +160,7 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern GLfloat * +extern gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); @@ -153,7 +170,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, extern GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut); extern GLuint diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index 90a9771080c..28fca3b92d9 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state) switch (p->Type) { case PROGRAM_CONSTANT: { - const float *const v = + const gl_constant_value *const v = state->prog->Parameters->ParameterValues[idx]; inst->Base.SrcReg[i].Index = diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 7c3b4909e73..70412b1fa6a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f, fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; - const GLfloat *v = list->ParameterValues[i]; + const GLfloat *v = (GLfloat *) list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, _mesa_register_file_name(list->Parameters[i].Type), diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 16f9690e865..6aa2409e85e 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx, if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, paramList->Parameters[i].StateIndexes, - paramList->ParameterValues[i]); + ¶mList->ParameterValues[i][0].f); } } } diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index adca094ee89..ecff2344a44 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) if (prog->String) free(prog->String); - _mesa_free_instructions(prog->Instructions, prog->NumInstructions); - + if (prog->Instructions) { + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); + } if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); } @@ -1031,7 +1032,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - white, 4, &whiteSwizzle); + (gl_constant_value *) white, + 4, &whiteSwizzle); (void) whiteIndex; diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index dbf5abaa617..dec35038be5 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector; paramConstScalarDecl: signedFloatConstant { $$.count = 4; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } ; paramConstScalarUse: REAL { $$.count = 1; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } | INTEGER { $$.count = 1; - $$.data[0] = (float) $1; - $$.data[1] = (float) $1; - $$.data[2] = (float) $1; - $$.data[3] = (float) $1; + $$.data[0].f = (float) $1; + $$.data[1].f = (float) $1; + $$.data[2].f = (float) $1; + $$.data[3].f = (float) $1; } ; paramConstVector: '{' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = 0.0f; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = 0.0f; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = $8; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = $8; } ; diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h index 8e5aaee95e5..5637598f3b3 100644 --- a/src/mesa/program/program_parser.h +++ b/src/mesa/program/program_parser.h @@ -23,6 +23,7 @@ #pragma once #include "main/config.h" +#include "program/prog_parameter.h" struct gl_context; @@ -96,7 +97,7 @@ struct asm_symbol { struct asm_vector { unsigned count; - float data[4]; + gl_constant_value data[4]; }; diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index de96eb42c9b..f5b5174fc18 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) } } +/** + * Adds a conflict between base_reg and reg, and also between reg and + * anything that base_reg conflicts with. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. + */ +void +ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg) +{ + int i; + + ra_add_reg_conflict(regs, reg, base_reg); + + for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { + ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h index 5b95833f394..ee2e58a4756 100644 --- a/src/mesa/program/register_allocate.h +++ b/src/mesa/program/register_allocate.h @@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); +void ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_finalize(struct ra_regs *regs); /** @} */ diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1457d1199fa..e8d34c670a9 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, index += getname.offset; - return prog->Parameters->ParameterValues[index][0]; + return prog->Parameters->ParameterValues[index][0].f; } } diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 4b2ec08bbb0..5e77e0f5919 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -251,6 +251,7 @@ PROGRAM_SOURCES = \ program/prog_instruction.c \ program/prog_noise.c \ program/prog_optimize.c \ + program/prog_opt_constant_fold.c \ program/prog_parameter.c \ program/prog_parameter_layout.c \ program/prog_print.c \ @@ -336,7 +337,8 @@ MESA_GALLIUM_SOURCES = \ MESA_GALLIUM_CXX_SOURCES = \ $(MAIN_CXX_SOURCES) \ - $(SHADER_CXX_SOURCES) + $(SHADER_CXX_SOURCES) \ + state_tracker/st_glsl_to_tgsi.cpp # All the core C sources, for dependency checking ALL_SOURCES = \ diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 1f833d28212..12b5bc5ba79 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key) } -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. */ @@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0e0..3115a2511ce 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. + */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..beb5e7cab31 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) } +static struct gl_program * +make_bitmap_fragment_program_glsl(struct st_context *st, + struct st_fragment_program *orig, + GLuint samplerIndex) +{ + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); + return &fp->Base.Base; +} + + static int find_free_bit(uint bitfield) { @@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st, GLuint *bitmap_sampler) { struct st_fragment_program *bitmap_prog; + struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; struct gl_program *newProg; uint sampler; @@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st, * with the bitmap sampler/kill instructions. */ sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + if (stfpIn->glsl_to_tgsi) + newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); + else { + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + } #if 0 { @@ -328,8 +351,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if(!normalized) { - sRight = width; - tBot = height; + sRight = (GLfloat) width; + tBot = (GLfloat) height; } /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as @@ -381,7 +404,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, /* same for all verts: */ for (i = 0; i < 4; i++) { st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][0][3] = 1.0f; st->bitmap.vertices[i][1][0] = color[0]; st->bitmap.vertices[i][1][1] = color[1]; st->bitmap.vertices[i][1][2] = color[2]; @@ -513,7 +536,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; /* draw textured quad */ offset = setup_bitmap_vertex_data(st, diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 416be194d11..750f541b5dd 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -62,6 +62,84 @@ st_destroy_blit(struct st_context *st) #if FEATURE_EXT_framebuffer_blit static void +st_BlitFramebuffer_resolve(struct gl_context *ctx, + GLbitfield mask, + struct pipe_resolve_info *info) +{ + const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + + struct st_context *st = st_context(ctx); + + struct st_renderbuffer *srcRb, *dstRb; + + if (mask & GL_COLOR_BUFFER_BIT) { + srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + + info->mask = PIPE_MASK_RGBA; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & depthStencil) { + struct gl_renderbuffer_attachment *srcDepth, *srcStencil; + struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + boolean combined; + + srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; + dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; + srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; + dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; + + combined = + st_is_depth_stencil_combined(srcDepth, srcStencil) && + st_is_depth_stencil_combined(dstDepth, dstStencil); + + if ((mask & GL_DEPTH_BUFFER_BIT) || combined) { + /* resolve depth and, if combined and requested, stencil as well */ + srcRb = st_renderbuffer(srcDepth->Renderbuffer); + dstRb = st_renderbuffer(dstDepth->Renderbuffer); + + info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) { + mask &= ~GL_STENCIL_BUFFER_BIT; + info->mask |= PIPE_MASK_S; + } + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* resolve separate stencil buffer */ + srcRb = st_renderbuffer(srcStencil->Renderbuffer); + dstRb = st_renderbuffer(dstStencil->Renderbuffer); + + info->mask = PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + } +} + +static void st_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -95,6 +173,42 @@ st_BlitFramebuffer(struct gl_context *ctx, srcY1 = readFB->Height - srcY1; } + /* Disable conditional rendering. */ + if (st->render_condition) { + st->pipe->render_condition(st->pipe, NULL, 0); + } + + if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) { + struct pipe_resolve_info info; + + if (dstX0 < dstX1) { + info.dst.x0 = dstX0; + info.dst.x1 = dstX1; + info.src.x0 = srcX0; + info.src.x1 = srcX1; + } else { + info.dst.x0 = dstX1; + info.dst.x1 = dstX0; + info.src.x0 = srcX1; + info.src.x1 = srcX0; + } + if (dstY0 < dstY1) { + info.dst.y0 = dstY0; + info.dst.y1 = dstY1; + info.src.y0 = srcY0; + info.src.y1 = srcY1; + } else { + info.dst.y0 = dstY1; + info.dst.y1 = dstY0; + info.src.y0 = srcY1; + info.src.y1 = srcY0; + } + + st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */ + + goto done; + } + if (srcY0 > srcY1 && dstY0 > dstY1) { /* Both src and dst are upside down. Swap Y to make it * right-side up to increase odds of using a fast path. @@ -109,11 +223,6 @@ st_BlitFramebuffer(struct gl_context *ctx, dstY1 = tmp; } - /* Disable conditional rendering. */ - if (st->render_condition) { - st->pipe->render_condition(st->pipe, NULL, 0); - } - if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = &readFB->Attachment[readFB->_ColorReadBufferIndex]; diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 7374bb0acc5..a451b44049e 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -93,7 +93,6 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj) */ static void st_bufferobj_subdata(struct gl_context *ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object *obj) @@ -133,7 +132,6 @@ st_bufferobj_subdata(struct gl_context *ctx, */ static void st_bufferobj_get_subdata(struct gl_context *ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object *obj) @@ -238,52 +236,10 @@ static long st_bufferobj_zero_length = 0; /** - * Called via glMapBufferARB(). - */ -static void * -st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - uint flags; - - switch (access) { - case GL_WRITE_ONLY: - flags = PIPE_TRANSFER_WRITE; - break; - case GL_READ_ONLY: - flags = PIPE_TRANSFER_READ; - break; - case GL_READ_WRITE: - default: - flags = PIPE_TRANSFER_READ_WRITE; - break; - } - - /* Handle zero-size buffers here rather than in drivers */ - if (obj->Size == 0) { - obj->Pointer = &st_bufferobj_zero_length; - } - else { - obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, - st_obj->buffer, - flags, - &st_obj->transfer); - } - - if (obj->Pointer) { - obj->Offset = 0; - obj->Length = obj->Size; - } - return obj->Pointer; -} - - -/** * Called via glMapBufferRange(). */ static void * -st_bufferobj_map_range(struct gl_context *ctx, GLenum target, +st_bufferobj_map_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { @@ -353,7 +309,7 @@ st_bufferobj_map_range(struct gl_context *ctx, GLenum target, static void -st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, +st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj) { @@ -378,7 +334,7 @@ st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, * Called via glUnmapBufferARB(). */ static GLboolean -st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj) { struct pipe_context *pipe = st_context(ctx)->pipe; struct st_buffer_object *st_obj = st_buffer_object(obj); @@ -444,7 +400,6 @@ st_init_bufferobject_functions(struct dd_function_table *functions) functions->BufferData = st_bufferobj_data; functions->BufferSubData = st_bufferobj_subdata; functions->GetBufferSubData = st_bufferobj_get_subdata; - functions->MapBuffer = st_bufferobj_map; functions->MapBufferRange = st_bufferobj_map_range; functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; functions->UnmapBuffer = st_bufferobj_unmap; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 1d908c0317a..390c518699f 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,6 +94,46 @@ is_passthrough_program(const struct gl_fragment_program *prog) } +/** + * Returns a fragment program which implements the current pixel transfer ops. + */ +static struct gl_fragment_program * +get_glsl_pixel_transfer_program(struct st_context *st, + struct st_fragment_program *orig) +{ + int pixelMaps = 0, scaleAndBias = 0; + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + scaleAndBias = 1; + } + + pixelMaps = ctx->Pixel.MapColorFlag; + + if (pixelMaps) { + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_sampler_view = + st_create_texture_sampler_view(st->pipe, + st->pixel_xfer.pixelmap_texture); + } + } + + get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, + scaleAndBias, pixelMaps); + + return &fp->Base; +} + /** * Make fragment shader for glDraw/CopyPixels. This shader is made @@ -107,11 +147,15 @@ st_make_drawpix_fragment_program(struct st_context *st, struct gl_fragment_program **fpOut) { struct gl_program *newProg; + struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; if (is_passthrough_program(fpIn)) { newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, &st->pixel_xfer.program->Base); } + else if (stfp->glsl_to_tgsi != NULL) { + newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); + } else { #if 0 /* debug */ diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 32694975d17..2abb4d8f082 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -44,6 +44,7 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" +#include "st_glsl_to_tgsi.h" @@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) { struct st_vertex_program *stvp = (struct st_vertex_program *) prog; st_release_vp_variants( st, stvp ); + + if (stvp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; case MESA_GEOMETRY_PROGRAM: @@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); + + if (stgp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); if (stgp->tgsi.tokens) { st_free_tokens((void *) stgp->tgsi.tokens); @@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_fp_variants(st, stfp); + if (stfp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + if (stfp->tgsi.tokens) { st_free_tokens(stfp->tgsi.tokens); stfp->tgsi.tokens = NULL; @@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; + + functions->NewShader = st_new_shader; + functions->NewShaderProgram = st_new_shader_program; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6907cfc03cf..a3b2ba9e78d 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -344,7 +344,7 @@ guess_and_alloc_texture(struct st_context *st, stImage->base.Width2, stImage->base.Height2, stImage->base.Depth2, - stImage->level, + stImage->base.Level, &width, &height, &depth)) { /* we can't determine the image size at level=0 */ stObj->width0 = stObj->height0 = stObj->depth0 = 0; @@ -367,7 +367,7 @@ guess_and_alloc_texture(struct st_context *st, stImage->base._BaseFormat == GL_DEPTH_COMPONENT || stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) && !stObj->base.GenerateMipmap && - stImage->level == 0) { + stImage->base.Level == 0) { /* only alloc space for a single mipmap level */ lastLevel = 0; } @@ -506,8 +506,8 @@ st_TexImage(struct gl_context * ctx, assert(texImage->Depth == depth); } - stImage->face = _mesa_tex_target_to_face(target); - stImage->level = level; + stImage->base.Face = _mesa_tex_target_to_face(target); + stImage->base.Level = level; _mesa_set_fetch_functions(texImage, dims); @@ -529,7 +529,7 @@ st_TexImage(struct gl_context * ctx, if (stObj->pt) { if (level > (GLint) stObj->pt->last_level || !st_texture_match_image(stObj->pt, &stImage->base, - stImage->face, stImage->level)) { + stImage->base.Face, stImage->base.Level)) { DBG("release it\n"); pipe_resource_reference(&stObj->pt, NULL); assert(!stObj->pt); @@ -563,7 +563,7 @@ st_TexImage(struct gl_context * ctx, */ if (stObj->pt && st_texture_match_image(stObj->pt, &stImage->base, - stImage->face, stImage->level)) { + stImage->base.Face, stImage->base.Level)) { pipe_resource_reference(&stImage->pt, stObj->pt); assert(stImage->pt); @@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx, depth/stencil samples per pixel? Need some transfer clarifications. */ assert(sample_count < 2); - if (srcX < 0) { - width -= -srcX; - destX += -srcX; - srcX = 0; - } - - if (srcY < 0) { - height -= -srcY; - destY += -srcY; - srcY = 0; - } - - if (destX < 0) { - width -= -destX; - srcX += -destX; - destX = 0; - } - - if (destY < 0) { - height -= -destY; - srcY += -destY; - destY = 0; - } - - if (width < 0 || height < 0) - return; - - assert(strb); assert(strb->surface); assert(stImage->pt); @@ -1529,8 +1501,8 @@ st_copy_texsubimage(struct gl_context *ctx, pipe->resource_copy_region(pipe, /* dest */ stImage->pt, - stImage->level, - destX, destY, destZ + stImage->face, + stImage->base.Level, + destX, destY, destZ + stImage->base.Face, /* src */ strb->texture, strb->surface->u.tex.level, @@ -1552,9 +1524,9 @@ st_copy_texsubimage(struct gl_context *ctx, memset(&surf_tmpl, 0, sizeof(surf_tmpl)); surf_tmpl.format = util_format_linear(stImage->pt->format); surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; - surf_tmpl.u.tex.level = stImage->level; - surf_tmpl.u.tex.first_layer = stImage->face + destZ; - surf_tmpl.u.tex.last_layer = stImage->face + destZ; + surf_tmpl.u.tex.level = stImage->base.Level; + surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ; + surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ; dest_surface = pipe->create_surface(pipe, stImage->pt, &surf_tmpl); @@ -1610,59 +1582,6 @@ st_copy_texsubimage(struct gl_context *ctx, static void -st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, 1); /* src X, Y, size */ -} - - -static void -st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, height); /* src X, Y, size */ -} - - -static void st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -1710,7 +1629,7 @@ copy_image_data_to_texture(struct st_context *st, /* debug checks */ { const struct gl_texture_image *dstImage = - stObj->base.Image[stImage->face][dstLevel]; + stObj->base.Image[stImage->base.Face][dstLevel]; assert(dstImage); assert(dstImage->Width == stImage->base.Width); assert(dstImage->Height == stImage->base.Height); @@ -1722,15 +1641,15 @@ copy_image_data_to_texture(struct st_context *st, */ st_texture_image_copy(st->pipe, stObj->pt, dstLevel, /* dest texture, level */ - stImage->pt, stImage->level, /* src texture, level */ - stImage->face); + stImage->pt, stImage->base.Level, /* src texture, level */ + stImage->base.Face); pipe_resource_reference(&stImage->pt, NULL); } else if (stImage->base.Data) { st_texture_image_data(st, stObj->pt, - stImage->face, + stImage->base.Face, dstLevel, stImage->base.Data, stImage->base.RowStride * @@ -1947,8 +1866,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D; functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D; functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D; - functions->CopyTexImage1D = st_CopyTexImage1D; - functions->CopyTexImage2D = st_CopyTexImage2D; functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99b231d9706..8e900934054 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st) } +static GLboolean st_get_s3tc_override(void) +{ + const char *override = _mesa_getenv("force_s3tc_enable"); + if (override && !strcmp(override, "true")) + return GL_TRUE; + return GL_FALSE; +} + + /** * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine * which GL extensions are supported. @@ -219,6 +228,7 @@ void st_init_extensions(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_context *ctx = st->ctx; + int i; /* * Extensions that are supported by all Gallium drivers: @@ -426,7 +436,7 @@ void st_init_extensions(struct st_context *st) if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW) && - ctx->Mesa_DXTn) { + (ctx->Mesa_DXTn || st_get_s3tc_override())) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; ctx->Extensions.S3_s3tc = GL_TRUE; } @@ -596,6 +606,16 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_packed_float = GL_TRUE; } + /* Maximum sample count. */ + for (i = 16; i > 0; --i) { + if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_TEXTURE_2D, i, + PIPE_BIND_RENDER_TARGET)) { + ctx->Const.MaxSamples = i; + break; + } + } + if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index b0911294a7c..82ca4af7fe4 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -453,7 +453,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, srcImage->TexFormat); stImage = st_texture_image(dstImage); - stImage->level = dstLevel; pipe_resource_reference(&stImage->pt, pt); } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp new file mode 100644 index 00000000000..9cac30995af --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -0,0 +1,5142 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file glsl_to_tgsi.cpp + * + * Translate GLSL IR to TGSI. + */ + +#include <stdio.h> +#include "main/compiler.h" +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/hash_table.h" +#include "program/prog_instruction.h" +#include "program/prog_optimize.h" +#include "program/prog_print.h" +#include "program/program.h" +#include "program/prog_uniform.h" +#include "program/prog_parameter.h" +#include "program/sampler.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" +#include "st_context.h" +#include "st_program.h" +#include "st_glsl_to_tgsi.h" +#include "st_mesa_to_tgsi.h" +} + +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) + +#define MAX_TEMPS 4096 + +class st_src_reg; +class st_dst_reg; + +static int swizzle_for_size(int size); + +/** + * This struct is a corresponding struct to TGSI ureg_src. + */ +class st_src_reg { +public: + st_src_reg(gl_register_file file, int index, const glsl_type *type) + { + this->file = file; + this->index = index; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->type = type ? type->base_type : GLSL_TYPE_ERROR; + this->reladdr = NULL; + } + + st_src_reg(gl_register_file file, int index, int type) + { + this->type = type; + this->file = file; + this->index = index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + + st_src_reg() + { + this->type = GLSL_TYPE_ERROR; + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->swizzle = 0; + this->negate = 0; + this->reladdr = NULL; + } + + explicit st_src_reg(st_dst_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate; /**< NEGATE_XYZW mask from mesa */ + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +class st_dst_reg { +public: + st_dst_reg(gl_register_file file, int writemask, int type) + { + this->file = file; + this->index = 0; + this->writemask = writemask; + this->cond_mask = COND_TR; + this->reladdr = NULL; + this->type = type; + } + + st_dst_reg() + { + this->type = GLSL_TYPE_ERROR; + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->writemask = 0; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + explicit st_dst_reg(st_src_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + GLuint cond_mask:4; + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +st_src_reg::st_src_reg(st_dst_reg reg) +{ + this->type = reg.type; + this->file = reg.file; + this->index = reg.index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = reg.reladdr; +} + +st_dst_reg::st_dst_reg(st_src_reg reg) +{ + this->type = reg.type; + this->file = reg.file; + this->index = reg.index; + this->writemask = WRITEMASK_XYZW; + this->cond_mask = COND_TR; + this->reladdr = reg.reladdr; +} + +class glsl_to_tgsi_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + unsigned op; + st_dst_reg dst; + st_src_reg src[3]; + /** Pointer to the ir source this tree came from for debugging */ + ir_instruction *ir; + GLboolean cond_update; + bool saturate; + int sampler; /**< sampler index */ + int tex_target; /**< One of TEXTURE_*_INDEX */ + GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ + + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ +}; + +class variable_storage : public exec_node { +public: + variable_storage(ir_variable *var, gl_register_file file, int index) + : file(file), index(index), var(var) + { + /* empty */ + } + + gl_register_file file; + int index; + ir_variable *var; /* variable that maps to this, if any */ +}; + +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + +class function_entry : public exec_node { +public: + ir_function_signature *sig; + + /** + * identifier of this function signature used by the program. + * + * At the point that TGSI instructions for function calls are + * generated, we don't know the address of the first instruction of + * the function body. So we make the BranchTarget that is called a + * small integer and rewrite them during set_branchtargets(). + */ + int sig_id; + + /** + * Pointer to first instruction of the function body. + * + * Set during function body emits after main() is processed. + */ + glsl_to_tgsi_instruction *bgn_inst; + + /** + * Index of the first instruction of the function body in actual TGSI. + * + * Set after conversion from glsl_to_tgsi_instruction to TGSI. + */ + int inst; + + /** Storage for the return value. */ + st_src_reg return_reg; +}; + +class glsl_to_tgsi_visitor : public ir_visitor { +public: + glsl_to_tgsi_visitor(); + ~glsl_to_tgsi_visitor(); + + function_entry *current_function; + + struct gl_context *ctx; + struct gl_program *prog; + struct gl_shader_program *shader_program; + struct gl_shader_compiler_options *options; + + int next_temp; + + int num_address_regs; + int samplers_used; + bool indirect_addr_temps; + bool indirect_addr_consts; + + int glsl_version; + bool native_integers; + + variable_storage *find_variable_storage(ir_variable *var); + + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + + function_entry *get_function_signature(ir_function_signature *sig); + + st_src_reg get_temp(const glsl_type *type); + void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); + + st_src_reg st_src_reg_for_float(float val); + st_src_reg st_src_reg_for_int(int val); + st_src_reg st_src_reg_for_type(int type, int val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + st_src_reg result; + + /** List of variable_storage */ + exec_list variables; + + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + + /** List of function_entry */ + exec_list function_signatures; + int next_signature_id; + + /** List of glsl_to_tgsi_instruction */ + exec_list instructions; + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2); + + unsigned get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); + + void emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0); + + void emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + + void emit_scs(ir_instruction *ir, unsigned op, + st_dst_reg dst, const st_src_reg &src); + + bool try_emit_mad(ir_expression *ir, + int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); + + void emit_swz(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void remove_output_reads(gl_register_file type); + void simplify_cmp(void); + + void rename_temp_register(int index, int new_index); + int get_first_temp_read(int index); + int get_first_temp_write(int index); + int get_last_temp_read(int index); + int get_last_temp_write(int index); + + void copy_propagate(void); + void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); + void merge_registers(void); + void renumber_registers(void); + + void *mem_ctx; +}; + +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); + +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); + +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, args); + va_end(args); + + prog->LinkStatus = GL_FALSE; +} + +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +static bool +is_tex_instruction(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex; +} + +static unsigned +num_inst_dst_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->num_dst; +} + +static unsigned +num_inst_src_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex ? info->num_src - 1 : info->num_src; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2) +{ + glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); + int num_reladdr = 0, i; + + op = get_opcode(ir, op, dst, src0, src1); + + /* If we have to do relative addressing, we want to load the ARL + * reg directly for one of the regs, and preload the other reladdr + * sources into temps. + */ + num_reladdr += dst.reladdr != NULL; + num_reladdr += src0.reladdr != NULL; + num_reladdr += src1.reladdr != NULL; + num_reladdr += src2.reladdr != NULL; + + reladdr_to_temp(ir, &src2, &num_reladdr); + reladdr_to_temp(ir, &src1, &num_reladdr); + reladdr_to_temp(ir, &src0, &num_reladdr); + + if (dst.reladdr) { + emit_arl(ir, address_reg, *dst.reladdr); + num_reladdr--; + } + assert(num_reladdr == 0); + + inst->op = op; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = ir; + inst->dead_mask = 0; + + inst->function = NULL; + + if (op == TGSI_OPCODE_ARL) + this->num_address_regs = 1; + + /* Update indirect addressing status used by TGSI */ + if (dst.reladdr) { + switch(dst.file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; + default: + break; + } + } + else { + for (i=0; i<3; i++) { + if(inst->src[i].reladdr) { + switch(inst->src[i].file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; + default: + break; + } + } + } + } + + this->instructions.push_tail(inst); + + return inst; +} + + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1) +{ + return emit(ir, op, dst, src0, src1, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0) +{ + assert(dst.writemask != 0); + return emit(ir, op, dst, src0, undef_src, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) +{ + return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); +} + +/** + * Determines whether to use an integer, unsigned integer, or float opcode + * based on the operands and input opcode, then emits the result. + * + * TODO: type checking for remaining TGSI opcodes + */ +unsigned +glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1) +{ + int type = GLSL_TYPE_FLOAT; + + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) + type = GLSL_TYPE_FLOAT; + else if (native_integers) + type = src0.type; + +#define case4(c, f, i, u) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ + else op = TGSI_OPCODE_##f; \ + break; +#define case3(f, i, u) case4(f, f, i, u) +#define case2fi(f, i) case4(f, f, i, i) +#define case2iu(i, u) case4(i, LAST, i, u) + + switch(op) { + case2fi(ADD, UADD); + case2fi(MUL, UMUL); + case2fi(MAD, UMAD); + case3(DIV, IDIV, UDIV); + case3(MAX, IMAX, UMAX); + case3(MIN, IMIN, UMIN); + case2iu(MOD, UMOD); + + case2fi(SEQ, USEQ); + case2fi(SNE, USNE); + case3(SGE, ISGE, USGE); + case3(SLT, ISLT, USLT); + + case2iu(SHL, SHL); + case2iu(ISHR, USHR); + case2iu(NOT, NOT); + case2iu(AND, AND); + case2iu(OR, OR); + case2iu(XOR, XOR); + + default: break; + } + + assert(op != TGSI_OPCODE_LAST); + return op; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0, st_src_reg src1, + unsigned elements) +{ + static const unsigned dot_opcodes[] = { + TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 + }; + + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); +} + +/** + * Emits TGSI scalar opcodes to produce unique answers across channels. + * + * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X + * channel determines the result across all channels. So to do a vec4 + * of this operation, we want to emit a scalar per source channel used + * to produce dest channels. + */ +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg orig_src0, st_src_reg orig_src1) +{ + int i, j; + int done_mask = ~dst.writemask; + + /* TGSI RCP is a scalar operation splatting results to all channels, + * like ARB_fp/vp. So emit as many RCPs as necessary to cover our + * dst channels. + */ + for (i = 0; i < 4; i++) { + GLuint this_mask = (1 << i); + glsl_to_tgsi_instruction *inst; + st_src_reg src0 = orig_src0; + st_src_reg src1 = orig_src1; + + if (done_mask & this_mask) + continue; + + GLuint src0_swiz = GET_SWZ(src0.swizzle, i); + GLuint src1_swiz = GET_SWZ(src1.swizzle, i); + for (j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz && + GET_SWZ(src1.swizzle, j) == src1_swiz) { + this_mask |= (1 << j); + } + } + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, + src1_swiz, src1_swiz); + + inst = emit(ir, op, dst, src0, src1); + inst->dst.writemask = this_mask; + done_mask |= this_mask; + } +} + +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg undef = undef_src; + + undef.swizzle = SWIZZLE_XXXX; + + emit_scalar(ir, op, dst, src0, undef); +} + +void +glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg tmp = get_temp(glsl_type::float_type); + + if (src0.type == GLSL_TYPE_INT) + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + else if (src0.type == GLSL_TYPE_UINT) + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + else + tmp = src0; + + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); +} + +/** + * Emit an TGSI_OPCODE_SCS instruction + * + * The \c SCS opcode functions a bit differently than the other TGSI opcodes. + * Instead of splatting its result across all four components of the + * destination, it writes one value to the \c x component and another value to + * the \c y component. + * + * \param ir IR instruction being processed + * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending + * on which value is desired. + * \param dst Destination register + * \param src Source register + */ +void +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, + st_dst_reg dst, + const st_src_reg &src) +{ + /* Vertex programs cannot use the SCS opcode. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { + emit_scalar(ir, op, dst, src); + return; + } + + const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; + const unsigned scs_mask = (1U << component); + int done_mask = ~dst.writemask; + st_src_reg tmp; + + assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); + + /* If there are compnents in the destination that differ from the component + * that will be written by the SCS instrution, we'll need a temporary. + */ + if (scs_mask != unsigned(dst.writemask)) { + tmp = get_temp(glsl_type::vec4_type); + } + + for (unsigned i = 0; i < 4; i++) { + unsigned this_mask = (1U << i); + st_src_reg src0 = src; + + if ((done_mask & this_mask) != 0) + continue; + + /* The source swizzle specified which component of the source generates + * sine / cosine for the current component in the destination. The SCS + * instruction requires that this value be swizzle to the X component. + * Replace the current swizzle with a swizzle that puts the source in + * the X component. + */ + unsigned src0_swiz = GET_SWZ(src.swizzle, i); + + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + for (unsigned j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz) { + this_mask |= (1 << j); + } + } + + if (this_mask != scs_mask) { + glsl_to_tgsi_instruction *inst; + st_dst_reg tmp_dst = st_dst_reg(tmp); + + /* Emit the SCS instruction. + */ + inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); + inst->dst.writemask = scs_mask; + + /* Move the result of the SCS instruction to the desired location in + * the destination. + */ + tmp.swizzle = MAKE_SWIZZLE4(component, component, + component, component); + inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); + inst->dst.writemask = this_mask; + } else { + /* Emit the SCS instruction to write directly to the destination. + */ + glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); + inst->dst.writemask = scs_mask; + } + + done_mask |= this_mask; + } +} + +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_float(float val) +{ + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); + union gl_constant_value uval; + + uval.f = val; + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); + + return src; +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_int(int val) +{ + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); + union gl_constant_value uval; + + assert(native_integers); + + uval.i = val; + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); + + return src; +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +{ + if (native_integers) + return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : + st_src_reg_for_int(val); + else + return st_src_reg_for_float(val); +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). + */ +st_src_reg +glsl_to_tgsi_visitor::get_temp(const glsl_type *type) +{ + st_src_reg src; + + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + src.reladdr = NULL; + next_temp += type_size(type); + + if (type->is_array() || type->is_record()) { + src.swizzle = SWIZZLE_NOOP; + } else { + src.swizzle = swizzle_for_size(type->vector_elements); + } + src.negate = 0; + + return src; +} + +variable_storage * +glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) +{ + + variable_storage *entry; + + foreach_iter(exec_list_iterator, iter, this->variables) { + entry = (variable_storage *)iter.get(); + + if (entry->var == var) + return entry; + } + + return NULL; +} + +void +glsl_to_tgsi_visitor::visit(ir_variable *ir) +{ + if (strcmp(ir->name, "gl_FragCoord") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + fp->OriginUpperLeft = ir->origin_upper_left; + fp->PixelCenterInteger = ir->pixel_center_integer; + + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + switch (ir->depth_layout) { + case ir_depth_layout_none: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + break; + case ir_depth_layout_any: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + break; + case ir_depth_layout_greater: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case ir_depth_layout_less: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + break; + case ir_depth_layout_unchanged: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + default: + assert(0); + break; + } + } + + if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { + unsigned int i; + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + /* Check if this statevar's setup in the STATE file exactly + * matches how we'll want to reference it as a + * struct/array/whatever. If not, then we need to move it into + * temporary storage and hope that it'll get copy-propagated + * out. + */ + for (i = 0; i < ir->num_state_slots; i++) { + if (slots[i].swizzle != SWIZZLE_XYZW) { + break; + } + } + + variable_storage *storage; + st_dst_reg dst; + if (i == ir->num_state_slots) { + /* We'll set the index later. */ + storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); + this->variables.push_tail(storage); + + dst = undef_dst; + } else { + /* The variable_storage constructor allocates slots based on the size + * of the type. However, this had better match the number of state + * elements that we're going to copy into the new temporary. + */ + assert((int) ir->num_state_slots == type_size(ir->type)); + + storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + this->next_temp += type_size(ir->type); + + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); + } + + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + + if (storage->file == PROGRAM_STATE_VAR) { + if (storage->index == -1) { + storage->index = index; + } else { + assert(index == storage->index + (int)i); + } + } else { + st_src_reg src(PROGRAM_STATE_VAR, index, + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); + src.swizzle = slots[i].swizzle; + emit(ir, TGSI_OPCODE_MOV, dst, src); + /* even a float takes up a whole vec4 reg in a struct/array. */ + dst.index++; + } + } + + if (storage->file == PROGRAM_TEMPORARY && + dst.index != storage->index + (int) ir->num_state_slots) { + fail_link(this->shader_program, + "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); + } + } +} + +void +glsl_to_tgsi_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(NULL, TGSI_OPCODE_BGNLOOP); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_exec_list(&ir->body_instructions, this); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(NULL, TGSI_OPCODE_ENDLOOP); +} + +void +glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(NULL, TGSI_OPCODE_BRK); + break; + case ir_loop_jump::jump_continue: + emit(NULL, TGSI_OPCODE_CONT); + break; + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +glsl_to_tgsi_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to glsl_to_tgsi. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + ir->accept(this); + } + } +} + +bool +glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) +{ + int nonmul_operand = 1 - mul_operand; + st_src_reg a, b, c; + st_dst_reg result_dst; + + ir_expression *expr = ir->operands[mul_operand]->as_expression(); + if (!expr || expr->operation != ir_binop_mul) + return false; + + expr->operands[0]->accept(this); + a = this->result; + expr->operands[1]->accept(this); + b = this->result; + ir->operands[nonmul_operand]->accept(this); + c = this->result; + + this->result = get_temp(ir->type); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); + + return true; +} + +/** + * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + st_src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + + return true; +} + +bool +glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) +{ + /* Saturates were only introduced to vertex programs in + * NV_vertex_program3, so don't give them to drivers in the VP. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) + return false; + + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + st_src_reg src = this->result; + + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } + + return true; +} + +void +glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, + st_src_reg *reg, int *num_reladdr) +{ + if (!reg->reladdr) + return; + + emit_arl(ir, address_reg, *reg->reladdr); + + if (*num_reladdr != 1) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + + emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); + *reg = temp; + } + + (*num_reladdr)--; +} + +void +glsl_to_tgsi_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + st_src_reg op[Elements(ir->operands)]; + st_src_reg result_src; + st_dst_reg result_dst; + + /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) + */ + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 1)) + return; + if (try_emit_mad(ir, 0)) + return; + } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + + if (try_emit_sat(ir)) + return; + + if (ir->operation == ir_quadop_vector) + assert(!"ir_quadop_vector should have been lowered"); + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = PROGRAM_UNDEFINED; + ir->operands[operand]->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = PROGRAM_UNDEFINED; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = st_dst_reg(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + if (result_dst.type != GLSL_TYPE_FLOAT) + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + else { + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + } + break; + case ir_unop_neg: + assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); + if (result_dst.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else { + op[0].negate = ~op[0].negate; + result_src = op[0]; + } + break; + case ir_unop_abs: + assert(result_dst.type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_log2: + emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); + break; + case ir_unop_sin_reduced: + emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); + break; + + case ir_unop_noise: { + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + break; + } + + case ir_binop_add: + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + else + emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + break; + case ir_binop_mod: + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + else + emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); + break; + + case ir_binop_less: + emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_nequal: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + break; + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ + emit_dp(ir, result_dst, temp, temp, vector_elements); + + if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. + * This is achieved using SGE. + */ + st_src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + } else { + /* The TGSI negate flag doesn't work for integers, so use SEQ 0 + * instead. + */ + emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0)); + } + } else { + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + } else { + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + } + break; + + case ir_unop_any: { + assert(ir->operands[0]->type->is_vector()); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + break; + } + + case ir_binop_logic_xor: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + /* Use an SNE on the result of the addition. Zero stays zero, + * 1 stays 1, and 2 becomes 1. + */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + break; + } + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(ir, result_dst, op[0], op[1], + ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + break; + case ir_unop_rsq: + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + break; + } + case ir_unop_i2u: + case ir_unop_u2i: + /* Converting between signed and unsigned integers is a no-op. */ + case ir_unop_b2i: + /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ + result_src = op[0]; + break; + case ir_unop_f2i: + if (native_integers) + emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_type(result_dst.type, 0)); + break; + case ir_unop_trunc: + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + break; + } + case ir_unop_u2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + break; + } + case ir_binop_lshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + break; + } + case ir_binop_rshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + break; + } + case ir_binop_bit_and: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + break; + } + case ir_binop_bit_xor: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + break; + } + case ir_binop_bit_or: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + break; + } + case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; + } + + this->result = result_src; +} + + +void +glsl_to_tgsi_visitor::visit(ir_swizzle *ir) +{ + st_src_reg src; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) +{ + variable_storage *entry = find_variable_storage(ir->var); + ir_variable *var = ir->var; + + if (!entry) { + switch (var->mode) { + case ir_var_uniform: + entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, + var->location); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! + */ + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->location); + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(this->prog->Attributes, + var->name, + _mesa_sizeof_glsl_type(var->type->gl_type), + var->type->gl_type, + var->location - VERT_ATTRIB_GENERIC0); + } + break; + case ir_var_out: + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->location); + break; + case ir_var_system_value: + entry = new(mem_ctx) variable_storage(var, + PROGRAM_SYSTEM_VALUE, + var->location); + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", var->name); + exit(1); + } + } + + this->result = st_src_reg(entry->file, entry->index, var->type); + if (!native_integers) + this->result.type = GLSL_TYPE_FLOAT; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + st_src_reg src; + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (index) { + src.index += index->value.i[0] * element_size; + } else { + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the TGSI register + * index. + */ + ir->array_index->accept(this); + + st_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), + this->result, st_src_reg_for_float(element_size)); + } + + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + + src.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = SWIZZLE_NOOP; + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = SWIZZLE_NOOP; + + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static st_dst_reg +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return st_dst_reg(v->result); +} + +/** + * Process the condition of a conditional assignment + * + * Examines the condition of a conditional assignment to generate the optimal + * first operand of a \c CMP instruction. If the condition is a relational + * operator with 0 (e.g., \c ir_binop_less), the value being compared will be + * used as the source for the \c CMP instruction. Otherwise the comparison + * is processed to a boolean result, and the boolean result is used as the + * operand to the CMP instruction. + */ +bool +glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) +{ + ir_rvalue *src_ir = ir; + bool negate = true; + bool switch_order = false; + + ir_expression *const expr = ir->as_expression(); + if ((expr != NULL) && (expr->get_num_operands() == 2)) { + bool zero_on_left = false; + + if (expr->operands[0]->is_zero()) { + src_ir = expr->operands[1]; + zero_on_left = true; + } else if (expr->operands[1]->is_zero()) { + src_ir = expr->operands[0]; + zero_on_left = false; + } + + /* a is - 0 + - 0 + + * (a < 0) T F F ( a < 0) T F F + * (0 < a) F F T (-a < 0) F F T + * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) + * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) + * (a > 0) F F T (-a < 0) F F T + * (0 > a) T F F ( a < 0) T F F + * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) + * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) + * + * Note that exchanging the order of 0 and 'a' in the comparison simply + * means that the value of 'a' should be negated. + */ + if (src_ir != ir) { + switch (expr->operation) { + case ir_binop_less: + switch_order = false; + negate = zero_on_left; + break; + + case ir_binop_greater: + switch_order = false; + negate = !zero_on_left; + break; + + case ir_binop_lequal: + switch_order = true; + negate = !zero_on_left; + break; + + case ir_binop_gequal: + switch_order = true; + negate = zero_on_left; + break; + + default: + /* This isn't the right kind of comparison afterall, so make sure + * the whole condition is visited. + */ + src_ir = ir; + break; + } + } + } + + src_ir->accept(this); + + /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + * condition we produced is 0.0 or 1.0. By flipping the sign, we can + * choose which value TGSI_OPCODE_CMP produces without an extra instruction + * computing the condition. + */ + if (negate) + this->result.negate = ~this->result.negate; + + return switch_order; +} + +void +glsl_to_tgsi_visitor::visit(ir_assignment *ir) +{ + st_dst_reg l; + st_src_reg r; + int i; + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + l.writemask = WRITEMASK_XYZW; + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { + /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the + * FINISHME: W component of fragment shader output zero, work correctly. + */ + l.writemask = WRITEMASK_XYZW; + } else { + int swizzles[4]; + int first_enabled_chan = 0; + int rhs_chan = 0; + + l.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) { + first_enabled_chan = GET_SWZ(r.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while TGSI treats write_mask as just + * showing which channels of the vec4 RHS get written. + */ + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) + swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); + else + swizzles[i] = first_enabled_chan; + } + r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + } + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + const bool switch_order = this->process_move_condition(ir->condition); + st_src_reg condition = this->result; + + for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + + if (switch_order) { + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); + } else { + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); + } + + l.index++; + r.index++; + } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. + */ + glsl_to_tgsi_instruction *inst, *new_inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_constant *ir) +{ + st_src_reg src; + GLfloat stack_vals[4] = { 0 }; + gl_constant_value *values = (gl_constant_value *) stack_vals; + GLenum gl_type = GL_NONE; + unsigned int i; + static int in_array = 0; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; + + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_typed_unnamed_constant. So, make a temp to store an + * aggregate constant and move each constant value into it. If we + * get lucky, copy propagation will eliminate the extra moves. + */ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src = this->result; + + for (i = 0; i < (unsigned int)size; i++) { + emit(ir, TGSI_OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + in_array++; + + for (i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src = this->result; + for (int j = 0; j < size; j++) { + emit(ir, TGSI_OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + in_array--; + return; + } + + if (ir->type->is_matrix()) { + st_src_reg mat = get_temp(ir->type); + st_dst_reg mat_column = st_dst_reg(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; + + src = st_src_reg(file, -1, ir->type->base_type); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); + emit(ir, TGSI_OPCODE_MOV, mat_column, src); + + mat_column.index++; + } + + this->result = mat; + return; + } + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + gl_type = GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + values[i].f = ir->value.f[i]; + } + break; + case GLSL_TYPE_UINT: + gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].u = ir->value.u[i]; + else + values[i].f = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + gl_type = native_integers ? GL_INT : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].i = ir->value.i[i]; + else + values[i].f = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + gl_type = native_integers ? GL_BOOL : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].b = ir->value.b[i]; + else + values[i].f = ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + this->result = st_src_reg(file, -1, ir->type); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); +} + +function_entry * +glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = ralloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. */ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + } + + if (!sig->return_type->is_void()) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = undef_src; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +glsl_to_tgsi_visitor::visit(ir_call *ir) +{ + glsl_to_tgsi_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. */ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + st_src_reg r = this->result; + + st_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = emit(ir, TGSI_OPCODE_CAL); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + st_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + st_dst_reg l = st_dst_reg(this->result); + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + +void +glsl_to_tgsi_visitor::visit(ir_texture *ir) +{ + st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_dst_reg result_dst, coord_dst; + glsl_to_tgsi_instruction *inst = NULL; + unsigned opcode = TGSI_OPCODE_NOP; + + if (ir->coordinate) { + ir->coordinate->accept(this); + + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + } + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = st_dst_reg(result_src); + + switch (ir->op) { + case ir_tex: + opcode = TGSI_OPCODE_TEX; + break; + case ir_txb: + opcode = TGSI_OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = TGSI_OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + opcode = TGSI_OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; + case ir_txs: + opcode = TGSI_OPCODE_TXQ; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txf: + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + } + + if (ir->projector) { + if (opcode == TGSI_OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = TGSI_OPCODE_TXP; + } else { + st_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by LOD info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); + + /* In the case where we have to project the coordinates "by hand," + * the shadow comparator value must also be projected. + */ + st_src_reg tmp_src = coord; + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + + tmp_src = get_temp(glsl_type::vec4_type); + st_dst_reg tmp_dst = st_dst_reg(tmp_src); + + tmp_dst.writemask = WRITEMASK_Z; + emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); + + tmp_dst.writemask = WRITEMASK_XY; + emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); + } + + coord_dst.writemask = WRITEMASK_XYZ; + emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow + * comparator was put in the correct place (and projected) by the code, + * above, that handles by-hand projection. + */ + if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXF) { + /* TGSI stores LOD or LOD bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == TGSI_OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else if (opcode == TGSI_OPCODE_TXQ) + inst = emit(ir, opcode, result_dst, lod_info); + else + inst = emit(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + + const glsl_type *sampler_type = ir->sampler->type; + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + assert(!"FINISHME: Implement ARB_texture_buffer_object"); + break; + default: + assert(!"Should not get here."); + } + + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_return *ir) +{ + if (ir->get_value()) { + st_dst_reg l; + int i; + + assert(current_function); + + ir->get_value()->accept(this); + st_src_reg r = this->result; + + l = st_dst_reg(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + emit(ir, TGSI_OPCODE_RET); +} + +void +glsl_to_tgsi_visitor::visit(ir_discard *ir) +{ + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); + } else { + emit(ir, TGSI_OPCODE_KILP); + } + + fp->UsesKill = GL_TRUE; +} + +void +glsl_to_tgsi_visitor::visit(ir_if *ir) +{ + glsl_to_tgsi_instruction *cond_inst, *if_inst; + glsl_to_tgsi_instruction *prev_inst; + + prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (this->options->EmitCondCodes) { + cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. + */ + if (cond_inst == prev_inst) { + st_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = emit(ir->condition, TGSI_OPCODE_IF); + if_inst->dst.cond_mask = COND_NE; + } else { + if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + emit(ir->condition, TGSI_OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); +} + +glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + num_immediates = 0; + current_function = NULL; + num_address_regs = 0; + indirect_addr_temps = false; + indirect_addr_consts = false; + mem_ctx = ralloc_context(NULL); +} + +glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() +{ + ralloc_free(mem_ctx); +} + +extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) +{ + delete v; +} + + +/** + * Count resources used by the given gpu program (number of texture + * samplers, etc). + */ +static void +count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) +{ + v->samplers_used = 0; + + foreach_iter(exec_list_iterator, iter, v->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (is_tex_instruction(inst->op)) { + v->samplers_used |= 1 << inst->sampler; + + prog->SamplerTargets[inst->sampler] = + (gl_texture_index)inst->tex_target; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << inst->sampler; + } + } + } + + prog->SamplersUsed = v->samplers_used; + _mesa_update_shader_textures_used(prog); +} + + +/** + * Check if the given vertex/fragment/shader program is within the + * resource limits of the context (number of texture units, etc). + * If any of those checks fail, record a linker error. + * + * XXX more checks are needed... + */ +static void +check_resources(const struct gl_context *ctx, + struct gl_shader_program *shader_program, + glsl_to_tgsi_visitor *prog, + struct gl_program *proginfo) +{ + switch (proginfo->Target) { + case GL_VERTEX_PROGRAM_ARB: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxVertexTextureImageUnits) { + fail_link(shader_program, "Too many vertex shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many vertex shader constants"); + } + break; + case MESA_GEOMETRY_PROGRAM: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxGeometryTextureImageUnits) { + fail_link(shader_program, "Too many geometry shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > + MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { + fail_link(shader_program, "Too many geometry shader constants"); + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxTextureImageUnits) { + fail_link(shader_program, "Too many fragment shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many fragment shader constants"); + } + break; + default: + _mesa_problem(ctx, "unexpected program type in check_resources()"); + } +} + + + +struct uniform_sort { + struct gl_uniform *u; + int pos; +}; + +/* The shader_program->Uniforms list is almost sorted in increasing + * uniform->{Frag,Vert}Pos locations, but not quite when there are + * uniforms shared between targets. We need to add parameters in + * increasing order for the targets. + */ +static int +sort_uniforms(const void *a, const void *b) +{ + struct uniform_sort *u1 = (struct uniform_sort *)a; + struct uniform_sort *u2 = (struct uniform_sort *)b; + + return u1->pos - u2->pos; +} + +/* Add the uniforms to the parameters. The linker chose locations + * in our parameters lists (which weren't created yet), which the + * uniforms code will use to poke values into our parameters list + * when uniforms are updated. + */ +static void +add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, + struct gl_shader *shader, + struct gl_program *prog) +{ + unsigned int i; + unsigned int next_sampler = 0, num_uniforms = 0; + struct uniform_sort *sorted_uniforms; + + sorted_uniforms = ralloc_array(NULL, struct uniform_sort, + shader_program->Uniforms->NumUniforms); + + for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { + struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; + int parameter_index = -1; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + parameter_index = uniform->VertPos; + break; + case GL_FRAGMENT_SHADER: + parameter_index = uniform->FragPos; + break; + case GL_GEOMETRY_SHADER: + parameter_index = uniform->GeomPos; + break; + } + + /* Only add uniforms used in our target. */ + if (parameter_index != -1) { + sorted_uniforms[num_uniforms].pos = parameter_index; + sorted_uniforms[num_uniforms].u = uniform; + num_uniforms++; + } + } + + qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), + sort_uniforms); + + for (i = 0; i < num_uniforms; i++) { + struct gl_uniform *uniform = sorted_uniforms[i].u; + int parameter_index = sorted_uniforms[i].pos; + const glsl_type *type = uniform->Type; + unsigned int size; + + if (type->is_vector() || + type->is_scalar()) { + size = type->vector_elements; + } else { + size = type_size(type) * 4; + } + + gl_register_file file; + if (type->is_sampler() || + (type->is_array() && type->fields.array->is_sampler())) { + file = PROGRAM_SAMPLER; + } else { + file = PROGRAM_UNIFORM; + } + + GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, + uniform->Name); + + if (index < 0) { + index = _mesa_add_parameter(prog->Parameters, file, + uniform->Name, size, type->gl_type, + NULL, NULL, 0x0); + + /* Sampler uniform values are stored in prog->SamplerUnits, + * and the entry in that array is selected by this index we + * store in ParameterValues[]. + */ + if (file == PROGRAM_SAMPLER) { + for (unsigned int j = 0; j < size / 4; j++) + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; + } + + /* The location chosen in the Parameters list here (returned + * from _mesa_add_uniform) has to match what the linker chose. + */ + if (index != parameter_index) { + fail_link(shader_program, "Allocation of uniform `%s' to target " + "failed (%d vs %d)\n", + uniform->Name, index, parameter_index); + } + } + } + + ralloc_free(sorted_uniforms); +} + +static void +set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, + struct gl_shader_program *shader_program, + const char *name, const glsl_type *type, + ir_constant *val) +{ + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, + field_type, field_constant); + field_constant = (ir_constant *)field_constant->next; + } + return; + } + + int loc = _mesa_get_uniform_location(ctx, shader_program, name); + + if (loc == -1) { + fail_link(shader_program, + "Couldn't find uniform for initializer %s\n", name); + return; + } + + for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { + ir_constant *element; + const glsl_type *element_type; + if (type->is_array()) { + element = val->array_elements[i]; + element_type = type->fields.array; + } else { + element = val; + element_type = type; + } + + void *values; + + if (element_type->base_type == GLSL_TYPE_BOOL) { + int *conv = ralloc_array(mem_ctx, int, element_type->components()); + for (unsigned int j = 0; j < element_type->components(); j++) { + conv[j] = element->value.b[j]; + } + values = (void *)conv; + element_type = glsl_type::get_instance(GLSL_TYPE_INT, + element_type->vector_elements, + 1); + } else { + values = &element->value; + } + + if (element_type->is_matrix()) { + _mesa_uniform_matrix(ctx, shader_program, + element_type->matrix_columns, + element_type->vector_elements, + loc, 1, GL_FALSE, (GLfloat *)values); + loc += element_type->matrix_columns; + } else { + _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, + values, element_type->gl_type); + loc += type_size(element_type); + } + } +} + +static void +set_uniform_initializers(struct gl_context *ctx, + struct gl_shader_program *shader_program) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_shader *shader = shader_program->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var || var->mode != ir_var_uniform || !var->constant_value) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, + var->type, var->constant_value); + } + } + + ralloc_free(mem_ctx); +} + +/* + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING + * (for vertex shaders). + * In GLSL shaders, varying vars can be read and written. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + * + * Based on _mesa_remove_output_reads from programopt.c. + */ +void +glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLint outputTypes[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_TEMPS); + + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); + assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + const GLuint numSrc = num_inst_src_regs(inst->op); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->src[j].file == type) { + /* replace the read with a temp reg */ + const GLuint var = inst->src[j].index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_TEMPS, + firstTemp); + outputTypes[var] = inst->src[j].type; + firstTemp = outputMap[var] + 1; + } + inst->src[j].file = PROGRAM_TEMPORARY; + inst->src[j].index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->dst.file = PROGRAM_TEMPORARY; + inst->dst.index = outputMap[inst->dst.index]; + } + } + + /* insert new MOV instructions at the end */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (outputMap[i] >= 0) { + /* MOV VAR[i], TEMP[tmp]; */ + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); + dst.index = i; + this->emit(NULL, TGSI_OPCODE_MOV, dst, src); + } + } +} + +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. */ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + +/* Replaces all references to a temporary register index with another index. */ +void +glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) +{ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned j; + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + inst->src[j].index = new_index; + } + } + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + inst->dst.index = new_index; + } + } +} + +int +glsl_to_tgsi_visitor::get_first_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + return (depth == 0) ? i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_first_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + return (depth == 0) ? i : loop_start; + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_last_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that reads the temporary */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + last = (depth == 0) ? i : -2; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) + depth++; + else if (inst->op == TGSI_OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +int +glsl_to_tgsi_visitor::get_last_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that writes to the temporary */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) + last = (depth == 0) ? i : -2; + + if (inst->op == TGSI_OPCODE_BGNLOOP) + depth++; + else if (inst->op == TGSI_OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY register + * channels for copy propagation and updates following instructions to + * use the original versions. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; + * + * and after: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * which allows for dead code elimination on TEMP[1]'s writes. + */ +void +glsl_to_tgsi_visitor::copy_propagate(void) +{ + glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + /* First, do any copy propagation possible into the src regs. */ + for (int r = 0; r < 3; r++) { + glsl_to_tgsi_instruction *first = NULL; + bool good = true; + int acp_base = inst->src[r].index * 4; + + if (inst->src[r].file != PROGRAM_TEMPORARY || + inst->src[r].reladdr) + continue; + + /* See if we can find entries in the ACP consisting of MOVs + * from the same src register for all the swizzled channels + * of this src register reference. + */ + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; + + if (!copy_chan) { + good = false; + break; + } + + assert(acp_level[acp_base + src_chan] <= level); + + if (!first) { + first = copy_chan; + } else { + if (first->src[0].file != copy_chan->src[0].file || + first->src[0].index != copy_chan->src[0].index) { + good = false; + break; + } + } + } + + if (good) { + /* We've now validated that we can copy-propagate to + * replace this src register reference. Do it. + */ + inst->src[r].file = first->src[0].file; + inst->src[r].index = first->src[0].index; + + int swizzle = 0; + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; + swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << + (3 * i)); + } + inst->src[r].swizzle = swizzle; + } + } + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the ACP entirely. */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + break; + + case TGSI_OPCODE_IF: + ++level; + break; + + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the block from the ACP, but + * leaving those that were not touched. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp_level[4 * r + c] >= level) + acp[4 * r + c] = NULL; + } + } + if (inst->op == TGSI_OPCODE_ENDIF) + --level; + break; + + default: + /* Continuing the block, clear any written channels from + * the ACP. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { + /* Any temporary might be written, so no copy propagation + * across this instruction. + */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + } else if (inst->dst.file == PROGRAM_OUTPUT && + inst->dst.reladdr) { + /* Any output might be written, so no copy propagation + * from outputs across this instruction. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) + acp[4 * r + c] = NULL; + } + } + } else if (inst->dst.file == PROGRAM_TEMPORARY || + inst->dst.file == PROGRAM_OUTPUT) { + /* Clear where it's used as dst. */ + if (inst->dst.file == PROGRAM_TEMPORARY) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + acp[4 * inst->dst.index + c] = NULL; + } + } + } + + /* Clear where it's used as src. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); + + if (acp[4 * r + c]->src[0].file == inst->dst.file && + acp[4 * r + c]->src[0].index == inst->dst.index && + inst->dst.writemask & (1 << src_chan)) + { + acp[4 * r + c] = NULL; + } + } + } + } + break; + } + + /* If this is a copy, add it to the ACP. */ + if (inst->op == TGSI_OPCODE_MOV && + inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate && + !inst->src[0].reladdr && + !inst->src[0].negate) { + for (int i = 0; i < 4; i++) { + if (inst->dst.writemask & (1 << i)) { + acp[4 * inst->dst.index + i] = inst; + acp_level[4 * inst->dst.index + i] = level; + } + } + } + } + + ralloc_free(acp_level); + ralloc_free(acp); +} + +/* + * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production after copy propagation but + * before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * and after this pass: + * + * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) + * FIXME: doesn't eliminate all dead code inside of loops; it steps around them + */ +void +glsl_to_tgsi_visitor::eliminate_dead_code(void) +{ + int i; + + for (i=0; i < this->next_temp; i++) { + int last_read = get_last_temp_read(i); + int j = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && + j > last_read) + { + iter.remove(); + delete inst; + } + + j++; + } + } +} + +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (unsigned i = 0; i < Elements(inst->src); i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + +/* Merges temporary registers together where possible to reduce the number of + * registers needed to run a program. + * + * Produces optimal code only after copy propagation and dead code elimination + * have been run. */ +void +glsl_to_tgsi_visitor::merge_registers(void) +{ + int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int i, j; + + /* Read the indices of the last read and first write to each temp register + * into an array so that we don't have to traverse the instruction list as + * much. */ + for (i=0; i < this->next_temp; i++) { + last_reads[i] = get_last_temp_read(i); + first_writes[i] = get_first_temp_write(i); + } + + /* Start looking for registers with non-overlapping usages that can be + * merged together. */ + for (i=0; i < this->next_temp; i++) { + /* Don't touch unused registers. */ + if (last_reads[i] < 0 || first_writes[i] < 0) continue; + + for (j=0; j < this->next_temp; j++) { + /* Don't touch unused registers. */ + if (last_reads[j] < 0 || first_writes[j] < 0) continue; + + /* We can merge the two registers if the first write to j is after or + * in the same instruction as the last read from i. Note that the + * register at index i will always be used earlier or at the same time + * as the register at index j. */ + if (first_writes[i] <= first_writes[j] && + last_reads[i] <= first_writes[j]) + { + rename_temp_register(j, i); /* Replace all references to j with i.*/ + + /* Update the first_writes and last_reads arrays with the new + * values for the merged register index, and mark the newly unused + * register index as such. */ + last_reads[i] = last_reads[j]; + first_writes[j] = -1; + last_reads[j] = -1; + } + } + } + + ralloc_free(last_reads); + ralloc_free(first_writes); +} + +/* Reassign indices to temporary registers by reusing unused indices created + * by optimization passes. */ +void +glsl_to_tgsi_visitor::renumber_registers(void) +{ + int i = 0; + int new_index = 0; + + for (i=0; i < this->next_temp; i++) { + if (get_first_temp_read(i) < 0) continue; + if (i != new_index) + rename_temp_register(i, new_index); + new_index++; + } + + this->next_temp = new_index; +} + +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. + */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + +/* ------------------------- TGSI conversion stuff -------------------------- */ +struct label { + unsigned branch_target; + unsigned token; +}; + +/** + * Intermediate state used during shader translation. + */ +struct st_translate { + struct ureg_program *ureg; + + struct ureg_dst temps[MAX_TEMPS]; + struct ureg_src *constants; + struct ureg_src *immediates; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst address[1]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + + /* Extra info for handling point size clamping in vertex shader */ + struct ureg_dst pointSizeResult; /**< Actual point size output register */ + struct ureg_src pointSizeConst; /**< Point size range constant register */ + GLint pointSizeOutIndex; /**< Temp point size output register */ + GLboolean prevInstWrotePointSize; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + /* For every instruction that contains a label (eg CALL), keep + * details so that we can go back afterwards and emit the correct + * tgsi instruction number for each label. + */ + struct label *labels; + unsigned labels_size; + unsigned labels_count; + + /* Keep a record of the tgsi instruction number that each mesa + * instruction starts at, will be used to fix up labels after + * translation. + */ + unsigned *insn; + unsigned insn_size; + unsigned insn_count; + + unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ + + boolean error; +}; + +/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ +static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { + TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_INSTANCEID +}; + +/** + * Make note of a branch to a label in the TGSI code. + * After we've emitted all instructions, we'll go over the list + * of labels built here and patch the TGSI code with the actual + * location of each label. + */ +static unsigned *get_label(struct st_translate *t, unsigned branch_target) +{ + unsigned i; + + if (t->labels_count + 1 >= t->labels_size) { + t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); + t->labels = (struct label *)realloc(t->labels, + t->labels_size * sizeof(struct label)); + if (t->labels == NULL) { + static unsigned dummy; + t->error = TRUE; + return &dummy; + } + } + + i = t->labels_count++; + t->labels[i].branch_target = branch_target; + return &t->labels[i].token; +} + +/** + * Called prior to emitting the TGSI code for each instruction. + * Allocate additional space for instructions if needed. + * Update the insn[] array so the next glsl_to_tgsi_instruction points to + * the next TGSI instruction. + */ +static void set_insn_start(struct st_translate *t, unsigned start) +{ + if (t->insn_count + 1 >= t->insn_size) { + t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); + if (t->insn == NULL) { + t->error = TRUE; + return; + } + } + + t->insn[t->insn_count++] = start; +} + +/** + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. + */ +static struct ureg_src +emit_immediate(struct st_translate *t, + gl_constant_value values[4], + int type, int size) +{ + struct ureg_program *ureg = t->ureg; + + switch(type) + { + case GL_FLOAT: + return ureg_DECL_immediate(ureg, &values[0].f, size); + case GL_INT: + return ureg_DECL_immediate_int(ureg, &values[0].i, size); + case GL_UNSIGNED_INT: + case GL_BOOL: + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + +/** + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. + */ +static struct ureg_dst +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) +{ + switch(file) { + case PROGRAM_UNDEFINED: + return ureg_dst_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + + return t->temps[index]; + + case PROGRAM_OUTPUT: + if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) + t->prevInstWrotePointSize = GL_TRUE; + + if (t->procType == TGSI_PROCESSOR_VERTEX) + assert(index < VERT_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < GEOM_RESULT_MAX); + + assert(t->outputMapping[index] < Elements(t->outputs)); + + return t->outputs[t->outputMapping[index]]; + + case PROGRAM_ADDRESS: + return t->address[index]; + + default: + assert(!"unknown dst register file"); + return ureg_dst_undef(); + } +} + +/** + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. + */ +static struct ureg_src +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) +{ + switch(file) { + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + return ureg_src(t->temps[index]); + + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_UNIFORM: + assert(index >= 0); + return t->constants[index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: /* ie, immediate */ + if (index < 0) + return ureg_DECL_constant(t->ureg, 0); + else + return t->constants[index]; + + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + + case PROGRAM_INPUT: + assert(t->inputMapping[index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[index]]; + + case PROGRAM_OUTPUT: + assert(t->outputMapping[index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ + + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); + + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + + default: + assert(!"unknown src register file"); + return ureg_src_undef(); + } +} + +/** + * Create a TGSI ureg_dst register from an st_dst_reg. + */ +static struct ureg_dst +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) +{ + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); + + dst = ureg_writemask(dst, dst_reg->writemask); + + if (saturate) + dst = ureg_saturate(dst); + + if (dst_reg->reladdr != NULL) + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); + + return dst; +} + +/** + * Create a TGSI ureg_src register from an st_src_reg. + */ +static struct ureg_src +translate_src(struct st_translate *t, const st_src_reg *src_reg) +{ + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); + + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); + + if ((src_reg->negate & 0xf) == NEGATE_XYZW) + src = ureg_negate(src); + + if (src_reg->reladdr != NULL) { + /* Normally ureg_src_indirect() would be used here, but a stupid compiler + * bug in g++ makes ureg_src_indirect (an inline C function) erroneously + * set the bit for src.Negate. So we have to do the operation manually + * here to work around the compiler's problems. */ + /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ + struct ureg_src addr = ureg_src(t->address[0]); + src.Indirect = 1; + src.IndirectFile = addr.File; + src.IndirectIndex = addr.Index; + src.IndirectSwizzle = addr.SwizzleX; + + if (src_reg->file != PROGRAM_INPUT && + src_reg->file != PROGRAM_OUTPUT) { + /* If src_reg->index was negative, it was set to zero in + * src_register(). Reassign it now. But don't do this + * for input/output regs since they get remapped while + * const buffers don't. + */ + src.Index = src_reg->index; + } + } + + return src; +} + +static void +compile_tgsi_instruction(struct st_translate *t, + const glsl_to_tgsi_instruction *inst) +{ + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); + + if (num_dst) + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); + + for (i = 0; i < num_src; i++) + src[i] = translate_src(t, &inst->src[i]); + + switch(inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_IF: + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); + return; + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXF: + src[num_src++] = t->samplers[inst->sampler]; + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); + return; + + case TGSI_OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); + break; + + default: + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); + break; + } +} + +/** + * Emit the TGSI instructions to adjust the WPOS pixel center convention + * Basically, add (adjX, adjY) to the fragment position. + */ +static void +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. + */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); + + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). + */ +static void +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) +{ + struct ureg_program *ureg = t->ureg; + + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, + (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); + + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); + struct ureg_dst wpos_temp; + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* MOV wpos_temp, input[wpos] + */ + if (wpos_input.File == TGSI_FILE_TEMPORARY) + wpos_temp = ureg_dst(wpos_input); + else { + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); + } + + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } + + /* Use wpos_temp as position input from here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit fragment position/ooordinate code. + */ +static void +emit_wpos(struct st_context *st, + struct st_translate *t, + const struct gl_program *program, + struct ureg_program *ureg) +{ + const struct gl_fragment_program *fp = + (const struct gl_fragment_program *) program; + struct pipe_screen *pscreen = st->pipe->screen; + boolean invert = FALSE; + + if (fp->OriginUpperLeft) { + /* Fragment shader wants origin in upper-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { + /* the driver supports upper-left origin */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { + /* the driver supports lower-left origin, need to invert Y */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + invert = TRUE; + } + else + assert(0); + } + else { + /* Fragment shader wants origin in lower-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) + /* the driver supports lower-left origin */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) + /* the driver supports upper-left origin, need to invert Y */ + invert = TRUE; + else + assert(0); + } + + if (fp->PixelCenterInteger) { + /* Fragment shader wants pixel center integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + /* the driver supports pixel center integer */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + /* the driver supports pixel center half integer, need to bias X,Y */ + emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + else + assert(0); + } + else { + /* Fragment shader wants pixel center half integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { + /* the driver supports pixel center half integer */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { + /* the driver supports pixel center integer, need to bias X,Y */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); + } + else + assert(0); + } + + /* we invert after adjustment so that we avoid the MOV to temporary, + * and reuse the adjustment ADD instead */ + emit_wpos_inversion(t, program, invert); +} + +/** + * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. + * TGSI uses +1 for front, -1 for back. + * This function converts the TGSI value to the GL value. Simply clamping/ + * saturating the value to [0,1] does the job. + */ +static void +emit_face_var(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst face_temp = ureg_DECL_temporary(ureg); + struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + + /* MOV_SAT face_temp, input[face] */ + face_temp = ureg_saturate(face_temp); + ureg_MOV(ureg, face_temp, face_input); + + /* Use face_temp as face input from here on: */ + t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); +} + +static void +emit_edgeflags(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; + + ureg_MOV(ureg, edge_dst, edge_src); +} + +/** + * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. + * \param program the program to translate + * \param numInputs number of input registers used + * \param inputMapping maps Mesa fragment program inputs to TGSI generic + * input indexes + * \param inputSemanticName the TGSI_SEMANTIC flag for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input + * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param numOutputs number of output registers used + * \param outputMapping maps Mesa fragment program outputs to TGSI + * generic outputs + * \param outputSemanticName the TGSI_SEMANTIC flag for each output + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output + * + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY + */ +extern "C" enum pipe_error +st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags) +{ + struct st_translate translate, *t; + unsigned i; + enum pipe_error ret = PIPE_OK; + + assert(numInputs <= Elements(t->inputs)); + assert(numOutputs <= Elements(t->outputs)); + + t = &translate; + memset(t, 0, sizeof *t); + + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->pointSizeOutIndex = -1; + t->prevInstWrotePointSize = GL_FALSE; + + /* + * Declare input attributes. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + if (proginfo->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + emit_wpos(st_context(ctx), t, proginfo, ureg); + } + + if (proginfo->InputsRead & FRAG_BIT_FACE) + emit_face_var(t); + + /* + * Declare output attributes. + */ + for (i = 0; i < numOutputs; i++) { + switch (outputSemanticName[i]) { + case TGSI_SEMANTIC_POSITION: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); + break; + case TGSI_SEMANTIC_STENCIL: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); + break; + case TGSI_SEMANTIC_COLOR: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); + break; + default: + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); + return PIPE_ERROR_BAD_INPUT; + } + } + } + else if (procType == TGSI_PROCESSOR_GEOMETRY) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_gs_input(ureg, + i, + inputSemanticName[i], + inputSemanticIndex[i]); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + } + } + else { + assert(procType == TGSI_PROCESSOR_VERTEX); + + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { + /* Writing to the point size result register requires special + * handling to implement clamping. + */ + static const gl_state_index pointSizeClampState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed. + */ + unsigned pointSizeClampConst = + _mesa_add_state_reference(proginfo->Parameters, + pointSizeClampState); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); + t->pointSizeResult = t->outputs[i]; + t->pointSizeOutIndex = i; + t->outputs[i] = psizregtemp; + } + } + if (passthrough_edgeflags) + emit_edgeflags(t); + } + + /* Declare address register. + */ + if (program->num_address_regs > 0) { + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); + } + + /* Declare misc input registers + */ + { + GLbitfield sysInputs = proginfo->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + + if (program->indirect_addr_temps) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + * (Note: the number of temporaries is equal to program->next_temp) + */ + for (i = 0; i < (unsigned)program->next_temp; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary(t->ureg); + } + } + + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. + */ + if (proginfo->Parameters) { + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + switch (proginfo->Parameters->Parameters[i].Type) { + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant(ureg, i); + break; + + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ + case PROGRAM_CONSTANT: + if (program->indirect_addr_consts) + t->constants[i] = ureg_DECL_constant(ureg, i); + else + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); + break; + default: + break; + } + } + } + + /* Emit immediate values. + */ + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (program->samplers_used & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler(ureg, i); + } + } + + /* Emit each instruction in turn: + */ + foreach_iter(exec_list_iterator, iter, program->instructions) { + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); + + if (t->prevInstWrotePointSize && proginfo->Id) { + /* The previous instruction wrote to the (fake) vertex point size + * result register. Now we need to clamp that value to the min/max + * point size range, putting the result into the real point size + * register. + * Note that we can't do this easily at the end of program due to + * possible early return. + */ + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + } + t->prevInstWrotePointSize = GL_FALSE; + } + + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); + } + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + FREE(t->immediates); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } + + return ret; +} +/* ----------------------------- End TGSI code ------------------------------ */ + +/** + * Convert a shader's GLSL IR into a Mesa gl_program, although without + * generating Mesa IR. + */ +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + struct gl_program *prog; + GLenum target; + const char *target_string; + bool progress; + struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); + if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v->ctx = ctx; + v->prog = prog; + v->shader_program = shader_program; + v->options = options; + v->glsl_version = ctx->Const.GLSLVersion; + v->native_integers = ctx->Const.NativeIntegers; + + add_uniforms_to_parameters_list(shader_program, shader, prog); + + /* Emit intermediate IR for main(). */ + visit_exec_list(shader->ir, v); + + /* Now emit bodies for any functions that were used. */ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v->current_function = entry; + + entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, v); + + glsl_to_tgsi_instruction *last; + last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); + if (last->op != TGSI_OPCODE_RET) + v->emit(NULL, TGSI_OPCODE_RET); + + glsl_to_tgsi_instruction *end; + end = v->emit(NULL, TGSI_OPCODE_ENDSUB); + end->function = entry; + + progress = GL_TRUE; + } + } + } while (progress); + +#if 0 + /* Print out some information (for debugging purposes) used by the + * optimization passes. */ + for (i=0; i < v->next_temp; i++) { + int fr = v->get_first_temp_read(i); + int fw = v->get_first_temp_write(i); + int lr = v->get_last_temp_read(i); + int lw = v->get_last_temp_write(i); + + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); + assert(fw <= fr); + } +#endif + + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ + v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); + + /* FIXME: These passes to optimize temporary registers don't work when there + * is indirect addressing of the temporary register space. We need proper + * array support so that we don't have to give up these passes in every + * shader that uses arrays. + */ + if (!v->indirect_addr_temps) { + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); + } + + /* Write the END instruction. */ + v->emit(NULL, TGSI_OPCODE_END); + + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("\n"); + printf("GLSL IR for linked %s program %d:\n", target_string, + shader_program->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + printf("\n"); + } + + prog->Instructions = NULL; + prog->NumInstructions = 0; + + do_set_program_inouts(shader->ir, prog); + count_resources(v, prog); + + check_resources(ctx, shader_program, v, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + struct st_vertex_program *stvp; + struct st_fragment_program *stfp; + struct st_geometry_program *stgp; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + stvp = (struct st_vertex_program *)prog; + stvp->glsl_to_tgsi = v; + break; + case GL_FRAGMENT_SHADER: + stfp = (struct st_fragment_program *)prog; + stfp->glsl_to_tgsi = v; + break; + case GL_GEOMETRY_SHADER: + stgp = (struct st_geometry_program *)prog; + stgp->glsl_to_tgsi = v; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + return prog; +} + +extern "C" { + +struct gl_shader * +st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + _mesa_init_shader(ctx, shader); + } + return shader; +} + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct gl_shader_program *shProg; + shProg = rzalloc(NULL, struct gl_shader_program); + if (shProg) { + shProg->Name = name; + _mesa_init_shader_program(ctx, shProg); + } + return shProg; +} + +/** + * Link a shader. + * Called via ctx->Driver.LinkShader() + * This actually involves converting GLSL IR into an intermediate TGSI-like IR + * with code lowering and other optimizations. + */ +GLboolean +st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + assert(prog->LinkStatus); + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + const struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + + do { + progress = false; + + /* Lowering */ + do_mat_op_to_vec(ir); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + + progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; + + progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + + progress = lower_quadop_vector(ir, false) || progress; + + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; + progress = lower_if_to_cond_assign(ir) || progress; + } + + if (options->EmitNoNoise) + progress = lower_noise(ir) || progress; + + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput + || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) + progress = + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform) + || progress; + + progress = do_vec_index_to_cond_assign(ir) || progress; + } while (progress); + + validate_ir_tree(ir); + } + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_program *linked_prog; + + if (prog->_LinkedShaders[i] == NULL) + continue; + + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } + } + + _mesa_reference_program(ctx, &linked_prog, NULL); + } + + return GL_TRUE; +} + + +/** + * Link a GLSL shader program. Called via glLinkProgram(). + */ +void +st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + fail_link(prog, "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(ctx, prog); + } + + if (prog->LinkStatus) { + if (!ctx->Driver.LinkShader(ctx, prog)) { + prog->LinkStatus = GL_FALSE; + } + } + + set_uniform_initializers(ctx, prog); + + if (ctx->Shader.Flags & GLSL_DUMP) { + if (!prog->LinkStatus) { + printf("GLSL shader program %d failed to link\n", prog->Name); + } + + if (prog->InfoLog && prog->InfoLog[0] != 0) { + printf("GLSL shader program %d info log:\n", prog->Name); + printf("%s\n", prog->InfoLog); + } + } +} + +} /* extern "C" */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h new file mode 100644 index 00000000000..d877471785d --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -0,0 +1,72 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "tgsi/tgsi_ureg.h" + +struct gl_context; +struct gl_shader; +struct gl_shader_program; +struct glsl_to_tgsi_visitor; + +enum pipe_error st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + struct glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags); + +void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); +void get_pixel_transfer_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps); +void get_bitmap_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int samplerIndex); + +struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name); + +void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); +GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + +#ifdef __cplusplus +} +#endif diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 7bd82aae206..d5228d387f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi, internalFormat = GL_RGB; texFormat = st_ChooseTextureFormat(ctx, internalFormat, - GL_RGBA, GL_UNSIGNED_BYTE); + GL_BGRA, GL_UNSIGNED_BYTE); _mesa_init_teximage_fields(ctx, target, texImage, tex->width0, tex->height0, 1, 0, diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a41e5b16a85..656c985d78f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -267,7 +267,7 @@ src_register( struct st_translate *t, /** * Map mesa texture target to TGSI texture target. */ -static unsigned +unsigned translate_texture_target( GLuint textarget, GLboolean shadow ) { @@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t, -static unsigned +unsigned translate_opcode( unsigned op ) { switch( op ) { @@ -1207,7 +1207,7 @@ st_translate_mesa_program( else t->constants[i] = ureg_DECL_immediate( ureg, - program->Parameters->ParameterValues[i], + (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 0615e52ef62..0dbdf5f6159 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -64,6 +64,12 @@ st_translate_mesa_program( void st_free_tokens(const struct tgsi_token *tokens); +unsigned +translate_opcode(unsigned op); + +unsigned +translate_texture_target(GLuint textarget, GLboolean shadow); + #if defined __cplusplus } /* extern "C" */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 132ebdbadc9..a4f47edfcd3 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. */ -static void -st_prepare_vertex_program(struct st_context *st, +void +st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { GLuint attr; @@ -184,9 +184,10 @@ st_prepare_vertex_program(struct st_context *st, stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, &stvp->Base); + _mesa_insert_mvp_code(ctx, &stvp->Base); - assert(stvp->Base.Base.NumInstructions > 1); + if (!stvp->glsl_to_tgsi) + assert(stvp->Base.Base.NumInstructions > 1); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -292,10 +293,13 @@ st_translate_vertex_program(struct st_context *st, enum pipe_error error; unsigned num_outputs; - st_prepare_vertex_program( st, stvp ); + st_prepare_vertex_program(st->ctx, stvp); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + if (!stvp->glsl_to_tgsi) + { + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + } ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); if (ureg == NULL) { @@ -318,22 +322,41 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - ureg, - &stvp->Base.Base, - /* inputs */ - vpv->num_inputs, - stvp->input_to_index, - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, - /* outputs */ - num_outputs, - stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags ); + if (stvp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + stvp->glsl_to_tgsi, + &stvp->Base.Base, + /* inputs */ + stvp->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, /* interp mode */ + /* outputs */ + stvp->num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + &stvp->Base.Base, + /* inputs */ + vpv->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, + /* outputs */ + num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); if (error) goto fail; @@ -451,6 +474,7 @@ st_translate_fragment_program(struct st_context *st, GLuint attr; const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; + GLboolean write_all = GL_FALSE; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; @@ -460,9 +484,9 @@ st_translate_fragment_program(struct st_context *st, ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint fs_num_outputs = 0; - - - _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + + if (!stfp->glsl_to_tgsi) + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); /* * Convert Mesa program inputs to TGSI input register semantics. @@ -605,21 +629,39 @@ st_translate_fragment_program(struct st_context *st, if (write_all == GL_TRUE) ureg_property_fs_color0_writes_all_cbufs(ureg, 1); - st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, FALSE ); + if (stfp->glsl_to_tgsi) + st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); + else + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index c4244df939e..699b6e8ccb7 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,6 +38,7 @@ #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" +#include "st_glsl_to_tgsi.h" /** Fragment program variant key */ @@ -83,6 +84,7 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct pipe_shader_state tgsi; @@ -136,6 +138,7 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[VERT_ATTRIB_MAX]; @@ -184,6 +187,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** map GP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; @@ -276,6 +280,14 @@ st_get_gp_variant(struct st_context *st, const struct st_gp_variant_key *key); +extern void +st_prepare_vertex_program(struct gl_context *ctx, + struct st_vertex_program *stvp); + +extern GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp); + extern void st_release_vp_variants( struct st_context *st, diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ffe7e256a56..232c286c1d1 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -221,8 +221,8 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, DBG("%s \n", __FUNCTION__); - stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level, - stImage->face + zoffset, + stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level, + stImage->base.Face + zoffset, usage, x, y, w, h); if (stImage->transfer) @@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe, } } + +struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d50c3c9af79..50b7284e760 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -45,11 +45,6 @@ struct st_texture_image { struct gl_texture_image base; - /* These aren't stored in gl_texture_image - */ - GLuint level; - GLuint face; - /* If stImage->pt != NULL, image data is stored here. * Else if stImage->base.Data != NULL, image is stored there. * Else there is no image data. @@ -232,4 +227,8 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face); + +extern struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx); + #endif diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h index 91d4f7a10ab..77b3ae6ec7a 100644 --- a/src/mesa/swrast/s_aatritemp.h +++ b/src/mesa/swrast/s_aatritemp.h @@ -181,13 +181,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, startX = (GLint) (x - xAdj); GLuint count; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* skip over fragments with zero coverage */ while (startX < MAX_WIDTH) { coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); @@ -228,13 +235,12 @@ coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); } - if (ix <= startX) - continue; - - span.x = startX; - span.y = iy; - span.end = (GLuint) ix - (GLuint) startX; - _swrast_write_rgba_span(ctx, &span); + if (ix > startX) { + span.x = startX; + span.y = iy; + span.end = (GLuint) ix - (GLuint) startX; + _swrast_write_rgba_span(ctx, &span); + } } } else { @@ -244,13 +250,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, left, startX = (GLint) (x + xAdj); GLuint count, n; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* make sure we're not past the window edge */ if (startX >= ctx->DrawBuffer->_Xmax) { startX = ctx->DrawBuffer->_Xmax - 1; @@ -296,31 +309,30 @@ ATTRIB_LOOP_END #endif - if (startX <= ix) - continue; - - n = (GLuint) startX - (GLuint) ix; + if (startX > ix) { + n = (GLuint) startX - (GLuint) ix; - left = ix + 1; + left = ix + 1; - /* shift all values to the left */ - /* XXX this is temporary */ - { - SWspanarrays *array = span.array; - GLint j; - for (j = 0; j < (GLint) n; j++) { - array->coverage[j] = array->coverage[j + left]; - COPY_CHAN4(array->rgba[j], array->rgba[j + left]); + /* shift all values to the left */ + /* XXX this is temporary */ + { + SWspanarrays *array = span.array; + GLint j; + for (j = 0; j < (GLint) n; j++) { + array->coverage[j] = array->coverage[j + left]; + COPY_CHAN4(array->rgba[j], array->rgba[j + left]); #ifdef DO_Z - array->z[j] = array->z[j + left]; + array->z[j] = array->z[j + left]; #endif + } } - } - span.x = left; - span.y = iy; - span.end = n; - _swrast_write_rgba_span(ctx, &span); + span.x = left; + span.y = iy; + span.end = n; + _swrast_write_rgba_span(ctx, &span); + } } } } diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index def1531d7ff..792b528ee34 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask swrast->BlendFunc( ctx, n, mask, src, dst, chanType ); } - -/** - * Make sure we have texture image data for all the textures we may need - * for subsequent rendering. - */ -static void -_swrast_validate_texture_images(struct gl_context *ctx) -{ - SWcontext *swrast = SWRAST_CONTEXT(ctx); - GLuint u; - - if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) { - /* no textures enabled, or no way to validate images! */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && !texImg->Data) { - swrast->ValidateTextureImage(ctx, texObj, face, lvl); - ASSERT(texObj->Image[face][lvl]->Data); - } - } - } - } - } - } -} - - -/** - * Free the texture image data attached to all currently enabled - * textures. Meant to be called by device drivers when transitioning - * from software to hardware rendering. - */ -void -_swrast_eject_texture_images(struct gl_context *ctx) -{ - GLuint u; - - if (!ctx->Texture._EnabledUnits) { - /* no textures enabled */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && texImg->Data) { - _mesa_free_texmemory(texImg->Data); - texImg->Data = NULL; - } - } - } - } - } - } -} - - - static void _swrast_sleep( struct gl_context *ctx, GLbitfield new_state ) { @@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx ) if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) { _swrast_update_texture_samplers( ctx ); - _swrast_validate_texture_images(ctx); } if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM)) @@ -772,6 +693,11 @@ _swrast_CreateContext( struct gl_context *ctx ) { GLuint i; SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext)); +#ifdef _OPENMP + const GLint maxThreads = omp_get_max_threads(); +#else + const GLint maxThreads = 1; +#endif if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); @@ -806,19 +732,25 @@ _swrast_CreateContext( struct gl_context *ctx ) for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) swrast->TextureSample[i] = NULL; - swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays); + /* SpanArrays is global and shared by all SWspan instances. However, when + * using multiple threads, it is necessary to have one SpanArrays instance + * per thread. + */ + swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays)); if (!swrast->SpanArrays) { FREE(swrast); return GL_FALSE; } - swrast->SpanArrays->ChanType = CHAN_TYPE; + for(i = 0; i < maxThreads; i++) { + swrast->SpanArrays[i].ChanType = CHAN_TYPE; #if CHAN_TYPE == GL_UNSIGNED_BYTE - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8; #elif CHAN_TYPE == GL_UNSIGNED_SHORT - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16; #else - swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0]; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0]; #endif + } /* init point span buffer */ swrast->PointSpan.primitive = GL_POINT; @@ -826,7 +758,10 @@ _swrast_CreateContext( struct gl_context *ctx ) swrast->PointSpan.facing = 0; swrast->PointSpan.array = swrast->SpanArrays; - swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * + /* TexelBuffer is also global and normally shared by all SWspan instances; + * when running with multiple threads, create one per thread. + */ + swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { FREE(swrast->SpanArrays); diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index db102ac7946..9a91be39970 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr static INLINE void interpolate_int_colors(struct gl_context *ctx, SWspan *span) { +#if CHAN_BITS != 32 const GLuint n = span->end; GLuint i; -#if CHAN_BITS != 32 ASSERT(!(span->arrayMask & SPAN_RGBA)); #endif diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c index 5bec71c057b..fa5093a3407 100644 --- a/src/mesa/swrast/s_stencil.c +++ b/src/mesa/swrast/s_stencil.c @@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) * Some fragments passed the stencil test, apply depth test to them * and apply Zpass and Zfail stencil ops. */ - if (ctx->Depth.Test == GL_FALSE) { + if (ctx->Depth.Test == GL_FALSE || + ctx->DrawBuffer->_DepthBuffer == NULL) { /* * No depth buffer, just apply zpass stencil function to active pixels. */ diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 086ed0b33d7..80b9dff3cc2 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -48,7 +48,11 @@ typedef float (*float4_array)[4]; static INLINE float4_array get_texel_array(SWcontext *swrast, GLuint unit) { +#ifdef _OPENMP + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); +#else return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); +#endif } diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index b1967e65417..86af4b7cfe2 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -280,10 +280,9 @@ static void bind_inputs( struct gl_context *ctx, if (!inputs[i]->BufferObj->Pointer) { bo[*nr_bo] = inputs[i]->BufferObj; (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - inputs[i]->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size, + GL_MAP_READ_BIT, + inputs[i]->BufferObj); assert(inputs[i]->BufferObj->Pointer); } @@ -348,18 +347,32 @@ static void bind_indices( struct gl_context *ctx, } if (ib->obj->Name && !ib->obj->Pointer) { + unsigned map_size; + + switch (ib->type) { + case GL_UNSIGNED_BYTE: + map_size = ib->count * sizeof(GLubyte); + break; + case GL_UNSIGNED_SHORT: + map_size = ib->count * sizeof(GLushort); + break; + case GL_UNSIGNED_INT: + map_size = ib->count * sizeof(GLuint); + break; + default: + assert(0); + map_size = 0; + } + bo[*nr_bo] = ib->obj; (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - ib->obj); - + ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size, + GL_MAP_READ_BIT, ib->obj); assert(ib->obj->Pointer); + } else { + ptr = ib->ptr; } - ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); - if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) { VB->Elts = (GLuint *) ptr; } @@ -402,9 +415,7 @@ static void unmap_vbos( struct gl_context *ctx, { GLuint i; for (i = 0; i < nr_bo; i++) { - ctx->Driver.UnmapBuffer(ctx, - 0, /* target -- I don't see why this would be needed */ - bo[i]); + ctx->Driver.UnmapBuffer(ctx, bo[i]); } } diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 18f095f0d4b..881d5d5f535 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx ) _tnl_notify_pipeline_output_change( ctx ); } +#ifndef _OPENMP + /* Don't adjust FPU precision mode in case multiple threads are to be used. + * This would require that the additional threads also changed the FPU mode + * which is quite a mess as this had to be done in all parallelized sections; + * otherwise the master thread and all other threads are running in different + * modes, producing inconsistent results. + * Note that all x64 implementations don't define/use START_FAST_MATH, so + * this is "hack" is only used in i386 mode + */ START_FAST_MATH(__tmp); +#endif for (i = 0; i < tnl->pipeline.nr_stages ; i++) { struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i]; @@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx ) break; } +#ifndef _OPENMP END_FAST_MATH(__tmp); +#endif } diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 2b8d38ef283..8474c787a46 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -431,6 +431,24 @@ do { \ #include "vbo_attrib_tmp.h" +/** + * Flush (draw) vertices. + * \param unmap - leave VBO unmapped after flushing? + */ +static void +vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap) +{ + if (exec->vtx.vert_count || unmap) { + vbo_exec_vtx_flush( exec, unmap ); + } + + if (exec->vtx.vertex_size) { + vbo_exec_copy_to_current( exec ); + reset_attrfv( exec ); + } +} + + #if FEATURE_beginend @@ -535,24 +553,6 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j ) /** - * Flush (draw) vertices. - * \param unmap - leave VBO unmapped after flushing? - */ -static void -vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap) -{ - if (exec->vtx.vert_count || unmap) { - vbo_exec_vtx_flush( exec, unmap ); - } - - if (exec->vtx.vertex_size) { - vbo_exec_copy_to_current( exec ); - reset_attrfv( exec ); - } -} - - -/** * Called via glBegin. */ static void GLAPIENTRY vbo_exec_Begin( GLenum mode ) @@ -947,7 +947,7 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec ) /* Free the vertex buffer. Unmap first if needed. */ if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj); + ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj); } _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL); } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index b908d5aea7e..18719d5f537 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -95,10 +95,25 @@ vbo_get_minmax_index(struct gl_context *ctx, GLuint i; if (_mesa_is_bufferobj(ib->obj)) { - const GLvoid *map = - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, ib->obj); - indices = ADD_POINTERS(map, ib->ptr); + unsigned map_size; + + switch (ib->type) { + case GL_UNSIGNED_INT: + map_size = count * sizeof(GLuint); + break; + case GL_UNSIGNED_SHORT: + map_size = count * sizeof(GLushort); + break; + case GL_UNSIGNED_BYTE: + map_size = count * sizeof(GLubyte); + break; + default: + assert(0); + map_size = 0; + } + + indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size, + GL_MAP_READ_BIT, ib->obj); } else { indices = ib->ptr; } @@ -176,7 +191,7 @@ vbo_get_minmax_index(struct gl_context *ctx, } if (_mesa_is_bufferobj(ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj); + ctx->Driver.UnmapBuffer(ctx, ib->obj); } } @@ -196,8 +211,8 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array, if (!array->BufferObj->Pointer) { /* need to map now */ array->BufferObj->Pointer = - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, array->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size, + GL_MAP_READ_BIT, array->BufferObj); } data = ADD_POINTERS(data, array->BufferObj->Pointer); } @@ -238,7 +253,7 @@ unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array) if (array->Enabled && _mesa_is_bufferobj(array->BufferObj) && _mesa_bufferobj_mapped(array->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj); + ctx->Driver.UnmapBuffer(ctx, array->BufferObj); } } @@ -256,10 +271,10 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, GLint i, k; if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - elemMap = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); + elemMap = ctx->Driver.MapBufferRange(ctx, 0, + ctx->Array.ElementArrayBufferObj->Size, + GL_MAP_READ_BIT, + ctx->Array.ElementArrayBufferObj); elements = ADD_POINTERS(elements, elemMap); } @@ -296,8 +311,7 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, } if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj); } unmap_array_buffer(ctx, &arrayObj->Vertex); @@ -351,8 +365,8 @@ print_draw_arrays(struct gl_context *ctx, bufName); if (bufName) { - GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY_ARB, bufObj); + GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, + GL_MAP_READ_BIT, bufObj); int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; float *f = (float *) (p + offset); int *k = (int *) f; @@ -364,7 +378,7 @@ print_draw_arrays(struct gl_context *ctx, for (i = 0; i < n; i++) { printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); } } } @@ -715,10 +729,11 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, static void dump_element_buffer(struct gl_context *ctx, GLenum type) { - const GLvoid *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); + const GLvoid *map = + ctx->Driver.MapBufferRange(ctx, 0, + ctx->Array.ElementArrayBufferObj->Size, + GL_MAP_READ_BIT, + ctx->Array.ElementArrayBufferObj); switch (type) { case GL_UNSIGNED_BYTE: { @@ -760,8 +775,7 @@ dump_element_buffer(struct gl_context *ctx, GLenum type) ; } - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj); } @@ -909,11 +923,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, if (0) _mesa_print_arrays(ctx); -#ifdef DEBUG /* 'end' was out of bounds, but now let's check the actual array * indexes to see if any of them are out of bounds. */ - { + if (0) { GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); if (max >= ctx->Array.ArrayObj->_MaxElement) { @@ -934,7 +947,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, * upper bound wrong. */ } -#endif /* Set 'end' to the max possible legal value */ assert(ctx->Array.ArrayObj->_MaxElement >= 1); diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 7e8d8602093..8ffaaaa4876 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -260,8 +260,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) static void vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) { - GLenum target = GL_ARRAY_BUFFER_ARB; - if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { struct gl_context *ctx = exec->ctx; @@ -270,8 +268,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); if (length) - ctx->Driver.FlushMappedBufferRange(ctx, target, - offset, length, + ctx->Driver.FlushMappedBufferRange(ctx, offset, length, exec->vtx.bufferobj); } @@ -281,7 +278,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); assert(exec->vtx.buffer_ptr != NULL); - ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); + ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj); exec->vtx.buffer_map = NULL; exec->vtx.buffer_ptr = NULL; exec->vtx.max_vert = 0; @@ -296,8 +293,6 @@ void vbo_exec_vtx_map( struct vbo_exec_context *exec ) { struct gl_context *ctx = exec->ctx; - const GLenum target = GL_ARRAY_BUFFER_ARB; - const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */ const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */ GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | @@ -311,12 +306,10 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) assert(!exec->vtx.buffer_map); assert(!exec->vtx.buffer_ptr); - if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && - ctx->Driver.MapBufferRange) { + if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) { /* The VBO exists and there's room for more */ exec->vtx.buffer_map = (GLfloat *)ctx->Driver.MapBufferRange(ctx, - target, exec->vtx.buffer_used, (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used), @@ -329,20 +322,16 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) /* Need to allocate a new VBO */ exec->vtx.buffer_used = 0; - ctx->Driver.BufferData(ctx, target, + ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB, VBO_VERT_BUFFER_SIZE, NULL, usage, exec->vtx.bufferobj); - if (ctx->Driver.MapBufferRange) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, - 0, VBO_VERT_BUFFER_SIZE, - accessRange, - exec->vtx.bufferobj); - if (!exec->vtx.buffer_map) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, + 0, VBO_VERT_BUFFER_SIZE, + accessRange, + exec->vtx.bufferobj); assert(exec->vtx.buffer_map); exec->vtx.buffer_ptr = exec->vtx.buffer_map; } diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index 1de290ff602..a1eab752ad6 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -159,10 +159,8 @@ void vbo_rebase_prims( struct gl_context *ctx, void *ptr; if (map_ib) - ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - ib->obj); + ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT, + ib->obj); ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); @@ -183,9 +181,7 @@ void vbo_rebase_prims( struct gl_context *ctx, } if (map_ib) - ctx->Driver.UnmapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - ib->obj); + ctx->Driver.UnmapBuffer(ctx, ib->obj); tmp_ib.obj = ctx->Shared->NullBufferObj; tmp_ib.ptr = tmp_indices; diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 9041f791edd..ad36e93329c 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -232,11 +232,10 @@ map_vertex_store(struct gl_context *ctx, assert(vertex_store->bufferobj); assert(!vertex_store->buffer); vertex_store->buffer = - (GLfloat *) ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, /* not used */ - GL_WRITE_ONLY, /* not used */ - vertex_store-> - bufferobj); + (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0, + vertex_store->bufferobj->Size, + GL_MAP_WRITE_BIT, /* not used */ + vertex_store->bufferobj); assert(vertex_store->buffer); return vertex_store->buffer + vertex_store->used; @@ -247,7 +246,7 @@ static void unmap_vertex_store(struct gl_context *ctx, struct vbo_save_vertex_store *vertex_store) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj); + ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj); vertex_store->buffer = NULL; } diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index a37af73e0db..6cda831aa85 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -70,7 +70,7 @@ _playback_copy_to_current(struct gl_context *ctx, else offset = node->buffer_offset; - ctx->Driver.GetBufferSubData( ctx, 0, offset, + ctx->Driver.GetBufferSubData( ctx, offset, node->vertex_size * sizeof(GLfloat), data, node->vertex_store->bufferobj ); @@ -217,10 +217,11 @@ static void vbo_save_loopback_vertex_list(struct gl_context *ctx, const struct vbo_save_vertex_list *list) { - const char *buffer = ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, /* ? */ - list->vertex_store->bufferobj); + const char *buffer = + ctx->Driver.MapBufferRange(ctx, 0, + list->vertex_store->bufferobj->Size, + GL_MAP_READ_BIT, /* ? */ + list->vertex_store->bufferobj); vbo_loopback_vertex_list(ctx, (const GLfloat *)(buffer + list->buffer_offset), @@ -230,8 +231,7 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx, list->wrap_count, list->vertex_size); - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - list->vertex_store->bufferobj); + ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj); } diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index ecca1171673..40906e38917 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -444,7 +444,7 @@ replay_init( struct copy_context *copy ) copy->vertex_size += attr_size(copy->array[i]); if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo); + ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo); copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer, copy->array[i]->Ptr); @@ -459,8 +459,8 @@ replay_init( struct copy_context *copy ) */ if (_mesa_is_bufferobj(copy->ib->obj) && !_mesa_bufferobj_mapped(copy->ib->obj)) - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY, - copy->ib->obj); + ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT, + copy->ib->obj); srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr); @@ -564,14 +564,14 @@ replay_finish( struct copy_context *copy ) for (i = 0; i < copy->nr_varying; i++) { struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj; if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo); + ctx->Driver.UnmapBuffer(ctx, vbo); } /* Unmap index buffer: */ if (_mesa_is_bufferobj(copy->ib->obj) && _mesa_bufferobj_mapped(copy->ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj); + ctx->Driver.UnmapBuffer(ctx, copy->ib->obj); } } diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index 6141e434679..5abd5a25de5 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -118,7 +118,7 @@ p4_constants: .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 -.float 0f+1.0 +.float 1.0 .text .align 16 |