diff options
Diffstat (limited to 'src')
244 files changed, 9594 insertions, 3760 deletions
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index aa384cb1172..5a5e43bffe0 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -702,15 +702,18 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp, struct dri2_egl_display *dri2_dpy; unsigned int api_mask; + if (disp->Platform != _EGL_PLATFORM_X11) + return EGL_FALSE; + dri2_dpy = malloc(sizeof *dri2_dpy); if (!dri2_dpy) return _eglError(EGL_BAD_ALLOC, "eglInitialize"); disp->DriverData = (void *) dri2_dpy; - if (disp->NativeDisplay == NULL) { + if (disp->PlatformDisplay == NULL) { dri2_dpy->conn = xcb_connect(0, 0); } else { - dri2_dpy->conn = XGetXCBConnection(disp->NativeDisplay); + dri2_dpy->conn = XGetXCBConnection((Display *) disp->PlatformDisplay); } if (xcb_connection_has_error(dri2_dpy->conn)) { @@ -815,7 +818,7 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp, cleanup_driver: dlclose(dri2_dpy->driver); cleanup_conn: - if (disp->NativeDisplay == NULL) + if (disp->PlatformDisplay == NULL) xcb_disconnect(dri2_dpy->conn); cleanup_dpy: free(dri2_dpy); @@ -837,7 +840,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen); close(dri2_dpy->fd); dlclose(dri2_dpy->driver); - if (disp->NativeDisplay == NULL) + if (disp->PlatformDisplay == NULL) xcb_disconnect(dri2_dpy->conn); free(dri2_dpy); disp->DriverData = NULL; diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c index e08ef5f2228..804dc028a3c 100644 --- a/src/egl/drivers/glx/egl_glx.c +++ b/src/egl/drivers/glx/egl_glx.c @@ -498,11 +498,14 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp, { struct GLX_egl_display *GLX_dpy; + if (disp->Platform != _EGL_PLATFORM_X11) + return EGL_FALSE; + GLX_dpy = CALLOC_STRUCT(GLX_egl_display); if (!GLX_dpy) return _eglError(EGL_BAD_ALLOC, "eglInitialize"); - GLX_dpy->dpy = (Display *) disp->NativeDisplay; + GLX_dpy->dpy = (Display *) disp->PlatformDisplay; if (!GLX_dpy->dpy) 
{ GLX_dpy->dpy = XOpenDisplay(NULL); if (!GLX_dpy->dpy) { @@ -514,7 +517,7 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp, if (!glXQueryVersion(GLX_dpy->dpy, &GLX_dpy->glx_maj, &GLX_dpy->glx_min)) { _eglLog(_EGL_WARNING, "GLX: glXQueryVersion failed"); - if (!disp->NativeDisplay) + if (!disp->PlatformDisplay) XCloseDisplay(GLX_dpy->dpy); free(GLX_dpy); return EGL_FALSE; @@ -526,7 +529,7 @@ GLX_eglInitialize(_EGLDriver *drv, _EGLDisplay *disp, create_configs(disp, GLX_dpy, DefaultScreen(GLX_dpy->dpy)); if (!disp->NumConfigs) { _eglLog(_EGL_WARNING, "GLX: failed to create any config"); - if (!disp->NativeDisplay) + if (!disp->PlatformDisplay) XCloseDisplay(GLX_dpy->dpy); free(GLX_dpy); return EGL_FALSE; @@ -558,7 +561,7 @@ GLX_eglTerminate(_EGLDriver *drv, _EGLDisplay *disp) if (GLX_dpy->fbconfigs) XFree(GLX_dpy->fbconfigs); - if (!disp->NativeDisplay) + if (!disp->PlatformDisplay) XCloseDisplay(GLX_dpy->dpy); free(GLX_dpy); @@ -617,10 +620,11 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, static void destroy_surface(_EGLDisplay *disp, _EGLSurface *surf) { + struct GLX_egl_display *GLX_dpy = GLX_egl_display(disp); struct GLX_egl_surface *GLX_surf = GLX_egl_surface(surf); if (GLX_surf->destroy) - GLX_surf->destroy(disp->NativeDisplay, GLX_surf->glx_drawable); + GLX_surf->destroy(GLX_dpy->dpy, GLX_surf->glx_drawable); free(GLX_surf); } diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile index 82fd855b1d5..be27d9450f4 100644 --- a/src/egl/main/Makefile +++ b/src/egl/main/Makefile @@ -7,7 +7,7 @@ include $(TOP)/configs/current EGL_MAJOR = 1 EGL_MINOR = 0 -INCLUDE_DIRS = -I$(TOP)/include $(X11_CFLAGS) +INCLUDE_DIRS = -I$(TOP)/include HEADERS = \ eglcompiler.h \ @@ -49,12 +49,25 @@ OBJECTS = $(SOURCES:.c=.o) # use dl*() to load drivers -LOCAL_CFLAGS = -D_EGL_PLATFORM_POSIX=1 - -EGL_DEFAULT_DISPLAY = $(word 1, $(EGL_DISPLAYS)) +LOCAL_CFLAGS = -D_EGL_OS_UNIX=1 + +EGL_DEFAULT_PLATFORM = $(firstword $(EGL_PLATFORMS)) + +# 
translate --with-egl-platforms to _EGLPlatformType +EGL_NATIVE_PLATFORM=_EGL_INVALID_PLATFORM +ifeq ($(firstword $(EGL_PLATFORMS)),x11) +EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11 +endif +ifeq ($(firstword $(EGL_PLATFORMS)),kms) +EGL_NATIVE_PLATFORM=_EGL_PLATFORM_DRM +endif +ifeq ($(firstword $(EGL_PLATFORMS)),fbdev) +EGL_NATIVE_PLATFORM=_EGL_PLATFORM_FBDEV +endif LOCAL_CFLAGS += \ - -D_EGL_DEFAULT_DISPLAY=\"$(EGL_DEFAULT_DISPLAY)\" \ + -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \ + -D_EGL_DEFAULT_PLATFORM=\"$(EGL_DEFAULT_PLATFORM)\" \ -D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\" .c.o: diff --git a/src/egl/main/SConscript b/src/egl/main/SConscript index f3fe9966b3e..fad0671f38a 100644 --- a/src/egl/main/SConscript +++ b/src/egl/main/SConscript @@ -9,9 +9,10 @@ if env['platform'] != 'winddk': env = env.Clone() env.Append(CPPDEFINES = [ - '_EGL_DEFAULT_DISPLAY=\\"gdi\\"', + '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_WINDOWS', + '_EGL_DEFAULT_PLATFORM=\\"gdi\\"', '_EGL_DRIVER_SEARCH_DIR=\\"\\"', - '_EGL_PLATFORM_WINDOWS', + '_EGL_OS_WINDOWS', 'KHRONOS_DLL_EXPORTS', ]) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 9912043e06c..1ec1486d3fc 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -250,7 +250,8 @@ _eglUnlockDisplay(_EGLDisplay *dpy) EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType nativeDisplay) { - _EGLDisplay *dpy = _eglFindDisplay(nativeDisplay); + _EGLPlatformType plat = _eglGetNativePlatform(); + _EGLDisplay *dpy = _eglFindDisplay(plat, (void *) nativeDisplay); return _eglGetDisplayHandle(dpy); } @@ -491,6 +492,8 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); + if (disp->Platform != _eglGetNativePlatform()) + RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE); surf = drv->API.CreateWindowSurface(drv, disp, conf, window, attrib_list); ret = (surf) ? 
_eglLinkSurface(surf, disp) : EGL_NO_SURFACE; @@ -510,6 +513,8 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, EGLSurface ret; _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); + if (disp->Platform != _eglGetNativePlatform()) + RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE); surf = drv->API.CreatePixmapSurface(drv, disp, conf, pixmap, attrib_list); ret = (surf) ? _eglLinkSurface(surf, disp) : EGL_NO_SURFACE; @@ -667,6 +672,8 @@ eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target) EGLBoolean ret; _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv); + if (disp->Platform != _eglGetNativePlatform()) + RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_FALSE); ret = drv->API.CopyBuffers(drv, disp, surf, target); RETURN_EGL_EVAL(disp, ret); @@ -836,6 +843,9 @@ eglGetProcAddress(const char *procname) { "eglQueryScreenModeMESA", (_EGLProc) eglQueryScreenModeMESA }, { "eglQueryModeStringMESA", (_EGLProc) eglQueryModeStringMESA }, #endif /* EGL_MESA_screen_surface */ +#ifdef EGL_MESA_drm_display + { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA }, +#endif #ifdef EGL_KHR_image_base { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR }, @@ -1098,6 +1108,17 @@ eglQueryModeStringMESA(EGLDisplay dpy, EGLModeMESA mode) #endif /* EGL_MESA_screen_surface */ +#ifdef EGL_MESA_drm_display + +EGLDisplay EGLAPIENTRY +eglGetDRMDisplayMESA(int fd) +{ + _EGLDisplay *dpy = _eglFindDisplay(_EGL_PLATFORM_DRM, (void *) fd); + return _eglGetDisplayHandle(dpy); +} + +#endif /* EGL_MESA_drm_display */ + /** ** EGL 1.2 **/ diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 5dc5fd9719a..d666bdabe02 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -49,16 +49,19 @@ _eglFiniDisplay(void) * new one. 
*/ _EGLDisplay * -_eglFindDisplay(EGLNativeDisplayType nativeDisplay) +_eglFindDisplay(_EGLPlatformType plat, void *plat_dpy) { _EGLDisplay *dpy; + if (plat == _EGL_INVALID_PLATFORM) + return NULL; + _eglLockMutex(_eglGlobal.Mutex); /* search the display list first */ dpy = _eglGlobal.DisplayList; while (dpy) { - if (dpy->NativeDisplay == nativeDisplay) + if (dpy->Platform == plat && dpy->PlatformDisplay == plat_dpy) break; dpy = dpy->Next; } @@ -68,7 +71,8 @@ _eglFindDisplay(EGLNativeDisplayType nativeDisplay) dpy = (_EGLDisplay *) calloc(1, sizeof(_EGLDisplay)); if (dpy) { _eglInitMutex(&dpy->Mutex); - dpy->NativeDisplay = nativeDisplay; + dpy->Platform = plat; + dpy->PlatformDisplay = plat_dpy; /* add to the display list */ dpy->Next = _eglGlobal.DisplayList; diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 42e305f91ac..0b325f7cf03 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -7,6 +7,18 @@ #include "eglmutex.h" +enum _egl_platform_type { + _EGL_PLATFORM_WINDOWS, + _EGL_PLATFORM_X11, + _EGL_PLATFORM_DRM, + _EGL_PLATFORM_FBDEV, + + _EGL_NUM_PLATFORMS, + _EGL_INVALID_PLATFORM = -1 +}; +typedef enum _egl_platform_type _EGLPlatformType; + + enum _egl_resource_type { _EGL_RESOURCE_CONTEXT, _EGL_RESOURCE_SURFACE, @@ -39,6 +51,7 @@ struct _egl_extensions { EGLBoolean MESA_screen_surface; EGLBoolean MESA_copy_context; + EGLBoolean MESA_drm_display; EGLBoolean KHR_image_base; EGLBoolean KHR_image_pixmap; EGLBoolean KHR_vg_parent_image; @@ -53,14 +66,15 @@ struct _egl_extensions }; -struct _egl_display +struct _egl_display { /* used to link displays */ _EGLDisplay *Next; _EGLMutex Mutex; - EGLNativeDisplayType NativeDisplay; + _EGLPlatformType Platform; + void *PlatformDisplay; EGLBoolean Initialized; /**< True if the display is initialized */ _EGLDriver *Driver; @@ -92,7 +106,7 @@ _eglFiniDisplay(void); extern _EGLDisplay * -_eglFindDisplay(EGLNativeDisplayType displayName); +_eglFindDisplay(_EGLPlatformType plat, void 
*plat_dpy); PUBLIC void diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index 631a8710ac5..db7b4a7471e 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -23,7 +23,7 @@ #include "eglsurface.h" #include "eglimage.h" -#if defined(_EGL_PLATFORM_POSIX) +#if defined(_EGL_OS_UNIX) #include <dlfcn.h> #include <sys/types.h> #include <dirent.h> @@ -34,7 +34,7 @@ /** * Wrappers for dlopen/dlclose() */ -#if defined(_EGL_PLATFORM_WINDOWS) +#if defined(_EGL_OS_WINDOWS) /* XXX Need to decide how to do dynamic name lookup on Windows */ @@ -64,7 +64,7 @@ library_suffix(void) } -#elif defined(_EGL_PLATFORM_POSIX) +#elif defined(_EGL_OS_UNIX) static const char *DefaultDriverNames[] = { @@ -119,11 +119,11 @@ _eglOpenLibrary(const char *driverPath, lib_handle *handle) _eglLog(_EGL_DEBUG, "dlopen(%s)", driverPath); lib = open_library(driverPath); -#if defined(_EGL_PLATFORM_WINDOWS) +#if defined(_EGL_OS_WINDOWS) /* XXX untested */ if (lib) mainFunc = (_EGLMain_t) GetProcAddress(lib, "_eglMain"); -#elif defined(_EGL_PLATFORM_POSIX) +#elif defined(_EGL_OS_UNIX) if (lib) { union { _EGLMain_t func; @@ -301,7 +301,7 @@ _eglLoaderFile(const char *dir, size_t len, void *loader_data) static EGLBoolean _eglLoaderPattern(const char *dir, size_t len, void *loader_data) { -#if defined(_EGL_PLATFORM_POSIX) +#if defined(_EGL_OS_UNIX) const char *prefix, *suffix; size_t prefix_len, suffix_len; DIR *dirp; @@ -352,7 +352,7 @@ _eglLoaderPattern(const char *dir, size_t len, void *loader_data) closedir(dirp); return EGL_TRUE; -#else /* _EGL_PLATFORM_POSIX */ +#else /* _EGL_OS_UNIX */ /* stop immediately */ return EGL_FALSE; #endif @@ -397,20 +397,20 @@ _eglGetSearchPath(void) { static const char *search_path; -#if defined(_EGL_PLATFORM_POSIX) || defined(_EGL_PLATFORM_WINDOWS) +#if defined(_EGL_OS_UNIX) || defined(_EGL_OS_WINDOWS) if (!search_path) { static char buffer[1024]; const char *p; int ret; p = getenv("EGL_DRIVERS_PATH"); -#if defined(_EGL_PLATFORM_POSIX) +#if 
defined(_EGL_OS_UNIX) if (p && (geteuid() != getuid() || getegid() != getgid())) { _eglLog(_EGL_DEBUG, "ignore EGL_DRIVERS_PATH for setuid/setgid binaries"); p = NULL; } -#endif /* _EGL_PLATFORM_POSIX */ +#endif /* _EGL_OS_UNIX */ if (p) { ret = _eglsnprintf(buffer, sizeof(buffer), @@ -441,7 +441,7 @@ _eglPreloadUserDriver(void) char *env; env = getenv("EGL_DRIVER"); -#if defined(_EGL_PLATFORM_POSIX) +#if defined(_EGL_OS_UNIX) if (env && strchr(env, '/')) { search_path = ""; if ((geteuid() != getuid() || getegid() != getgid())) { @@ -450,7 +450,7 @@ _eglPreloadUserDriver(void) env = NULL; } } -#endif /* _EGL_PLATFORM_POSIX */ +#endif /* _EGL_OS_UNIX */ if (!env) return EGL_FALSE; @@ -464,24 +464,27 @@ _eglPreloadUserDriver(void) /** - * Preload display drivers. + * Preload platform drivers. * - * Display drivers are a set of drivers that support a certain display system. - * The display system may be specified by EGL_DISPLAY. + * Platform drivers are a set of drivers that support a certain window system. + * The window system may be specified by EGL_PLATFORM. * * FIXME This makes libEGL a memory hog if an user driver is not specified and - * there are many display drivers. + * there are many platform drivers. */ static EGLBoolean -_eglPreloadDisplayDrivers(void) +_eglPreloadPlatformDrivers(void) { const char *dpy; char prefix[32]; int ret; - dpy = getenv("EGL_DISPLAY"); + dpy = getenv("EGL_PLATFORM"); + /* try deprecated env variable */ if (!dpy || !dpy[0]) - dpy = _EGL_DEFAULT_DISPLAY; + dpy = getenv("EGL_DISPLAY"); + if (!dpy || !dpy[0]) + dpy = _EGL_DEFAULT_PLATFORM; if (!dpy || !dpy[0]) return EGL_FALSE; @@ -515,7 +518,7 @@ _eglPreloadDrivers(void) } loaded = (_eglPreloadUserDriver() || - _eglPreloadDisplayDrivers()); + _eglPreloadPlatformDrivers()); _eglUnlockMutex(_eglGlobal.Mutex); @@ -578,6 +581,16 @@ _eglLoadDefaultDriver(EGLDisplay dpy, EGLint *major, EGLint *minor) /** + * Return the native platform. It is the platform of the EGL native types. 
+ */ +_EGLPlatformType +_eglGetNativePlatform(void) +{ + return _EGL_NATIVE_PLATFORM; +} + + +/** * Plug all the available fallback routines into the given driver's * dispatch table. */ diff --git a/src/egl/main/egldriver.h b/src/egl/main/egldriver.h index 8b34c43b924..6a523747647 100644 --- a/src/egl/main/egldriver.h +++ b/src/egl/main/egldriver.h @@ -3,6 +3,7 @@ #include "egltypedefs.h" +#include "egldisplay.h" #include "eglapi.h" @@ -88,6 +89,10 @@ extern _EGLDriver * _eglLoadDefaultDriver(EGLDisplay dpy, EGLint *major, EGLint *minor); +extern _EGLPlatformType +_eglGetNativePlatform(void); + + PUBLIC void _eglInitDriverFallbacks(_EGLDriver *drv); diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c index 4652969659b..281138c7523 100644 --- a/src/egl/main/eglmisc.c +++ b/src/egl/main/eglmisc.c @@ -84,6 +84,7 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(MESA_screen_surface); _EGL_CHECK_EXTENSION(MESA_copy_context); + _EGL_CHECK_EXTENSION(MESA_drm_display); _EGL_CHECK_EXTENSION(KHR_image_base); _EGL_CHECK_EXTENSION(KHR_image_pixmap); diff --git a/src/egl/main/eglstring.h b/src/egl/main/eglstring.h index bebb758dd82..f1d559b24a2 100644 --- a/src/egl/main/eglstring.h +++ b/src/egl/main/eglstring.h @@ -3,7 +3,7 @@ #include <string.h> -#ifdef _EGL_PLATFORM_WINDOWS +#ifdef _EGL_OS_WINDOWS #define _eglstrcasecmp _stricmp #define _eglsnprintf _snprintf #else diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 7c7702549e0..dab95e50515 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -435,13 +435,18 @@ draw_num_shader_outputs(const struct draw_context *draw) */ void draw_texture_samplers(struct draw_context *draw, + uint shader, uint num_samplers, struct tgsi_sampler **samplers) { - draw->vs.num_samplers = num_samplers; - draw->vs.samplers = samplers; - draw->gs.num_samplers = num_samplers; - draw->gs.samplers = samplers; + if (shader 
== PIPE_SHADER_VERTEX) { + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; + } else { + debug_assert(shader == PIPE_SHADER_GEOMETRY); + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; + } } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 103d6538b81..c0122f2aca5 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -97,6 +97,7 @@ draw_num_shader_outputs(const struct draw_context *draw); void draw_texture_samplers(struct draw_context *draw, + uint shader_type, uint num_samplers, struct tgsi_sampler **samplers); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index a1ca7071e35..79a57a67f3e 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -39,7 +39,6 @@ #include "util/u_memory.h" #include "util/u_prim.h" -#define MAX_PRIM_VERTICES 6 /* fixme: move it from here */ #define MAX_PRIMITIVES 64 @@ -76,6 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { + /* noop */ } @@ -171,9 +171,10 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, /* Unswizzle all output results. 
*/ - shader->emitted_primitives += num_primitives; for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { unsigned num_verts_per_prim = machine->Primitives[prim_idx]; + shader->primitive_lengths[prim_idx + shader->emitted_primitives] = + machine->Primitives[prim_idx]; shader->emitted_vertices += num_verts_per_prim; for (j = 0; j < num_verts_per_prim; j++) { int idx = (prim_idx * num_verts_per_prim + j) * @@ -199,9 +200,10 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, } } *p_output = output; + shader->emitted_primitives += num_primitives; } - +/*#define DEBUG_INPUTS 1*/ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, unsigned *indices, unsigned num_vertices, @@ -216,19 +218,28 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, for (i = 0; i < num_vertices; ++i) { const float (*input)[4]; - /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ +#if DEBUG_INPUTS + debug_printf("%d) vertex index = %d (prim idx = %d)\n", + i, indices[i], prim_idx); +#endif input = (const float (*)[4])( (const char *)input_ptr + (indices[i] * input_vertex_stride)); for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { - machine->Inputs[idx].xyzw[0].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[1].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[2].f[prim_idx] = (float)shader->in_prim_idx; - machine->Inputs[idx].xyzw[3].f[prim_idx] = (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[0].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[1].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[2].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[3].f[prim_idx] = + (float)shader->in_prim_idx; } else { - /*debug_printf("\tSlot = %d, vs_slot = 
%d, idx = %d:\n", - slot, vs_slot, idx);*/ +#if DEBUG_INPUTS + debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx); +#endif #if 1 assert(!util_is_inf_or_nan(input[vs_slot][0])); assert(!util_is_inf_or_nan(input[vs_slot][1])); @@ -239,7 +250,7 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; -#if 0 +#if DEBUG_INPUTS debug_printf("\t\t%f %f %f %f\n", machine->Inputs[idx].xyzw[0].f[prim_idx], machine->Inputs[idx].xyzw[1].f[prim_idx], @@ -252,7 +263,6 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader, } } - static void gs_flush(struct draw_geometry_shader *shader, unsigned input_primitives) { @@ -274,6 +284,11 @@ static void gs_flush(struct draw_geometry_shader *shader, out_prim_count = machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; +#if 0 + debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", + shader->emitted_primitives, shader->emitted_vertices, + out_prim_count); +#endif draw_geometry_fetch_outputs(shader, out_prim_count, &shader->tmp_output); } @@ -305,6 +320,22 @@ static void gs_line(struct draw_geometry_shader *shader, gs_flush(shader, 1); } +static void gs_line_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, int i3) +{ + unsigned indices[4]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + + draw_fetch_gs_input(shader, indices, 4, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + static void gs_tri(struct draw_geometry_shader *shader, int i0, int i1, int i2) { @@ -320,58 +351,130 @@ static void gs_tri(struct draw_geometry_shader *shader, gs_flush(shader, 1); } +static void gs_tri_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, + int i3, int i4, int i5) +{ + unsigned indices[6]; + 
+ indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + indices[4] = i4; + indices[5] = i5; + + draw_fetch_gs_input(shader, indices, 6, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + #define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) -#define LINE(gs,i0,i1) gs_line(gs,i0,i1) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) +#define LINE(gs,i0,i1) gs_line(gs,i0,i1) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) #define POINT(gs,i0) gs_point(gs,i0) #define FUNC gs_run +#define LOCAL_VARS +#include "draw_gs_tmp.h" + + +#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ + gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ + elts[i4],elts[i5]) +#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ + elts[i1], \ + elts[i2],elts[i3]) +#define POINT(gs,i0) gs_point(gs,elts[i0]) +#define FUNC gs_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; #include "draw_gs_tmp.h" int draw_geometry_shader_run(struct draw_geometry_shader *shader, - unsigned pipe_prim, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned vertex_size) + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ) { + const float (*input)[4] = (const float (*)[4])input_verts->verts->data; + unsigned input_stride = input_verts->vertex_size; + unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; unsigned int i; + unsigned num_input_verts = input_prim->linear ? 
+ input_verts->count : + input_prim->count; unsigned num_in_primitives = - u_gs_prims_for_vertices(pipe_prim, count); - unsigned alloc_count = draw_max_output_vertices(shader->draw, - pipe_prim, - count); - /* this is bad, but we can't be overwriting the output array - * because it's the same as input array here */ - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(vertex_size * alloc_count); + MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), + u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)); + unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive, + shader->max_output_vertices) + * num_in_primitives; + + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->verts = + (struct vertex_header *)MALLOC(input_verts->vertex_size * + num_in_primitives * + shader->max_output_vertices); - if (!pipeline_verts) - return 0; - if (0) debug_printf("%s count = %d (prims = %d)\n", __FUNCTION__, - count, num_in_primitives); +#if 0 + debug_printf("%s count = %d (in prims # = %d)\n", + __FUNCTION__, num_input_verts, num_in_primitives); + debug_printf("\tlinear = %d, prim_info->count = %d\n", + input_prim->linear, input_prim->count); + debug_printf("\tprimt pipe = %d, shader in = %d, shader out = %d, max out = %d\n", + input_prim->prim, shader->input_primitive, + shader->output_primitive, + shader->max_output_vertices); +#endif shader->emitted_vertices = 0; shader->emitted_primitives = 0; shader->vertex_size = vertex_size; - shader->tmp_output = ( float (*)[4])pipeline_verts->data; + shader->tmp_output = (float (*)[4])output_verts->verts->data; shader->in_prim_idx = 0; shader->input_vertex_stride = input_stride; shader->input = input; + if (shader->primitive_lengths) { + FREE(shader->primitive_lengths); + } + shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 
machine->Consts[i] = constants[i]; } - gs_run(shader, pipe_prim, count); + if (input_prim->linear) + gs_run(shader, input_prim, input_verts, + output_prims, output_verts); + else + gs_run_elts(shader, input_prim, input_verts, + output_prims, output_verts); - memcpy(output, pipeline_verts->data, - shader->info.num_outputs * 4 * sizeof(float) + - vertex_size * (shader->emitted_vertices -1)); + /* Update prim_info: + */ + output_prims->linear = TRUE; + output_prims->elts = NULL; + output_prims->start = 0; + output_prims->count = shader->emitted_vertices; + output_prims->prim = shader->output_primitive; + output_prims->primitive_lengths = shader->primitive_lengths; + output_prims->primitive_count = shader->emitted_primitives; + output_verts->count = shader->emitted_vertices; + +#if 0 + debug_printf("GS finished, prims = %d, verts = %d\n", + output_prims->primitive_count, + output_verts->count); +#endif - FREE(pipeline_verts); return shader->emitted_vertices; } @@ -391,24 +494,3 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, draw->gs.samplers); } } - -int draw_max_output_vertices(struct draw_context *draw, - unsigned pipe_prim, - unsigned count) -{ - unsigned alloc_count = align( count, 4 ); - - if (draw->gs.geometry_shader) { - unsigned input_primitives = u_gs_prims_for_vertices(pipe_prim, - count); - /* max GS output is number of input primitives * max output - * vertices per each invocation */ - unsigned gs_max_verts = input_primitives * - draw->gs.geometry_shader->max_output_vertices; - if (gs_max_verts > count) - alloc_count = align(gs_max_verts, 4); - } - /*debug_printf("------- alloc count = %d (input = %d)\n", - alloc_count, count);*/ - return alloc_count; -} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 65f0c61916e..2cb634818c2 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -54,6 +54,7 @@ struct draw_geometry_shader { unsigned 
input_primitive; unsigned output_primitive; + unsigned *primitive_lengths; unsigned emitted_vertices; unsigned emitted_primitives; @@ -71,13 +72,11 @@ struct draw_geometry_shader { * smaller than the GS_MAX_OUTPUT_VERTICES shader property. */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, - unsigned pipe_prim, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned output_stride); + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ); void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index eb4a313ca18..7a8683cf7c2 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -1,18 +1,23 @@ static void FUNC( struct draw_geometry_shader *shader, - unsigned pipe_prim, - unsigned count ) + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + struct draw_prim_info *output_prims, + struct draw_vertex_info *output_verts) { struct draw_context *draw = shader->draw; boolean flatfirst = (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first); - unsigned i; + unsigned i, j; + unsigned count = input_prims->count; + LOCAL_VARS if (0) debug_printf("%s %d\n", __FUNCTION__, count); + debug_assert(input_prims->primitive_count == 1); - switch (pipe_prim) { + switch (input_prims->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i++) { POINT( shader, i + 0 ); @@ -90,20 +95,6 @@ static void FUNC( struct draw_geometry_shader *shader, case PIPE_PRIM_POLYGON: { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade 
requirements. - */ - ushort edge_next, edge_finish; - - if (flatfirst) { - edge_next = DRAW_PIPE_EDGE_FLAG_2; - edge_finish = DRAW_PIPE_EDGE_FLAG_0; - } - else { - edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; - } - for (i = 0; i+2 < count; i++) { if (flatfirst) { @@ -116,14 +107,46 @@ static void FUNC( struct draw_geometry_shader *shader, } break; + case PIPE_PRIM_LINES_ADJACENCY: + for (i = 0; i+3 < count; i += 4) { + LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); + } + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + for (i = 1; i + 2 < count; i++) { + LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + for (i = 0; i+5 < count; i += 6) { /* each triangle-with-adjacency consumes 6 vertices */ + TRI_ADJ( shader, i + 0, i + 1, i + 2, + i + 3, i + 4, i + 5); + } + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + for (i = 0, j = 0; i+5 < count; i += 2, ++j) { + TRI_ADJ( shader, + i + 0, + i + 1 + 2*(j&1), + i + 2 + 2*(j&1), + i + 3 - 2*(j&1), + i + 4 - 2*(j&1), + i + 5); + } + break; + default: - assert(0); + debug_assert(!"Unsupported primitive in geometry shader"); break; } } #undef TRIANGLE +#undef TRI_ADJ #undef POINT #undef LINE +#undef LINE_ADJ #undef FUNC +#undef LOCAL_VARS diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index bd5d8853cf8..9117c1303dc 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -173,6 +173,8 @@ draw_llvm_create(struct draw_context *draw) #endif llvm = CALLOC_STRUCT( draw_llvm ); + if (!llvm) + return NULL; llvm->draw = draw; llvm->engine = draw->engine; diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 7ea04e38193..a8b9dc60141 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 
), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i3]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ @@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw, #define POINT(i0) \ do_point( draw, \ - verts + stride * elts[i0] ) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run #define ARGS \ @@ -256,27 +256,34 @@ static void do_triangle( struct draw_context *draw, * draw_vbuf.c code uses when it has to perform a flush. 
*/ void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; - - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = vertex_count; - - pipe_run(draw, prim, vertices, stride, elts, count); + unsigned i, start; + draw->pipeline.verts = (char *)vert_info->verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = vert_info->count; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + + pipe_run(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); + } + draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; } - /* * Set up macros for draw_pt_decompose.h template code. * This code is for non-indexed (aka linear) rendering (no elts). 
@@ -289,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -305,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i3)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) #define LINE(flags,i0,i1) \ do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) #define POINT(i0) \ do_point( draw, \ - verts + stride * i0 ) + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run_linear #define ARGS \ @@ -354,17 +361,29 @@ void draw_pipeline_run( struct 
draw_context *draw, * For drawing non-indexed primitives. */ void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = count; - - pipe_run_linear(draw, prim, vertices, stride, count); + unsigned i, start; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + char *verts = ((char*)vert_info->verts) + + (start * vert_info->stride); + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = count; + + pipe_run_linear(draw, + prim_info->prim, + (struct vertex_header*)verts, + vert_info->stride, + count); + } draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 4faf0a779ca..debd17fd74b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -788,9 +788,6 @@ draw_aaline_stage(struct draw_context *draw) if (aaline == NULL) return NULL; - if (!draw_alloc_temp_verts( &aaline->stage, 8 )) - goto fail; - aaline->stage.draw = draw; aaline->stage.name = "aaline"; aaline->stage.next = NULL; @@ -801,11 +798,14 @@ draw_aaline_stage(struct draw_context *draw) aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; + return aaline; fail: if (aaline) - aaline_destroy(&aaline->stage); + aaline->stage.destroy(&aaline->stage); return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index bba6f50c020..d406a86ccb5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -780,9 +780,6 @@ draw_aapoint_stage(struct draw_context *draw) if (aapoint == NULL) goto fail; - if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) - goto fail; - aapoint->stage.draw = draw; aapoint->stage.name = "aapoint"; aapoint->stage.next = NULL; @@ -793,11 +790,14 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; + return aapoint; fail: if (aapoint) - aapoint_destroy(&aapoint->stage); + aapoint->stage.destroy(&aapoint->stage); return NULL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index df8d82e367d..122b1c79689 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -522,9 +522,6 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) if (clipper == NULL) goto fail; - if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) - goto fail; - clipper->stage.draw = draw; clipper->stage.name = "clipper"; clipper->stage.point = clip_point; @@ -536,6 +533,9 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; + return &clipper->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index bf84ce30ed1..2f4d01d23ab 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -129,9 +129,6 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) if (cull == NULL) goto fail; - if (!draw_alloc_temp_verts( &cull->stage, 0 )) - 
goto fail; - cull->stage.draw = draw; cull->stage.name = "cull"; cull->stage.next = NULL; @@ -142,6 +139,9 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) cull->stage.reset_stipple_counter = cull_reset_stipple_counter; cull->stage.destroy = cull_destroy; + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; + return &cull->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 34afb1a0b60..693f2895aad 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -257,9 +257,6 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) if (flatshade == NULL) goto fail; - if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) - goto fail; - flatshade->stage.draw = draw; flatshade->stage.name = "flatshade"; flatshade->stage.next = NULL; @@ -270,6 +267,9 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; flatshade->stage.destroy = flatshade_destroy; + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; + return &flatshade->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 8e321946ced..8afbbfa1569 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -161,9 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); if (offset == NULL) - return NULL; - - draw_alloc_temp_verts( &offset->stage, 3 ); + goto fail; offset->stage.draw = draw; offset->stage.name = "offset"; @@ -175,5 +173,14 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.reset_stipple_counter = offset_reset_stipple_counter; offset->stage.destroy = offset_destroy; + if (!draw_alloc_temp_verts( 
&offset->stage, 3 )) + goto fail; + return &offset->stage; + +fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index ef30db094fe..fff960c7eb5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -607,8 +607,8 @@ static struct pstip_stage * draw_pstip_stage(struct draw_context *draw) { struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); - - draw_alloc_temp_verts( &pstip->stage, 8 ); + if (pstip == NULL) + goto fail; pstip->stage.draw = draw; pstip->stage.name = "pstip"; @@ -620,7 +620,16 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; pstip->stage.destroy = pstip_destroy; + if (!draw_alloc_temp_verts( &pstip->stage, 8 )) + goto fail; + return pstip; + +fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 70fbab9ea76..4b3f4e7ae11 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -235,8 +235,8 @@ stipple_destroy( struct draw_stage *stage ) struct draw_stage *draw_stipple_stage( struct draw_context *draw ) { struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); - - draw_alloc_temp_verts( &stipple->stage, 2 ); + if (stipple == NULL) + goto fail; stipple->stage.draw = draw; stipple->stage.name = "stipple"; @@ -248,5 +248,14 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; + if (!draw_alloc_temp_verts( &stipple->stage, 2 )) + goto fail; + return &stipple->stage; + +fail: + if (stipple) + stipple->stage.destroy( &stipple->stage ); + + return NULL; } diff --git 
a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 808b2fb0b58..9a3f3fee625 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -177,9 +177,6 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) if (twoside == NULL) goto fail; - if (!draw_alloc_temp_verts( &twoside->stage, 3 )) - goto fail; - twoside->stage.draw = draw; twoside->stage.name = "twoside"; twoside->stage.next = NULL; @@ -190,6 +187,9 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; twoside->stage.destroy = twoside_destroy; + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; + return &twoside->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index e333d26a932..d87741b91e7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -202,9 +202,6 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) if (unfilled == NULL) goto fail; - if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) - goto fail; - unfilled->stage.draw = draw; unfilled->stage.name = "unfilled"; unfilled->stage.next = NULL; @@ -216,6 +213,9 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; unfilled->stage.destroy = unfilled_destroy; + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; + return &unfilled->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index d7ac95b7405..98da9cfb999 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -202,8 +202,8 @@ static void wideline_destroy( struct draw_stage *stage ) struct 
draw_stage *draw_wide_line_stage( struct draw_context *draw ) { struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); + if (wide == NULL) + goto fail; wide->stage.draw = draw; wide->stage.name = "wide-line"; @@ -215,5 +215,14 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; + +fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index a86fe19586c..3e6e5389950 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -324,9 +324,6 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) if (wide == NULL) goto fail; - if (!draw_alloc_temp_verts( &wide->stage, 4 )) - goto fail; - wide->stage.draw = draw; wide->stage.name = "wide-point"; wide->stage.next = NULL; @@ -337,6 +334,9 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index fe867ff8e27..4584033bc2b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -265,6 +265,34 @@ struct draw_context void *driver_private; }; + +struct draw_fetch_info { + boolean linear; + unsigned start; + const unsigned *elts; + unsigned count; +}; + +struct draw_vertex_info { + struct vertex_header *verts; + unsigned vertex_size; + unsigned stride; + unsigned count; +}; + +struct 
draw_prim_info { + boolean linear; + unsigned start; + + const ushort *elts; + unsigned count; + + unsigned prim; + unsigned *primitive_lengths; + unsigned primitive_count; +}; + + /******************************************************************************* * Draw common initialization code */ @@ -342,18 +370,13 @@ void draw_pipeline_destroy( struct draw_context *draw ); #define DRAW_PIPE_FLAG_MASK (0xf<<12) void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); + @@ -380,9 +403,4 @@ draw_get_rasterizer_no_cull( struct draw_context *draw, boolean flatshade ); -int draw_max_output_vertices(struct draw_context *draw, - unsigned pipe_prim, - unsigned count); - - #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 02c97fec817..6234272d6ce 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -69,7 +69,6 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - unsigned out_prim = prim; /* Sanitize primitive length: */ @@ -80,18 +79,19 @@ draw_pt_arrays(struct draw_context *draw, if (count < first) return TRUE; } - if (draw->gs.geometry_shader) { - out_prim = draw->gs.geometry_shader->output_primitive; - } if (!draw->force_passthrough) { + unsigned gs_out_prim = (draw->gs.geometry_shader ? 
+ draw->gs.geometry_shader->output_primitive : + prim); + if (!draw->render) { opt |= PT_PIPELINE; } if (draw_need_pipeline(draw, draw->rasterizer, - out_prim)) { + gs_out_prim)) { opt |= PT_PIPELINE; } @@ -122,7 +122,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.varray; } - frontend->prepare( frontend, prim, out_prim, middle, opt ); + frontend->prepare( frontend, prim, middle, opt ); frontend->run(frontend, draw_pt_elt_func(draw), diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 67ae70fdaf7..44356fba4c5 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -39,6 +39,8 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; +struct draw_prim_info; +struct draw_vertex_info; #define PT_SHADE 0x1 @@ -60,8 +62,7 @@ struct draw_context; */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, struct draw_pt_middle_end *, unsigned opt ); @@ -85,8 +86,7 @@ struct draw_pt_front_end { */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ); @@ -164,16 +164,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned *max_vertices ); void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_linear( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_destroy( struct pt_emit *emit ); @@ -184,13 +180,11 @@ struct pt_emit *draw_pt_emit_create( struct 
draw_context *draw ); */ struct pt_so_emit; -void draw_pt_so_emit_prepare( struct pt_so_emit *emit, - unsigned prim ); +void draw_pt_so_emit_prepare( struct pt_so_emit *emit ); void draw_pt_so_emit( struct pt_so_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info ); void draw_pt_so_emit_destroy( struct pt_so_emit *emit ); @@ -226,9 +220,7 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); struct pt_post_vs; boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned stride, - unsigned count ); + struct draw_vertex_info *info ); void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, boolean bypass_clipping, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index f623c0743da..0229bcc7fe1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -127,15 +127,17 @@ void draw_pt_emit_prepare( struct pt_emit *emit, void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned vertex_count = vert_info->count; + unsigned stride = vert_info->stride; + const ushort *elts = prim_info->elts; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; + unsigned start, i; void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
@@ -190,23 +192,31 @@ void draw_pt_emit( struct pt_emit *emit, 0, vertex_count - 1 ); - render->draw_elements(render, - elts, - count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_elements(render, + elts + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); } void draw_pt_emit_linear(struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned stride = vert_info->stride; + unsigned count = vert_info->count; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; void *hw_verts; + unsigned start, i; #if 0 debug_printf("Linear emit\n"); @@ -258,7 +268,14 @@ void draw_pt_emit_linear(struct pt_emit *emit, render->unmap_vertices( render, 0, count - 1 ); - render->draw_arrays(render, 0, count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_arrays(render, + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a1347221b5d..bf799db3524 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -29,7 +29,6 @@ #include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index c629d555636..5c8af17c8e3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ 
b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_gs.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -90,7 +91,6 @@ struct fetch_emit_middle_end { static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -101,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, boolean ok; struct translate_key key; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + + ok = draw->render->set_primitive( draw->render, - out_prim ); + gs_out_prim ); if (!ok) { assert(0); return; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5483a25f1dc..b8270280b64 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -68,8 +68,7 @@ struct fetch_shade_emit { static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -80,9 +79,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned nr_vbs = 0; + /* Can't support geometry shader on this path. 
+ */ + assert(!draw->gs.geometry_shader); if (!draw->render->set_primitive( draw->render, - out_prim )) { + prim )) { assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 2301e542aab..24c538b099c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -48,13 +48,11 @@ struct fetch_pipeline_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; }; static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -64,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ @@ -79,8 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } } - fpme->input_prim = in_prim; - fpme->output_prim = out_prim; + fpme->input_prim = prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -102,13 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? 
TRUE : FALSE) ); - draw_pt_so_emit_prepare( fpme->so_emit, out_prim ); + draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - out_prim, + gs_out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -127,173 +128,146 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch( struct pt_fetch *fetch, + const struct draw_fetch_info *fetch_info, + char *output) { - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - fetch_count); - - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ - assert(0); - return; + if (fetch_info->linear) { + draw_pt_fetch_run_linear( fetch, + fetch_info->start, + fetch_info->count, + output ); } - - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
- */ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) { - fetch_count = - draw_geometry_shader_run(gshader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(fetch_count <= alloc_count); - } + else { + draw_pt_fetch_run( fetch, + fetch_info->elts, + fetch_info->count, + output ); } +} - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { +static void pipeline(struct fetch_pipeline_middle_end *fpme, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( fpme->draw, + vert_info, + prim_info); + else draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + vert_info, + prim_info ); +} + +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + draw_pt_emit(emit, vert_info, prim_info); } +} - FREE(pipeline_verts); +static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct 
draw_vertex_info *input_verts, + struct draw_vertex_info *output_verts ) +{ + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->count = input_verts->count; + output_verts->verts = + (struct vertex_header *)MALLOC(output_verts->vertex_size * + output_verts->count); + + vshader->run_linear(vshader, + (const float (*)[4])input_verts->verts->data, + ( float (*)[4])output_verts->verts->data, + constants, + input_verts->count, + input_verts->vertex_size, + input_verts->vertex_size); } - -static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count) +static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info fetched_vert_info; + struct draw_vertex_info vs_vert_info; + struct draw_vertex_info gs_vert_info; + struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - count); - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. 
- */ + fetched_vert_info.count = fetch_info->count; + fetched_vert_info.vertex_size = fpme->vertex_size; + fetched_vert_info.stride = fpme->vertex_size; + fetched_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!fetched_vert_info.verts) { assert(0); return; } - /* Fetch into our vertex buffer + /* Fetch into our vertex buffer. + */ + fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts ); + + /* Finished with fetch: */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); + fetch_info = NULL; + vert_info = &fetched_vert_info; /* Run the shader, note that this overwrites the data[] parts of * the pipeline verts. */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) { - count = - draw_geometry_shader_run(geometry_shader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(count <= alloc_count); - } + if (fpme->opt & PT_SHADE) { + draw_vertex_shader_run(vshader, + draw->pt.user.vs_constants, + vert_info, + &vs_vert_info); + + FREE(vert_info->verts); + vert_info = &vs_vert_info; + } + + if ((fpme->opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; } - /* stream output needs to be done before clipping */ + + /* Stream output needs to be done before clipping. + * + * XXX: Stream output surely needs to respect the prim_info->elt + * lists. 
+ */ draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); + vert_info, + prim_info ); if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) + vert_info )) { opt |= PT_PIPELINE; } @@ -301,115 +275,96 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); + emit( fpme->emit, + vert_info, + prim_info ); } - - FREE(pipeline_verts); + FREE(vert_info->verts); } +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} -static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader 
*shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - struct vertex_header *pipeline_verts; - unsigned alloc_count = draw_max_output_vertices(draw, - fpme->input_prim, - count); - - pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} - if (!pipeline_verts) - return FALSE; - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. 
- */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) { - count = - draw_geometry_shader_run(geometry_shader, - fpme->input_prim, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - debug_assert(count <= alloc_count); - } - } +static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; - /* Do we need to run the pipeline? 
- */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } @@ -464,7 +419,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * goto fail; fpme->emit = draw_pt_emit_create( draw ); - if (!fpme->emit) + if (!fpme->emit) goto fail; fpme->so_emit = draw_pt_so_emit_create( draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 5f6d23874f8..c7f76397e76 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -28,6 +28,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "draw/draw_context.h" +#include "draw/draw_gs.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" @@ -48,7 +49,6 @@ struct llvm_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; struct draw_llvm *llvm; @@ -61,7 +61,6 @@ struct llvm_middle_end { static void llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned in_prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -73,6 +72,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + + unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. 
*/ @@ -89,7 +93,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } fpme->input_prim = in_prim; - fpme->output_prim = out_prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -106,9 +109,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); + + draw_pt_so_emit_prepare( fpme->so_emit ); - draw_pt_so_emit_prepare( fpme->so_emit, out_prim ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, out_prim, @@ -150,72 +154,142 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } +static void pipeline(struct llvm_middle_end *llvm, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( llvm->draw, + vert_info, + prim_info); + else + draw_pipeline_run( llvm->draw, + vert_info, + prim_info ); +} -static void llvm_middle_end_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); + } + else { + draw_pt_emit(emit, vert_info, prim_info); + } +} + +static void +llvm_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info llvm_vert_info; + struct draw_vertex_info gs_vert_info; + struct 
draw_vertex_info *vert_info; unsigned opt = fpme->opt; - unsigned alloc_count = align( fetch_count, 4 ); - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ + llvm_vert_info.count = fetch_info->count; + llvm_vert_info.vertex_size = fpme->vertex_size; + llvm_vert_info.stride = fpme->vertex_size; + llvm_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!llvm_vert_info.verts) { assert(0); return; } - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - fetch_elts, - fetch_count, - fpme->vertex_size, - draw->pt.vertex_buffer ); + if (fetch_info->linear) + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->start, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + else + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->elts, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer); + + /* Finished with fetch and vs: + */ + fetch_info = NULL; + vert_info = &llvm_vert_info; + + + if ((opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; + } /* stream output needs to be done before clipping */ draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { + vert_info, + prim_info ); + + if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { opt |= PT_PIPELINE; 
} /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + emit( fpme->emit, + vert_info, + prim_info ); } + FREE(vert_info->verts); +} - FREE(pipeline_verts); +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -224,63 +298,23 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, unsigned count) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. 
- */ - assert(0); - return; - } - -#if 0 - debug_printf("#### Pipeline = %p (data = %p)\n", - pipeline_verts, pipeline_verts->data); -#endif - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size); - } - else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); - } - - FREE(pipeline_verts); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -293,59 +327,24 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, unsigned draw_count ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; - 
if (!pipeline_verts) - return FALSE; - - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->output_prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index fd33a548b48..112be50f9ae 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -29,16 +29,13 @@ #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" struct pt_post_vs { struct draw_context *draw; boolean (*run)( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + struct draw_vertex_info *info ); }; @@ -92,20 +89,18 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) * instructions */ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, - struct 
vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; - if (0) debug_printf("%s count, %d\n", __FUNCTION__, count); + if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; #if 0 @@ -143,7 +138,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, #endif } - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } return clipped != 0; @@ -153,29 +148,27 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, /* As above plus edgeflags */ -static boolean +static boolean post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info) { unsigned j; boolean needpipe; - needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride); + needpipe = post_vs_cliptest_viewport_gl(pvs, info); /* If present, copy edgeflag VS output into vertex header. * Otherwise, leave header as is. 
*/ if (pvs->draw->vs.edgeflag_output) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; int ef = pvs->draw->vs.edgeflag_output; - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { const float *edgeflag = out->data[ef]; out->edgeflag = !(edgeflag[0] != 1.0f); needpipe |= !out->edgeflag; - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } } return needpipe; @@ -187,18 +180,16 @@ post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, /* If bypass_clipping is set, skip cliptest and rhw divide. */ static boolean post_vs_viewport( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; /* Viewport mapping only, no cliptest/rhw divide @@ -207,9 +198,9 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, position[1] = position[1] * scale[1] + trans[1]; position[2] = position[2] * scale[2] + trans[2]; - out = (struct vertex_header *)((char *)out + stride); + out = (struct vertex_header *)((char *)out + info->stride); } - + return FALSE; } @@ -218,20 +209,16 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, * to do. 
*/ static boolean post_vs_none( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { if (0) debug_printf("%s\n", __FUNCTION__); return FALSE; } boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - return pvs->run( pvs, pipeline_verts, count, stride ); + return pvs->run( pvs, info ); } @@ -272,7 +259,7 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) return NULL; pvs->draw = draw; - + return pvs; } diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index bb153cedfa0..5d82934889b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -25,151 +25,264 @@ * **************************************************************************/ -#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" -#include "translate/translate.h" -#include "translate/translate_cache.h" + +#include "util/u_math.h" +#include "util/u_memory.h" struct pt_so_emit { struct draw_context *draw; - struct translate *translate; + void *buffers[PIPE_MAX_SO_BUFFERS]; - struct translate_cache *cache; - unsigned prim; + unsigned input_vertex_stride; + const float (*inputs)[4]; - const struct vertex_info *vinfo; boolean has_so; + + boolean single_buffer; + + unsigned emitted_primitives; + unsigned emitted_vertices; }; -static void -prepare_so_emit( struct pt_so_emit *emit, - const struct vertex_info *vinfo ) + +void draw_pt_so_emit_prepare(struct pt_so_emit *emit) { struct draw_context *draw = emit->draw; - unsigned i; - struct translate_key hw_key; - unsigned dst_offset = 0; + emit->has_so = (draw->so.state.num_outputs > 0); + + /* if we have a state with outputs make 
sure we have + * buffers to output to */ if (emit->has_so) { - for (i = 0; i < draw->so.state.num_outputs; ++i) { - unsigned src_offset = (draw->so.state.register_index[i] * 4 * - sizeof(float) ); - unsigned output_format; - unsigned emit_sz = 0; - /*unsigned output_bytes = util_format_get_blocksize(output_format); - unsigned nr_compo = util_format_get_nr_components(output_format);*/ - - output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); - emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); - - /* doesn't handle EMIT_OMIT */ - assert(emit_sz != 0); - - if (draw->so.state.register_mask[i] != TGSI_WRITEMASK_XYZW) { - /* we only support rendering with XYZW writemask*/ - debug_printf("NOT_IMPLEMENTED(writemask with stream output) at %s: %s:%d\n", - __FUNCTION__, __FILE__, __LINE__); + boolean has_valid_buffer = FALSE; + unsigned i; + for (i = 0; i < draw->so.num_buffers; ++i) { + if (draw->so.buffers[i]) { + has_valid_buffer = TRUE; + break; } + } + emit->has_so = has_valid_buffer; + } + + if (!emit->has_so) + return; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
+ */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +} - hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = 0; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].instance_divisor = 0; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; +static boolean +is_component_writable(unsigned mask, + unsigned compo) +{ + switch (mask) { + case TGSI_WRITEMASK_NONE: + return FALSE; + case TGSI_WRITEMASK_X: + return compo == 0; + case TGSI_WRITEMASK_Y: + return compo == 1; + case TGSI_WRITEMASK_XY: + return compo == 0 || compo == 1; + case TGSI_WRITEMASK_Z: + return compo == 2; + case TGSI_WRITEMASK_XZ: + return compo == 0 || compo == 2; + case TGSI_WRITEMASK_YZ: + return compo == 1 || compo == 2; + case TGSI_WRITEMASK_XYZ: + return compo == 0 || compo == 1 || compo == 2; + case TGSI_WRITEMASK_W: + return compo == 3; + case TGSI_WRITEMASK_XW: + return compo == 0 || compo == 3; + case TGSI_WRITEMASK_YW: + return compo == 1 || compo == 3; + case TGSI_WRITEMASK_XYW: + return compo == 0 || compo == 1 || compo == 3; + case TGSI_WRITEMASK_ZW: + return compo == 2 || compo == 3; + case TGSI_WRITEMASK_XZW: + return compo == 0 || compo == 1 || compo == 3; + case TGSI_WRITEMASK_YZW: + return compo == 1 || compo == 2 || compo == 4; + case TGSI_WRITEMASK_XYZW: + return compo < 4; + default: + debug_assert(!"Unknown writemask in stream out"); + return compo < 4; + } +} - dst_offset += emit_sz; +static void so_emit_prim(struct pt_so_emit *so, + unsigned *indices, + unsigned num_vertices) +{ + unsigned slot, i; + unsigned input_vertex_stride = so->input_vertex_stride; + struct draw_context *draw = so->draw; + const float (*input_ptr)[4]; + const struct pipe_stream_output_state *state = + &draw->so.state; + float **buffer = 0; + + input_ptr = so->inputs; + + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; + unsigned 
total_written_compos = 0; + /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0; slot < state->num_outputs; ++slot) { + unsigned idx = state->register_index[slot]; + unsigned writemask = state->register_mask[slot]; + unsigned written_compos = 0; + unsigned compo; + + buffer = (float**)&so->buffers[state->output_buffer[slot]]; + + /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx);*/ +#if 1 + assert(!util_is_inf_or_nan(input[idx][0])); + assert(!util_is_inf_or_nan(input[idx][1])); + assert(!util_is_inf_or_nan(input[idx][2])); + assert(!util_is_inf_or_nan(input[idx][3])); +#endif + for (compo = 0; compo < 4; ++compo) { + if (is_component_writable(writemask, compo)) { + float *buf = *buffer; + buf[written_compos++] = input[idx][compo]; + } + } +#if 0 + debug_printf("\t\t(writemask = %d)%f %f %f %f\n", + writemask, + input[idx][0], + input[idx][1], + input[idx][2], + input[idx][3]); +#endif + *buffer += written_compos; + total_written_compos += written_compos; } - hw_key.nr_elements = draw->so.state.num_outputs; - hw_key.output_stride = draw->so.state.stride; - - if (!emit->translate || - translate_key_compare(&emit->translate->key, &hw_key) != 0) - { - translate_key_sanitize(&hw_key); - emit->translate = translate_cache_find(emit->cache, &hw_key); + if (so->single_buffer) { + int stride = (int)state->stride - + sizeof(float) * total_written_compos; + + debug_assert(stride >= 0); + *buffer = (float*) (((char*)*buffer) + stride); } - } else { - /* no stream output */ - emit->translate = NULL; } + so->emitted_vertices += num_vertices; + ++so->emitted_primitives; } - -void draw_pt_so_emit_prepare( struct pt_so_emit *emit, - unsigned prim ) +static void so_point(struct pt_so_emit *so, int idx) { - struct draw_context *draw = emit->draw; - boolean ok; + unsigned indices[1]; - emit->has_so = 
(draw->so.state.num_outputs > 0); + indices[0] = idx; - if (!emit->has_so) - return; + so_emit_prim(so, indices, 1); +} - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +static void so_line(struct pt_so_emit *so, int i0, int i1) +{ + unsigned indices[2]; - emit->prim = prim; + indices[0] = i0; + indices[1] = i1; - ok = draw->render->set_primitive(draw->render, emit->prim); - if (!ok) { - assert(0); - return; - } + so_emit_prim(so, indices, 2); +} + +static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) +{ + unsigned indices[3]; - /* Must do this after set_primitive() above: */ - emit->vinfo = draw->render->get_vertex_info(draw->render); + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; - prepare_so_emit( emit, emit->vinfo ); + so_emit_prim(so, indices, 3); } +#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) +#define LINE(gs,i0,i1) so_line(so,i0,i1) +#define POINT(gs,i0) so_point(so,i0) +#define FUNC so_run_linear +#define LOCAL_VARS +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + +#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) +#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) +#define POINT(gs,i0) so_point(gs,elts[i0]) +#define FUNC so_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + void draw_pt_so_emit( struct pt_so_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride ) + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prims ) { struct draw_context *draw = emit->draw; - struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; - void *so_buffer; + unsigned start, i; if (!emit->has_so) return; - so_buffer = draw->so.buffers[0]; + emit->emitted_vertices = 0; + emit->emitted_primitives = 0; + emit->input_vertex_stride = input_verts->stride; + 
emit->inputs = (const float (*)[4])input_verts->verts->data; + for (i = 0; i < draw->so.num_buffers; ++i) { + emit->buffers[i] = draw->so.buffers[i]; + } + emit->single_buffer = TRUE; + for (i = 0; i < draw->so.state.num_outputs; ++i) { + if (draw->so.state.output_buffer[i] != 0) + emit->single_buffer = FALSE; + } /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (vertex_count == 0) - return; - - if (vertex_count >= UNDEFINED_VERTEX_ID) { - assert(0); - return; - } - - /* XXX we only support single output buffer */ - if (draw->so.num_buffers != 1) { - debug_printf("NOT_IMPLEMENTED(multiple stream output buffers) at %s: %s:%d\n", - __FUNCTION__, __FILE__, __LINE__); + for (start = i = 0; i < input_prims->primitive_count; + start += input_prims->primitive_lengths[i], i++) + { + unsigned count = input_prims->primitive_lengths[i]; + + if (input_prims->linear) { + so_run_linear(emit, input_prims, input_verts, + start, count); + } else { + so_run_elts(emit, input_prims, input_verts, + start, count); + } } - translate->set_buffer(translate, 0, vertex_data, - stride, ~0); - translate->run(translate, 0, vertex_count, - draw->instance_id, so_buffer); - - render->set_stream_output_info(render, 0, vertex_count); + render->set_stream_output_info(render, + emit->emitted_primitives, + emit->emitted_vertices); } @@ -180,19 +293,11 @@ struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ) return NULL; emit->draw = draw; - emit->cache = translate_cache_create(); - if (!emit->cache) { - FREE(emit); - return NULL; - } return emit; } void draw_pt_so_emit_destroy( struct pt_so_emit *emit ) { - if (emit->cache) - translate_cache_destroy(emit->cache); - FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 5ea833032f3..d89d5cd20f4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c 
@@ -137,7 +137,6 @@ static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { static void varray_prepare(struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt) { @@ -146,11 +145,12 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->base.run = varray_run; varray->input_prim = in_prim; - varray->output_prim = decompose_prim[out_prim]; + varray->output_prim = decompose_prim[in_prim]; varray->middle = middle; - middle->prepare(middle, varray->input_prim, - varray->output_prim, opt, &varray->driver_fetch_max ); + middle->prepare(middle, + varray->output_prim, + opt, &varray->driver_fetch_max ); /* check that the max is even */ assert((varray->driver_fetch_max & 1) == 0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 914c87a9dc4..b7e0da7d44e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -70,7 +70,6 @@ vcache_flush( struct vcache_frontend *vcache ) if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -368,7 +367,6 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (vcache->middle_prim != vcache->input_prim) { vcache->middle_prim = vcache->input_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -472,7 +470,6 @@ vcache_check_run( struct draw_pt_front_end *frontend, static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt ) { @@ -487,8 +484,14 @@ vcache_prepare( struct draw_pt_front_end *frontend, vcache->base.run = vcache_check_run; } + /* VCache will always emit the reduced version of its input + * primitive, ie 
STRIP/FANS become TRIS, etc. + * + * This is not to be confused with what the GS might be up to, + * which is a separate issue. + */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(out_prim); + vcache->output_prim = u_reduced_prim(in_prim); vcache->middle = middle; vcache->opt = opt; @@ -497,8 +500,9 @@ vcache_prepare( struct draw_pt_front_end *frontend, * doing so: */ vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; - middle->prepare( middle, vcache->input_prim, - vcache->middle_prim, opt, &vcache->fetch_max ); + middle->prepare( middle, + vcache->middle_prim, + opt, &vcache->fetch_max ); } diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h new file mode 100644 index 00000000000..01212a8e536 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -0,0 +1,123 @@ + +static void FUNC( struct pt_so_emit *so, + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + unsigned start, + unsigned count) +{ + struct draw_context *draw = so->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i; + LOCAL_VARS + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + debug_assert(input_prims->primitive_count == 1); + + switch (input_prims->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + POINT( so, start + i + 0 ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( so , start + i + 0 , start + i + 1 ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + + LINE( so, start + i - 1, start ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( so, start + i + 0, start + 
i + 1, start + i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0, + start + i + 1 + (i&1), + start + i + 2 - (i&1) ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0 + (i&1), + start + i + 1 - (i&1), + start + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 1, + start + i + 2, + start ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start, + start + i + 1, + start + i + 2 ); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + + for (i = 0; i+2 < count; i++) { + + if (flatfirst) { + TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); + } + else { + TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); + } + } + } + break; + + default: + debug_assert(!"Unsupported primitive in stream output"); + break; + } +} + + +#undef TRIANGLE +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 8d9768246ed..e32803c0720 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -123,7 +123,7 @@ struct vbuf_render { * Called after writing data to the stream out buffers */ void (*set_stream_output_info)( struct vbuf_render *vbufr, - unsigned buffer_index, + unsigned primitive_count, unsigned vertices_count ); }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index eb492042387..87e3e72a6e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -310,21 +310,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } -typedef void 
(*fetch_func)(float *, const uint8_t *, unsigned, unsigned); - -/** cast wrapper */ -static void * -fetch_func_ptr_to_voidptr(fetch_func f) -{ - union { - void *v; - fetch_func f; - } u; - u.f = f; - return u.v; -} - - /** * Fetch a pixel into a 4 float AoS. * @@ -406,7 +391,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, assert(LLVMIsDeclaration(function)); LLVMAddGlobalMapping(lp_build_engine, function, - fetch_func_ptr_to_voidptr(format_desc->fetch_rgba_float)); + func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 0a690ea7476..44cfdc4d3fb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -78,6 +78,9 @@ enum LLVM_CodeGenOpt_Level { extern void lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE); +extern void +lp_set_target_options(void); + void lp_build_init(void) @@ -86,6 +89,8 @@ lp_build_init(void) gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); #endif + lp_set_target_options(); + LLVMInitializeNativeTarget(); LLVMLinkInJIT(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f004c0ae451..5a9488b5f79 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -36,6 +36,7 @@ #include <llvm-c/Core.h> #include <llvm-c/ExecutionEngine.h> +#include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/JITEventListener.h> @@ -119,3 +120,25 @@ lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE) { llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener()); } + + +extern "C" void +lp_set_target_options(void) +{ +#if defined(DEBUG) +#if HAVE_LLVM >= 0x0207 + 
llvm::JITEmitDebugInfo = true; +#endif +#endif + +#if defined(DEBUG) || defined(PROFILE) + llvm::NoFramePointerElim = true; +#endif + + llvm::NoExcessFPPrecision = false; + + /* XXX: Investigate this */ +#if 0 + llvm::UnsafeFPMath = true; +#endif +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 0890078cd05..6dbedf15ca8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -164,6 +164,7 @@ tgsi_default_full_declaration( void ) full_declaration.Declaration = tgsi_default_declaration(); full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); + full_declaration.ImmediateData.u = NULL; return full_declaration; } @@ -180,7 +181,7 @@ tgsi_build_full_declaration( struct tgsi_declaration_range *dr; if( maxsize <= size ) - return 0; + return 0; declaration = (struct tgsi_declaration *) &tokens[size]; size++; @@ -235,6 +236,24 @@ tgsi_build_full_declaration( header ); } + if (full_decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + union tgsi_immediate_data *data; + + for (i = 0; i <= dr->Last; ++i) { + for (j = 0; j < 4; ++j) { + unsigned idx = i*4 + j; + if (maxsize <= size) + return 0; + data = (union tgsi_immediate_data *) &tokens[size]; + ++size; + + *data = full_decl->ImmediateData.u[idx]; + declaration_grow( declaration, header ); + } + } + } + return size; } @@ -613,6 +632,7 @@ tgsi_build_full_instruction( reg->Register.File, reg->Register.WriteMask, reg->Register.Indirect, + reg->Register.Dimension, reg->Register.Index, instruction, header ); @@ -640,6 +660,46 @@ tgsi_build_full_instruction( instruction, header ); } + + if( reg->Register.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->Dimension.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->Dimension.Indirect, + 
reg->Dimension.Index, + instruction, + header ); + + if( reg->Dimension.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, + instruction, + header ); + } + } } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { @@ -959,6 +1019,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -974,6 +1035,7 @@ tgsi_build_dst_register( dst_register.WriteMask = mask; dst_register.Index = index; dst_register.Indirect = indirect; + dst_register.Dimension = dimension; instruction_grow( instruction, header ); @@ -987,6 +1049,8 @@ tgsi_default_full_dst_register( void ) full_dst_register.Register = tgsi_default_dst_register(); full_dst_register.Indirect = tgsi_default_src_register(); + full_dst_register.Dimension = tgsi_default_dimension(); + full_dst_register.DimIndirect = tgsi_default_src_register(); return full_dst_register; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 13d7f5272d6..112107a0881 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -263,6 +263,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 35480076edf..9fcc28f4c96 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -101,7 +101,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static const char *interpolate_names[] = @@ -191,29 +193,30 @@ static const char *fs_coord_pixel_center_names[] = static void -_dump_register_dst( - struct dump_ctx *ctx, - uint file, - int index) -{ - ENM( file, file_names ); - - CHR( '[' ); - SID( index ); - CHR( ']' ); -} - - -static void _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { ENM(src->Register.File, file_names); if (src->Register.Dimension) { - CHR('['); - SID(src->Dimension.Index); - CHR(']'); + if (src->Dimension.Indirect) { + CHR( '[' ); + ENM( src->DimIndirect.File, file_names ); + CHR( '[' ); + SID( src->DimIndirect.Index ); + TXT( "]." ); + ENM( src->DimIndirect.SwizzleX, swizzle_names ); + if (src->Dimension.Index != 0) { + if (src->Dimension.Index > 0) + CHR( '+' ); + SID( src->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } } if (src->Register.Indirect) { CHR( '[' ); @@ -235,30 +238,52 @@ _dump_register_src( } } + static void -_dump_register_ind( +_dump_register_dst( struct dump_ctx *ctx, - uint file, - int index, - uint ind_file, - int ind_index, - uint ind_swizzle ) + const struct tgsi_full_dst_register *dst ) { - ENM( file, file_names ); - CHR( '[' ); - ENM( ind_file, file_names ); - CHR( '[' ); - SID( ind_index ); - TXT( "]." ); - ENM( ind_swizzle, swizzle_names ); - if (index != 0) { - if (index > 0) - CHR( '+' ); - SID( index ); + ENM(dst->Register.File, file_names); + if (dst->Register.Dimension) { + if (dst->Dimension.Indirect) { + CHR( '[' ); + ENM( dst->DimIndirect.File, file_names ); + CHR( '[' ); + SID( dst->DimIndirect.Index ); + TXT( "]." 
); + ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + if (dst->Dimension.Index != 0) { + if (dst->Dimension.Index > 0) + CHR( '+' ); + SID( dst->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(dst->Dimension.Index); + CHR(']'); + } + } + if (dst->Register.Indirect) { + CHR( '[' ); + ENM( dst->Indirect.File, file_names ); + CHR( '[' ); + SID( dst->Indirect.Index ); + TXT( "]." ); + ENM( dst->Indirect.SwizzleX, swizzle_names ); + if (dst->Register.Index != 0) { + if (dst->Register.Index > 0) + CHR( '+' ); + SID( dst->Register.Index ); + } + CHR( ']' ); + } else { + CHR( '[' ); + SID( dst->Register.Index ); + CHR( ']' ); } - CHR( ']' ); } - static void _dump_writemask( struct dump_ctx *ctx, @@ -277,6 +302,39 @@ _dump_writemask( } } +static void +dump_imm_data(struct tgsi_iterate_context *iter, + union tgsi_immediate_data *data, + unsigned num_tokens, + unsigned data_type) +{ + struct dump_ctx *ctx = (struct dump_ctx *)iter; + unsigned i ; + + TXT( " {" ); + + assert( num_tokens <= 4 ); + for (i = 0; i < num_tokens; i++) { + switch (data_type) { + case TGSI_IMM_FLOAT32: + FLT( data[i].Float ); + break; + case TGSI_IMM_UINT32: + UID(data[i].Uint); + break; + case TGSI_IMM_INT32: + SID(data[i].Int); + break; + default: + assert( 0 ); + } + + if (i < num_tokens - 1) + TXT( ", " ); + } + TXT( "}" ); +} + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -357,6 +415,43 @@ iter_declaration( } } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i; + char range_indent[4]; + + TXT(" {"); + + if (decl->Range.Last < 10) + range_indent[0] = '\0'; + else if (decl->Range.Last < 100) { + range_indent[0] = ' '; + range_indent[1] = '\0'; + } else if (decl->Range.Last < 1000) { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = '\0'; + } else { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = ' '; + range_indent[3] = '\0'; + } + + dump_imm_data(iter, decl->ImmediateData.u, + 4, 
TGSI_IMM_FLOAT32); + for(i = 1; i <= decl->Range.Last; ++i) { + /* indent by strlen of: + * "DCL IMMX[0..1] {" */ + CHR('\n'); + TXT( " " ); + TXT( range_indent ); + dump_imm_data(iter, decl->ImmediateData.u + i, + 4, TGSI_IMM_FLOAT32); + } + + TXT(" }"); + } + EOL(); return TRUE; @@ -430,33 +525,11 @@ iter_immediate( { struct dump_ctx *ctx = (struct dump_ctx *) iter; - uint i; - TXT( "IMM " ); ENM( imm->Immediate.DataType, immediate_type_names ); - TXT( " { " ); - - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - FLT( imm->u[i].Float ); - break; - case TGSI_IMM_UINT32: - UID(imm->u[i].Uint); - break; - case TGSI_IMM_INT32: - SID(imm->u[i].Int); - break; - default: - assert( 0 ); - } - - if (i < imm->Immediate.NrTokens - 2) - TXT( ", " ); - } - TXT( " }" ); + dump_imm_data(iter, imm->u, imm->Immediate.NrTokens - 1, + imm->Immediate.DataType); EOL(); @@ -487,12 +560,12 @@ iter_instruction( INSTID( instno ); TXT( ": " ); - + ctx->indent -= info->pre_dedent; for(i = 0; (int)i < ctx->indent; ++i) TXT( " " ); ctx->indent += info->post_indent; - + if (inst->Instruction.Predicate) { CHR( '(' ); @@ -539,21 +612,7 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (dst->Register.Indirect) { - _dump_register_ind( - ctx, - dst->Register.File, - dst->Register.Index, - dst->Indirect.File, - dst->Indirect.Index, - dst->Indirect.SwizzleX ); - } - else { - _dump_register_dst( - ctx, - dst->Register.File, - dst->Register.Index ); - } + _dump_register_dst( ctx, dst ); _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c15d970b573..5275faa5e22 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -699,6 +699,19 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } + if 
(parse.FullToken.FullDeclaration.Declaration.File == + TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned reg; + struct tgsi_full_declaration *decl = + &parse.FullToken.FullDeclaration; + debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); + for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { + for( i = 0; i < 4; i++ ) { + int idx = reg * 4 + i; + mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; + } + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -1046,8 +1059,15 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: for (i = 0; i < QUAD_SIZE; i++) { - /* XXX: 2D indexing */ - chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + /* + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", + index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], + index2D->i[i], index->i[i]); + }*/ + chan->u[i] = mach->Inputs[index2D->i[i] * + TGSI_EXEC_MAX_INPUT_ATTRIBS + + index->i[i]].xyzw[swizzle].u[i]; } break; @@ -1060,6 +1080,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_TEMPORARY_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); + + chan->u[i] = + mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_IMMEDIATE: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); @@ -1069,6 +1099,14 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_IMMEDIATE_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] == 0); + + chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; + } + break; + case TGSI_FILE_ADDRESS: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0); @@ -1280,6 
+1318,7 @@ store_dest(struct tgsi_exec_machine *mach, uint i; union tgsi_exec_channel null; union tgsi_exec_channel *dst; + union tgsi_exec_channel index2D; uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1325,6 +1364,77 @@ store_dest(struct tgsi_exec_machine *mach, offset = indir_index.i[0]; } + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[3][1], + * where: + * [3] = Dimension.Index + */ + if (reg->Register.Dimension) { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; + + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[ind[4].y+3][1], + * where: + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX + */ + if (reg->Dimension.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + unsigned swizzle; + uint i; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->DimIndirect.Index; + + swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. 
+ */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } + } + } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether Dimension is followed + * by a dimension register and continue the saga. + */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; + } + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; @@ -1351,6 +1461,16 @@ store_dest(struct tgsi_exec_machine *mach, dst = &mach->Temps[offset + index].xyzw[chan_index]; break; + case TGSI_FILE_TEMPORARY_ARRAY: + index = reg->Register.Index; + assert( index < TGSI_EXEC_NUM_TEMPS ); + assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); + /* XXX we use index2D.i[0] here but somehow we might + * end up with someone trying to store indirectly in + * different buffers */ + dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; + break; + case TGSI_FILE_ADDRESS: index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; @@ -1536,6 +1656,19 @@ emit_primitive(struct tgsi_exec_machine *mach) } } +static void +conditional_emit_primitive(struct tgsi_exec_machine *mach) +{ + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + int emitted_verts = + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; + if (emitted_verts) { + emit_primitive(mach); + } + } +} + + /* * Fetch four texture samples using STR texture coordinates. 
*/ @@ -3185,6 +3318,9 @@ exec_instruction( break; case TGSI_OPCODE_END: + /* make sure we end primitives which haven't + * been explicitly emitted */ + conditional_emit_primitive(mach); /* halt execution */ *pc = -1; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 3caf820af67..ccf80ca6fd9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -93,6 +93,7 @@ struct tgsi_sampler #define TGSI_EXEC_NUM_TEMPS 128 #define TGSI_EXEC_NUM_IMMEDIATES 256 +#define TGSI_EXEC_NUM_TEMP_ARRAYS 8 /* * Locations of various utility registers (_I = Index, _C = Channel) @@ -237,9 +238,12 @@ struct tgsi_exec_machine */ struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector TempArray[TGSI_EXEC_NUM_TEMP_ARRAYS][TGSI_EXEC_NUM_TEMPS]; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 7e19e1fe36f..db9a3422203 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -117,6 +117,17 @@ tgsi_parse_token( next_token( ctx, &decl->Semantic ); } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + decl->ImmediateData.u = (union tgsi_immediate_data*) + &ctx->Tokens[ctx->Position]; + for (i = 0; i <= decl->Range.Last; ++i) { + for (j = 0; j < 4; ++j) { + ctx->Position++; + } + } + } + break; } @@ -181,11 +192,6 @@ tgsi_parse_token( next_token( ctx, &inst->Dst[i].Register ); - /* - * No support for indirect or multi-dimensional addressing. 
- */ - assert( !inst->Dst[i].Register.Dimension ); - if( inst->Dst[i].Register.Indirect ) { next_token( ctx, &inst->Dst[i].Indirect ); @@ -195,6 +201,24 @@ tgsi_parse_token( assert( !inst->Dst[i].Indirect.Dimension ); assert( !inst->Dst[i].Indirect.Indirect ); } + if( inst->Dst[i].Register.Dimension ) { + next_token( ctx, &inst->Dst[i].Dimension ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->Dst[i].Dimension.Dimension ); + + if( inst->Dst[i].Dimension.Indirect ) { + next_token( ctx, &inst->Dst[i].DimIndirect ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->Dst[i].Indirect.Indirect ); + assert( !inst->Dst[i].Indirect.Dimension ); + } + } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index b45ccee2f63..36de8807b44 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -44,6 +44,8 @@ struct tgsi_full_dst_register { struct tgsi_dst_register Register; struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_src_register @@ -54,12 +56,18 @@ struct tgsi_full_src_register struct tgsi_src_register DimIndirect; }; +struct tgsi_immediate_array_data +{ + union tgsi_immediate_data *u; +}; + struct tgsi_full_declaration { struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; + struct tgsi_immediate_array_data ImmediateData; }; struct tgsi_full_immediate diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index ce0a92f7fb3..97148dbe233 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -90,9 +90,18 @@ static void scan_register_dst(scan_register *reg, struct 
tgsi_full_dst_register *dst) { - fill_scan_register1d(reg, - dst->Register.File, - dst->Register.Index); + if (dst->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + fill_scan_register2d(reg, + dst->Register.File, + dst->Register.Index, + dst->Dimension.Index); + } else { + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); + } } static void @@ -102,7 +111,6 @@ scan_register_src(scan_register *reg, if (src->Register.Dimension) { /*FIXME: right now we don't support indirect * multidimensional addressing */ - debug_assert(!src->Dimension.Indirect); fill_scan_register2d(reg, src->Register.File, src->Register.Index, @@ -236,7 +244,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 527b7d7b226..55fccba4d8c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -280,7 +280,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean @@ -345,12 +347,68 @@ parse_opt_writemask( return TRUE; } + +/* <register_file_bracket> ::= <file> `[' + */ static boolean -parse_register_dst( struct translate_ctx *ctx, - uint *file, - int *index ); +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; -struct parsed_src_bracket { + if 
(!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse simple 1d register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_1d(struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +struct parsed_bracket { int index; uint ind_file; @@ -360,21 +418,21 @@ struct parsed_src_bracket { static boolean -parse_register_src_bracket( +parse_register_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { const char *cur; uint uindex; - memset(brackets, 0, sizeof(struct parsed_src_bracket)); + memset(brackets, 0, sizeof(struct parsed_bracket)); eat_opt_white( &ctx->cur ); cur = ctx->cur; if (parse_file( &cur, &brackets->ind_file )) { - if (!parse_register_dst( ctx, &brackets->ind_file, - &brackets->ind_index )) + if (!parse_register_1d( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -443,7 +501,7 @@ parse_register_src_bracket( static boolean parse_opt_register_src_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets, + struct parsed_bracket *brackets, int *parsed_brackets) { const char *cur = ctx->cur; @@ -455,7 +513,7 @@ parse_opt_register_src_bracket( ++cur; ctx->cur = cur; - if (!parse_register_src_bracket(ctx, brackets)) + if (!parse_register_bracket(ctx, brackets)) return FALSE; *parsed_brackets = 1; @@ -464,46 +522,6 @@ parse_opt_register_src_bracket( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ -static boolean 
-parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} /* Parse source register operand. * <register_src> ::= <register_file_bracket_index> `]' | @@ -515,13 +533,12 @@ static boolean parse_register_src( struct translate_ctx *ctx, uint *file, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { - brackets->ind_comp = TGSI_SWIZZLE_X; if (!parse_register_file_bracket( ctx, file )) return FALSE; - if (!parse_register_src_bracket( ctx, brackets )) + if (!parse_register_bracket( ctx, brackets )) return FALSE; return TRUE; @@ -629,23 +646,19 @@ parse_register_dcl( } -/* Parse destination register operand. 
- * <register_dst> ::= <register_file_bracket_index> `]' - */ +/* Parse destination register operand.*/ static boolean parse_register_dst( struct translate_ctx *ctx, uint *file, - int *index ) + struct parsed_bracket *brackets) { - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) return FALSE; - } - ctx->cur++; + if (!parse_register_bracket( ctx, brackets )) + return FALSE; + return TRUE; } @@ -655,11 +668,14 @@ parse_dst_operand( struct tgsi_full_dst_register *dst ) { uint file; - int index; uint writemask; const char *cur; + struct parsed_bracket bracket[2]; + int parsed_opt_brackets; - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_dst( ctx, &file, &bracket[0] )) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) return FALSE; cur = ctx->cur; @@ -669,8 +685,24 @@ parse_dst_operand( return FALSE; dst->Register.File = file; - dst->Register.Index = index; + if (parsed_opt_brackets) { + dst->Register.Dimension = 1; + dst->Dimension.Indirect = 0; + dst->Dimension.Dimension = 0; + dst->Dimension.Index = bracket[0].index; + bracket[0] = bracket[1]; + } + dst->Register.Index = bracket[0].index; dst->Register.WriteMask = writemask; + if (bracket[0].ind_file != TGSI_FILE_NULL) { + dst->Register.Indirect = 1; + dst->Indirect.File = bracket[0].ind_file; + dst->Indirect.Index = bracket[0].ind_index; + dst->Indirect.SwizzleX = bracket[0].ind_comp; + dst->Indirect.SwizzleY = bracket[0].ind_comp; + dst->Indirect.SwizzleZ = bracket[0].ind_comp; + dst->Indirect.SwizzleW = bracket[0].ind_comp; + } return TRUE; } @@ -719,7 +751,7 @@ parse_src_operand( uint file; uint swizzle[4]; boolean parsed_swizzle; - struct parsed_src_bracket bracket[2]; + struct parsed_bracket bracket[2]; int parsed_opt_brackets; if 
(*ctx->cur == '-') { @@ -835,7 +867,7 @@ parse_instruction( inst.Predicate.Negate = 1; } - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_1d( ctx, &file, &index )) return FALSE; if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { @@ -985,6 +1017,45 @@ static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = "PERSPECTIVE" }; + +/* parses a 4-touple of the form {x, y, z, w} + * where x, y, z, w are numbers */ +static boolean parse_immediate_data(struct translate_ctx *ctx, + float *values) +{ + unsigned i; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + return TRUE; +} + static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; @@ -995,6 +1066,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) const char *cur; uint advance; boolean is_vs_input; + boolean is_imm_array; assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT); assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT); @@ -1023,8 +1095,9 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.Dim.Index2D = brackets[0].first; } - is_vs_input = (file == TGSI_FILE_INPUT && + is_vs_input = (file == TGSI_FILE_INPUT && ctx->processor == TGSI_PROCESSOR_VERTEX); + is_imm_array = (file == TGSI_FILE_IMMEDIATE_ARRAY); cur = ctx->cur; eat_opt_white( &cur ); @@ -1067,6 +1140,44 @@ static boolean parse_declaration( struct translate_ctx *ctx ) break; } } + } else if 
(is_imm_array) { + unsigned i; + float *vals_itr; + /* we have our immediate data */ + if (*cur != '{') { + report_error( ctx, "Immediate array without data" ); + return FALSE; + } + ++cur; + ctx->cur = cur; + + decl.ImmediateData.u = + MALLOC(sizeof(union tgsi_immediate_data) * 4 * + (decl.Range.Last + 1)); + vals_itr = (float*)decl.ImmediateData.u; + for (i = 0; i <= decl.Range.Last; ++i) { + if (!parse_immediate_data(ctx, vals_itr)) { + FREE(decl.ImmediateData.u); + return FALSE; + } + vals_itr += 4; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + if (i != decl.Range.Last) { + report_error( ctx, "Not enough data in immediate array!" ); + FREE(decl.ImmediateData.u); + return FALSE; + } + } else + ++ctx->cur; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + FREE(decl.ImmediateData.u); + report_error( ctx, "Immediate array data missing closing '}'" ); + return FALSE; + } + ++ctx->cur; } cur = ctx->cur; @@ -1097,6 +1208,10 @@ static boolean parse_declaration( struct translate_ctx *ctx ) ctx->tokens_cur, ctx->header, (uint) (ctx->tokens_end - ctx->tokens_cur) ); + + if (is_imm_array) + FREE(decl.ImmediateData.u); + if (advance == 0) return FALSE; ctx->tokens_cur += advance; @@ -1107,7 +1222,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) static boolean parse_immediate( struct translate_ctx *ctx ) { struct tgsi_full_immediate imm; - uint i; float values[4]; uint advance; @@ -1115,37 +1229,13 @@ static boolean parse_immediate( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + if (!str_match_no_case( &ctx->cur, "FLT32" ) || + is_digit_alpha_underscore( ctx->cur )) { report_error( ctx, "Expected `FLT32'" ); return FALSE; } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '{') { - report_error( ctx, "Expected `{'" ); - return FALSE; - } - ctx->cur++; - for (i = 0; i < 4; i++) { - eat_opt_white( &ctx->cur ); - 
if (i > 0) { - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '}') { - report_error( ctx, "Expected `}'" ); - return FALSE; - } - ctx->cur++; + + parse_immediate_data(ctx, values); imm = tgsi_default_full_immediate(); imm.Immediate.NrTokens += 4; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7d357e154b3..3cf6893a9b9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -732,11 +732,12 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, } -void +void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + + (src.Dimension ? (src.DimIndirect ? 
2 : 1) : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -769,11 +770,27 @@ ureg_emit_src( struct ureg_program *ureg, } if (src.Dimension) { - out[0].src.Dimension = 1; - out[n].dim.Indirect = 0; - out[n].dim.Dimension = 0; - out[n].dim.Padding = 0; - out[n].dim.Index = src.DimensionIndex; + if (src.DimIndirect) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + out[n].value = 0; + out[n].src.File = src.DimIndFile; + out[n].src.SwizzleX = src.DimIndSwizzle; + out[n].src.SwizzleY = src.DimIndSwizzle; + out[n].src.SwizzleZ = src.DimIndSwizzle; + out[n].src.SwizzleW = src.DimIndSwizzle; + out[n].src.Index = src.DimIndIndex; + } else { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + } n++; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 055545f3d2a..07fb01ab7b3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -49,14 +49,18 @@ struct ureg_src unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ unsigned Negate : 1; /* BOOL */ int Index : 16; /* SINT */ - unsigned IndirectFile : 4; /* TGSI_FILE_ */ - int IndirectIndex : 16; /* SINT */ - unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ + int IndirectIndex : 16; /* SINT */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ + unsigned DimIndFile : 4; /* TGSI_FILE_ */ + int DimIndIndex : 16; /* SINT */ + unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a 
tgsi_dst_register, removing unsupported fields @@ -821,12 +825,28 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) return reg; } -static INLINE struct ureg_src +static INLINE struct ureg_src ureg_src_dimension( struct ureg_src reg, int index ) { assert(reg.File != TGSI_FILE_NULL); reg.Dimension = 1; + reg.DimIndirect = 0; + reg.DimensionIndex = index; + return reg; +} + + +static INLINE struct ureg_src +ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr, + int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimIndirect = 1; reg.DimensionIndex = index; + reg.DimIndFile = addr.File; + reg.DimIndIndex = addr.Index; + reg.DimIndSwizzle = addr.SwizzleX; return reg; } @@ -874,6 +894,10 @@ ureg_src_register(unsigned file, src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -897,6 +921,10 @@ ureg_src( struct ureg_dst dst ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -944,7 +972,11 @@ ureg_src_undef( void ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; - + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; + return src; } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index f8dbd2b36a1..0e43a512ee8 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -378,25 +378,28 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - index = instance_id / tg->attrib[attr].instance_divisor; - } else { - index = elt; - } + if 
(tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + if (tg->attrib[attr].instance_divisor) { + index = instance_id / tg->attrib[attr].instance_divisor; + } else { + index = elt; + } - index = MIN2(index, tg->attrib[attr].max_index); + index = MIN2(index, tg->attrib[attr].max_index); - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * index; + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; - tg->attrib[attr].fetch( data, src, 0, 0 ); + tg->attrib[attr].fetch( data, src, 0, 0 ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } - vert += tg->translate.key.output_stride; } } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 954f5706ef3..5e373ff24c4 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -195,7 +195,7 @@ debug_get_flags_option(const char *name, namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, - sizeof(unsigned long)*CHAR_BIT/4, flags->value, + (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? flags->desc : ""); } else { diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 6370e779865..fe19466436a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,12 +567,26 @@ util_bswap16(uint16_t n) #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +/** + * Align a value, only works pot alignemnts. + */ static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } +/** + * Works like align but on npot alignments. 
+ */ +static INLINE size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + static INLINE unsigned u_minify(unsigned value, unsigned levels) { diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h index ae6f43bff87..cce0c7430e7 100644 --- a/src/gallium/auxiliary/util/u_pointer.h +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -111,6 +111,17 @@ pointer_to_func( void *p ) return pf.f; } +static INLINE void * +func_to_pointer( func_pointer f ) +{ + union { + void *p; + func_pointer f; + } pf; + pf.f = f; + return pf.p; +} + #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 75d44432d9e..af229e61a00 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -59,6 +59,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned usage ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + if (!upload) + return NULL; upload->pipe = pipe; upload->default_size = default_size; diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 0242dedbf3b..4e35a4c4082 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -1,3 +1,5 @@ +.. _context: + Context ======= @@ -120,7 +122,7 @@ will be cleared. color value. While it is only possible to clear one surface at a time (which can include several layers), this surface need not be bound to the framebuffer. -``clear_depth_stencil``clears a single depth, stencil or depth/stencil surface +``clear_depth_stencil`` clears a single depth, stencil or depth/stencil surface with the specified depth and stencil values (for combined depth/stencil buffers, is is also possible to only clear one or the other part). 
While it is only possible to clear one surface at a time (which can include several layers), diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 100afe33972..6ba5a056f45 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -10,69 +10,100 @@ Drivers Cell ^^^^ +Simple driver for the IBM Cell architecture. Runs faster than :ref:`softpipe` +on Cell-based machines. + Failover ^^^^^^^^ -Deprecated. +Broken and deprecated. Intel i915 ^^^^^^^^^^ +Driver for Intel i915 and i945 chipsets. + Intel i965 ^^^^^^^^^^ -Highly experimental. +Highly experimental driver for Intel i965 chipsets. Identity ^^^^^^^^ -Wrapper driver. +Wrapper driver. The identity driver is a simple skeleton that passes through +all of its :ref:`Context` and :ref:`Screen` methods to an underlying Context +and Screen, and as such, it is an excellent starting point for new drivers. LLVM Softpipe ^^^^^^^^^^^^^ -nVidia nv30 -^^^^^^^^^^^ +A version of :ref:`softpipe` that uses the Low-Level Virtual Machine to +dynamically generate optimized rasterizing pipelines. -nVidia nv40 +nVidia nvfx ^^^^^^^^^^^ +Driver for the nVidia nv30 and nv40 families of GPUs. + nVidia nv50 ^^^^^^^^^^^ +Driver for the nVidia nv50 family of GPUs. + VMWare SVGA ^^^^^^^^^^^ +Driver for VMWare virtualized guest operating system graphics processing. + ATI r300 ^^^^^^^^ -Testing-quality. +Driver for the ATI/AMD r300, r400, and r500 families of GPUs. + +.. _softpipe: Softpipe ^^^^^^^^ -Reference software rasterizer. +Reference software rasterizer. Slow but accurate. Trace ^^^^^ -Wrapper driver. +Wrapper driver. Trace dumps an XML record of the calls made to the +:ref:`Context` and :ref:`Screen` objects that it wraps. State Trackers -------------- +.. _dri: + Direct Rendering Infrastructure ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Tracker that implements the client-side DRI protocol, for providing direct +acceleration services to X11 servers with the DRI extension. 
Supports DRI1 +and DRI2. Only GL is supported. + +.. _egl: + EGL ^^^ +Tracker for the Khronos EGL standard, used to set up GL and GLES contexts +without extra knowledge of the underlying windowing system. + GLX ^^^ MesaGL ^^^^^^ +Tracker implementing a GL state machine. Not usable as a standalone tracker; +Mesa should be built with another state tracker, such as :ref:`DRI` or +:ref:`EGL`. + Python ^^^^^^ @@ -82,9 +113,12 @@ OpenVG WGL ^^^ -Xorg XFree86 DDX +Xorg/XFree86 DDX ^^^^^^^^^^^^^^^^ +Tracker for XFree86 and Xorg X11 servers. Provides device-dependent +modesetting and acceleration as a DDX driver. + Auxiliary --------- diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index 0696cb5d277..acde56eafc4 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -21,3 +21,7 @@ Glossary LOD Level of Detail. Also spelled "LoD." The value that determines when the switches between mipmaps occur during texture sampling. + + GLSL + GL Shading Language. The official, common high-level shader language used + in GL 2.0 and above. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 48d9d570b6f..e3ef49c862c 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -36,7 +36,9 @@ The integer capabilities: bound. * ``OCCLUSION_QUERY``: Whether occlusion queries are available. * ``TIMER_QUERY``: Whether timer queries are available. -* ``TEXTURE_SHADOW_MAP``: XXX +* ``TEXTURE_SHADOW_MAP``: indicates whether the fragment shader hardware + can do the depth texture / Z comparison operation in TEX instructions + for shadow testing. * ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available for a 2D texture. * ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available @@ -55,7 +57,13 @@ The integer capabilities: from color blend equations, in :ref:`Blend` state. 
* ``SM3``: Whether the vertex shader and fragment shader support equivalent opcodes to the Shader Model 3 specification. XXX oh god this is horrible -* ``MAX_PREDICATE_REGISTERS``: XXX +* ``MAX_PREDICATE_REGISTERS``: indicates the number of predicate registers + available. Predicate register may be set as a side-effect of ALU + instructions to indicate less than, greater than or equal to zero. + Later instructions can use a predicate register to control writing to + each channel of destination registers. NOTE: predicate registers have + not been fully implemented in Gallium at this time. See the + GL_NV_fragment_program extension for more info (look for "condition codes"). * ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from the vertex and fragment shader, inclusive. * ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 411dce856a3..205e7b8539d 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -26,9 +26,11 @@ each of the components of *dst*. When this happens, the result is said to be Instruction Set --------------- -From GL_NV_vertex_program +Core ISA ^^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are guaranteed to be available regardless of the driver being +used. .. opcode:: ARL - Address Register Load @@ -637,10 +639,6 @@ This instruction replicates its result. Considered for removal. -From GL_NV_vertex_program2 -^^^^^^^^^^^^^^^^^^^^^^^^^^ - - .. opcode:: ARA - Address Register Add TBD @@ -827,11 +825,14 @@ This instruction replicates its result. Considered for removal. -From GL_NV_gpu_program4 +Compute ISA ^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are primarily provided for special-use computational shaders. Support for these opcodes indicated by a special pipe capability bit (TBD). +XXX so let's discuss it, yeah? + .. opcode:: CEIL - Ceiling .. 
math:: @@ -989,10 +990,17 @@ Support for these opcodes indicated by a special pipe capability bit (TBD). TBD +.. note:: + + Support for CONT is determined by a special capability bit, + ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information. -From GL_NV_geometry_program4 + +Geometry ISA ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are only supported in geometry shaders; they have no meaning +in any other type of shader. .. opcode:: EMIT - Emit @@ -1004,9 +1012,11 @@ From GL_NV_geometry_program4 TBD -From GLSL +GLSL ISA ^^^^^^^^^^ +These opcodes are part of :term:`GLSL`'s opcode set. Support for these +opcodes is determined by a special capability bit, ``GLSL``. .. opcode:: BGNLOOP - Begin a Loop @@ -1045,6 +1055,7 @@ This instruction replicates its result. ps_2_x ^^^^^^^^^^^^ +XXX wait what .. opcode:: CALLNZ - Subroutine Call If Not Zero @@ -1062,10 +1073,15 @@ ps_2_x .. _doubleopcodes: -Double Opcodes +Double ISA ^^^^^^^^^^^^^^^ -.. opcode:: DADD - Add Double +The double-precision opcodes reinterpret four-component vectors into +two-component vectors with doubled precision in each component. + +Support for these opcodes is XXX undecided. :T + +.. opcode:: DADD - Add .. math:: @@ -1074,7 +1090,7 @@ Double Opcodes dst.zw = src0.zw + src1.zw -.. opcode:: DDIV - Divide Double +.. opcode:: DDIV - Divide .. math:: @@ -1082,7 +1098,7 @@ Double Opcodes dst.zw = src0.zw / src1.zw -.. opcode:: DSEQ - Set Double on Equal +.. opcode:: DSEQ - Set on Equal .. math:: @@ -1090,7 +1106,7 @@ Double Opcodes dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F -.. opcode:: DSLT - Set Double on Less than +.. opcode:: DSLT - Set on Less than .. math:: @@ -1098,7 +1114,7 @@ Double Opcodes dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F -.. opcode:: DFRAC - Double Fraction +.. opcode:: DFRAC - Fraction .. math:: @@ -1107,23 +1123,33 @@ Double Opcodes dst.zw = src.zw - \lfloor src.zw\rfloor -.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components +.. 
opcode:: DFRACEXP - Convert Number to Fractional and Integral Components + +Like the ``frexp()`` routine in many math libraries, this opcode stores the +exponent of its source to ``dst0``, and the significand to ``dst1``, such that +:math:`dst1 \times 2^{dst0} = src` . .. math:: - dst0.xy = frexp(src.xy, dst1.xy) + dst0.xy = exp(src.xy) + + dst1.xy = frac(src.xy) - dst0.zw = frexp(src.zw, dst1.zw) + dst0.zw = exp(src.zw) -.. opcode:: DLDEXP - Multiple Double Number by Integral Power of 2 + dst1.zw = frac(src.zw) + +.. opcode:: DLDEXP - Multiply Number by Integral Power of 2 + +This opcode is the inverse of :opcode:`DFRACEXP`. .. math:: - dst.xy = ldexp(src0.xy, src1.xy) + dst.xy = src0.xy \times 2^{src1.xy} - dst.zw = ldexp(src0.zw, src1.zw) + dst.zw = src0.zw \times 2^{src1.zw} -.. opcode:: DMIN - Minimum Double +.. opcode:: DMIN - Minimum .. math:: @@ -1131,7 +1157,7 @@ Double Opcodes dst.zw = min(src0.zw, src1.zw) -.. opcode:: DMAX - Maximum Double +.. opcode:: DMAX - Maximum .. math:: @@ -1139,7 +1165,7 @@ Double Opcodes dst.zw = max(src0.zw, src1.zw) -.. opcode:: DMUL - Multiply Double +.. opcode:: DMUL - Multiply .. math:: @@ -1148,7 +1174,7 @@ Double Opcodes dst.zw = src0.zw \times src1.zw -.. opcode:: DMAD - Multiply And Add Doubles +.. opcode:: DMAD - Multiply And Add .. math:: @@ -1157,7 +1183,7 @@ Double Opcodes dst.zw = src0.zw \times src1.zw + src2.zw -.. opcode:: DRCP - Reciprocal Double +.. opcode:: DRCP - Reciprocal .. math:: @@ -1165,7 +1191,7 @@ Double Opcodes dst.zw = \frac{1}{src.zw} -.. opcode:: DSQRT - Square root double +.. opcode:: DSQRT - Square Root .. math:: @@ -1280,38 +1306,46 @@ Declaration Semantic TGSI_SEMANTIC_POSITION """""""""""""""""""""" -Position, sometimes known as HPOS or WPOS for historical reasons, is the -location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` -are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used -for the perspective divide, if enabled. 
- -As a vertex shader output, position should be scaled to the viewport. When -used in fragment shaders, position will be in window coordinates. The convention -used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties. +For vertex shaders, TGSI_SEMANTIC_POSITION indicates the vertex shader +output register which contains the homogeneous vertex position in the clip +space coordinate system. After clipping, the X, Y and Z components of the +vertex will be divided by the W value to get normalized device coordinates. -XXX additionally, is there a way to configure the perspective divide? it's -accelerated on most chipsets AFAIK... +For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that +fragment shader input contains the fragment's window position. The X +component starts at zero and always increases from left to right. +The Y component starts at zero and always increases but Y=0 may either +indicate the top of the window or the bottom depending on the fragment +coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). +The Z coordinate ranges from 0 to 1 to represent depth from the front +to the back of the Z buffer. The W component contains the reciprocol +of the interpolated vertex position W component. -Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can -be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. -XXX usually? can we solidify that? TGSI_SEMANTIC_COLOR """"""""""""""""""" -Colors are used to, well, color the primitives. Colors are always in -``(r, g, b, a)`` format. +For vertex shader outputs or fragment shader inputs/outputs, this +label indicates that the resister contains an R,G,B,A color. + +Several shader inputs/outputs may contain colors so the semantic index +is used to distinguish them. For example, color[0] may be the diffuse +color while color[1] may be the specular color. 
+ +This label is needed so that the flat/smooth shading can be applied +to the right interpolants during rasterization. + -If alpha is not specified, it defaults to 1. TGSI_SEMANTIC_BCOLOR """""""""""""""""""" Back-facing colors are only used for back-facing polygons, and are only valid in vertex shader outputs. After rasterization, all polygons are front-facing -and COLOR and BCOLOR end up occupying the same slots in the fragment, so -all BCOLORs effectively become regular COLORs in the fragment shader. +and COLOR and BCOLOR end up occupying the same slots in the fragment shader, +so all BCOLORs effectively become regular COLORs in the fragment shader. + TGSI_SEMANTIC_FOG """"""""""""""""" @@ -1363,7 +1397,15 @@ back-facing. TGSI_SEMANTIC_EDGEFLAG """""""""""""""""""""" -XXX no clue +For vertex shaders, this sematic label indicates that an input or +output is a boolean edge flag. The register layout is [F, x, x, x] +where F is 0.0 or 1.0 and x = don't care. Normally, the vertex shader +simply copies the edge flag input to the edgeflag output. + +Edge flags are used to control which lines or points are actually +drawn when the polygon mode converts triangles/quads/polygons into +points or lines. + Properties @@ -1420,9 +1462,9 @@ well. 
+--------------------+--------------+--------------------+--------------+ | Texture Components | Gallium | OpenGL | Direct3D 9 | +====================+==============+====================+==============+ -| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) | +| R | (r, 0, 0, 1) | (r, 0, 0, 1) | (r, 1, 1, 1) | +--------------------+--------------+--------------------+--------------+ -| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) | +| RG | (r, g, 0, 1) | (r, g, 0, 1) | (r, g, 1, 1) | +--------------------+--------------+--------------------+--------------+ | RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) | +--------------------+--------------+--------------------+--------------+ diff --git a/src/gallium/drivers/galahad/Makefile b/src/gallium/drivers/galahad/Makefile new file mode 100644 index 00000000000..67d08745662 --- /dev/null +++ b/src/gallium/drivers/galahad/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = galahad + +C_SOURCES = \ + glhd_objects.c \ + glhd_context.c \ + glhd_screen.c \ + glhd_drm.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/galahad/SConscript b/src/gallium/drivers/galahad/SConscript new file mode 100644 index 00000000000..fc668facaf5 --- /dev/null +++ b/src/gallium/drivers/galahad/SConscript @@ -0,0 +1,14 @@ +Import('*') + +env = env.Clone() + +identity = env.ConvenienceLibrary( + target = 'identity', + source = [ + 'glhd_context.c', + 'glhd_drm.c', + 'glhd_objects.c', + 'glhd_screen.c', + ]) + +Export('identity') diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c new file mode 100644 index 00000000000..3b20cb1e7f8 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -0,0 +1,990 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_context.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "glhd_context.h"
+#include "glhd_objects.h"
+
+
+/*
+ * Every entry point in this file has the same shape: recover the
+ * galahad_context that wraps the incoming pipe_context, fetch the wrapped
+ * driver context from it, optionally sanity-check the arguments (reporting
+ * through glhd_warn()/glhd_error()), unwrap any galahad objects among the
+ * arguments and forward the call to the wrapped context.
+ */
+
+/** Destroy the wrapped context, then release the wrapper itself. */
+static void
+galahad_destroy(struct pipe_context *_pipe)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->destroy(pipe);
+
+   FREE(glhd_pipe);
+}
+
+/** Forward a non-indexed draw unchanged. */
+static void
+galahad_draw_arrays(struct pipe_context *_pipe,
+                    unsigned prim,
+                    unsigned start,
+                    unsigned count)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->draw_arrays(pipe, prim, start, count);
+}
+
+/** Forward an indexed draw, substituting the wrapped index resource. */
+static void
+galahad_draw_elements(struct pipe_context *_pipe,
+                      struct pipe_resource *_indexResource,
+                      unsigned indexSize,
+                      int indexBias,
+                      unsigned prim,
+                      unsigned start,
+                      unsigned count)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct galahad_resource *glhd_resource = galahad_resource(_indexResource);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_resource *indexResource = glhd_resource->resource;
+
+   pipe->draw_elements(pipe, indexResource, indexSize, indexBias,
+                       prim, start, count);
+}
+
+/** Forward a ranged indexed draw, substituting the wrapped index resource. */
+static void
+galahad_draw_range_elements(struct pipe_context *_pipe,
+                            struct pipe_resource *_indexResource,
+                            unsigned indexSize,
+                            int indexBias,
+                            unsigned minIndex,
+                            unsigned maxIndex,
+                            unsigned mode,
+                            unsigned start,
+                            unsigned count)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct galahad_resource *glhd_resource = galahad_resource(_indexResource);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_resource *indexResource = glhd_resource->resource;
+
+   pipe->draw_range_elements(pipe, indexResource, indexSize, indexBias,
+                             minIndex, maxIndex, mode, start, count);
+}
+
+/**
+ * Create a query on the wrapped context.
+ *
+ * Reports an error when an occlusion or timer query is requested although
+ * the screen does not advertise the matching capability; the request is
+ * forwarded regardless.
+ */
+static struct pipe_query *
+galahad_create_query(struct pipe_context *_pipe,
+                     unsigned query_type)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   if (query_type == PIPE_QUERY_OCCLUSION_COUNTER &&
+       !pipe->screen->get_param(pipe->screen, PIPE_CAP_OCCLUSION_QUERY)) {
+      glhd_error("Occlusion query requested but not supported");
+   }
+
+   if (query_type == PIPE_QUERY_TIME_ELAPSED &&
+       !pipe->screen->get_param(pipe->screen, PIPE_CAP_TIMER_QUERY)) {
+      glhd_error("Timer query requested but not supported");
+   }
+
+   return pipe->create_query(pipe, query_type);
+}
+
+static void
+galahad_destroy_query(struct pipe_context *_pipe,
+                      struct pipe_query *query)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->destroy_query(pipe, query);
+}
+
+static void
+galahad_begin_query(struct pipe_context *_pipe,
+                    struct pipe_query *query)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->begin_query(pipe, query);
+}
+
+static void
+galahad_end_query(struct pipe_context *_pipe,
+                  struct pipe_query *query)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->end_query(pipe, query);
+}
+
+static boolean
+galahad_get_query_result(struct pipe_context *_pipe,
+                         struct pipe_query *query,
+                         boolean wait,
+                         void *result)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->get_query_result(pipe, query, wait, result);
+}
+
+/*
+ * Blend state: plain pass-through.
+ */
+
+static void *
+galahad_create_blend_state(struct pipe_context *_pipe,
+                           const struct pipe_blend_state *blend)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_blend_state(pipe, blend);
+}
+
+static void
+galahad_bind_blend_state(struct pipe_context *_pipe,
+                         void *blend)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_blend_state(pipe, blend);
+}
+
+static void
+galahad_delete_blend_state(struct pipe_context *_pipe,
+                           void *blend)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_blend_state(pipe, blend);
+}
+
+/*
+ * Sampler state: plain pass-through.
+ */
+
+static void *
+galahad_create_sampler_state(struct pipe_context *_pipe,
+                             const struct pipe_sampler_state *sampler)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_sampler_state(pipe, sampler);
+}
+
+static void
+galahad_bind_fragment_sampler_states(struct pipe_context *_pipe,
+                                     unsigned num_samplers,
+                                     void **samplers)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_fragment_sampler_states(pipe, num_samplers, samplers);
+}
+
+static void
+galahad_bind_vertex_sampler_states(struct pipe_context *_pipe,
+                                   unsigned num_samplers,
+                                   void **samplers)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_vertex_sampler_states(pipe, num_samplers, samplers);
+}
+
+static void
+galahad_delete_sampler_state(struct pipe_context *_pipe,
+                             void *sampler)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_sampler_state(pipe, sampler);
+}
+
+/**
+ * Create rasterizer state on the wrapped context.
+ *
+ * Warns about two flag combinations before forwarding: point smoothing
+ * together with point_quad_rasterization, and sprite_coord_enable without
+ * point_quad_rasterization.
+ */
+static void *
+galahad_create_rasterizer_state(struct pipe_context *_pipe,
+                                const struct pipe_rasterizer_state *rasterizer)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   if (rasterizer->point_quad_rasterization) {
+      if (rasterizer->point_smooth) {
+         glhd_warn("Point smoothing requested but ignored");
+      }
+   } else {
+      if (rasterizer->sprite_coord_enable) {
+         glhd_warn("Point sprites requested but ignored");
+      }
+   }
+
+   return pipe->create_rasterizer_state(pipe, rasterizer);
+}
+
+static void
+galahad_bind_rasterizer_state(struct pipe_context *_pipe,
+                              void *rasterizer)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_rasterizer_state(pipe, rasterizer);
+}
+
+static void
+galahad_delete_rasterizer_state(struct pipe_context *_pipe,
+                                void *rasterizer)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_rasterizer_state(pipe, rasterizer);
+}
+
+/*
+ * Depth/stencil/alpha state: plain pass-through.
+ */
+
+static void *
+galahad_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                         const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_depth_stencil_alpha_state(pipe, depth_stencil_alpha);
+}
+
+static void
+galahad_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                       void *depth_stencil_alpha)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_alpha);
+}
+
+static void
+galahad_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
+                                         void *depth_stencil_alpha)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_depth_stencil_alpha_state(pipe, depth_stencil_alpha);
+}
+
+/*
+ * Fragment and vertex shader state: plain pass-through.
+ */
+
+static void *
+galahad_create_fs_state(struct pipe_context *_pipe,
+                        const struct pipe_shader_state *fs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_fs_state(pipe, fs);
+}
+
+static void
+galahad_bind_fs_state(struct pipe_context *_pipe,
+                      void *fs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_fs_state(pipe, fs);
+}
+
+static void
+galahad_delete_fs_state(struct pipe_context *_pipe,
+                        void *fs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_fs_state(pipe, fs);
+}
+
+static void *
+galahad_create_vs_state(struct pipe_context *_pipe,
+                        const struct pipe_shader_state *vs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_vs_state(pipe, vs);
+}
+
+static void
+galahad_bind_vs_state(struct pipe_context *_pipe,
+                      void *vs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_vs_state(pipe, vs);
+}
+
+static void
+galahad_delete_vs_state(struct pipe_context *_pipe,
+                        void *vs)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_vs_state(pipe, vs);
+}
+
+
+/*
+ * Vertex element state: plain pass-through.
+ */
+
+static void *
+galahad_create_vertex_elements_state(struct pipe_context *_pipe,
+                                     unsigned num_elements,
+                                     const struct pipe_vertex_element *vertex_elements)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   return pipe->create_vertex_elements_state(pipe,
+                                             num_elements,
+                                             vertex_elements);
+}
+
+static void
+galahad_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                   void *velems)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->bind_vertex_elements_state(pipe, velems);
+}
+
+static void
+galahad_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                     void *velems)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->delete_vertex_elements_state(pipe, velems);
+}
+
+/*
+ * Simple parameter state: plain pass-through.
+ */
+
+static void
+galahad_set_blend_color(struct pipe_context *_pipe,
+                        const struct pipe_blend_color *blend_color)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_blend_color(pipe, blend_color);
+}
+
+static void
+galahad_set_stencil_ref(struct pipe_context *_pipe,
+                        const struct pipe_stencil_ref *stencil_ref)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_stencil_ref(pipe, stencil_ref);
+}
+
+static void
+galahad_set_clip_state(struct pipe_context *_pipe,
+                       const struct pipe_clip_state *clip)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_clip_state(pipe, clip);
+}
+
+static void
+galahad_set_sample_mask(struct pipe_context *_pipe,
+                        unsigned sample_mask)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_sample_mask(pipe, sample_mask);
+}
+
+/**
+ * Bind a constant buffer, unwrapping the galahad resource first.
+ * A NULL _resource is forwarded as NULL.
+ */
+static void
+galahad_set_constant_buffer(struct pipe_context *_pipe,
+                            uint shader,
+                            uint index,
+                            struct pipe_resource *_resource)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_resource *resource = NULL;
+
+   /* XXX hmm? unwrap the input state */
+   if (_resource)
+      resource = galahad_resource_unwrap(_resource);
+
+   pipe->set_constant_buffer(pipe, shader, index, resource);
+}
+
+/**
+ * Bind framebuffer state, unwrapping every surface it references.
+ *
+ * Reports an error when more colour buffers are bound than the API allows
+ * and a warning when more are bound than the screen advertises.  All
+ * inspection of _state happens inside the NULL guard, and the unwrap loop
+ * (as well as the count handed to the driver) is limited to
+ * PIPE_MAX_COLOR_BUFS so an out-of-range nr_cbufs can never index past the
+ * fixed-size cbufs[] arrays.  A NULL _state is forwarded as NULL.
+ */
+static void
+galahad_set_framebuffer_state(struct pipe_context *_pipe,
+                              const struct pipe_framebuffer_state *_state)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+   struct pipe_framebuffer_state unwrapped_state;
+   struct pipe_framebuffer_state *state = NULL;
+   unsigned i;
+
+   if (_state) {
+      unsigned nr_cbufs = _state->nr_cbufs;
+
+      if (nr_cbufs > PIPE_MAX_COLOR_BUFS) {
+         glhd_error("%d render targets bound, but only %d are permitted by API",
+                    _state->nr_cbufs, PIPE_MAX_COLOR_BUFS);
+         /* already reported; keep every array access in bounds */
+         nr_cbufs = PIPE_MAX_COLOR_BUFS;
+      } else if (nr_cbufs >
+                 pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)) {
+         glhd_warn("%d render targets bound, but only %d are supported",
+                   _state->nr_cbufs,
+                   pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS));
+      }
+
+      /* unwrap the input state */
+      memcpy(&unwrapped_state, _state, sizeof(unwrapped_state));
+      unwrapped_state.nr_cbufs = nr_cbufs;
+      for (i = 0; i < nr_cbufs; i++)
+         unwrapped_state.cbufs[i] = galahad_surface_unwrap(_state->cbufs[i]);
+      for (; i < PIPE_MAX_COLOR_BUFS; i++)
+         unwrapped_state.cbufs[i] = NULL;
+      unwrapped_state.zsbuf = galahad_surface_unwrap(_state->zsbuf);
+      state = &unwrapped_state;
+   }
+
+   pipe->set_framebuffer_state(pipe, state);
+}
+
+static void
+galahad_set_polygon_stipple(struct pipe_context *_pipe,
+                            const struct pipe_poly_stipple *poly_stipple)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_polygon_stipple(pipe, poly_stipple);
+}
+
+static void
+galahad_set_scissor_state(struct pipe_context *_pipe,
+                          const struct pipe_scissor_state *scissor)
+{
+   struct galahad_context *glhd_pipe = galahad_context(_pipe);
+   struct pipe_context *pipe = glhd_pipe->pipe;
+
+   pipe->set_scissor_state(pipe, scissor);
+}
+
+static void +galahad_set_viewport_state(struct pipe_context *_pipe, + const struct pipe_viewport_state *viewport) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_viewport_state(pipe, + viewport); +} + +static void +galahad_set_fragment_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_fragment_sampler_views(pipe, num, views); +} + +static void +galahad_set_vertex_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_vertex_sampler_views(pipe, num, views); +} + +static void +galahad_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *_buffers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_vertex_buffer unwrapped_buffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_buffer *buffers = NULL; + unsigned i; + + if (num_buffers) { + memcpy(unwrapped_buffers, _buffers, 
num_buffers * sizeof(*_buffers)); + for (i = 0; i < num_buffers; i++) + unwrapped_buffers[i].buffer = galahad_resource_unwrap(_buffers[i].buffer); + buffers = unwrapped_buffers; + } + + pipe->set_vertex_buffers(pipe, + num_buffers, + buffers); +} +static void +galahad_resource_copy_region(struct pipe_context *_pipe, + struct pipe_resource *_dst, + struct pipe_subresource subdst, + unsigned dstx, + unsigned dsty, + unsigned dstz, + struct pipe_resource *_src, + struct pipe_subresource subsrc, + unsigned srcx, + unsigned srcy, + unsigned srcz, + unsigned width, + unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource_dst = galahad_resource(_dst); + struct galahad_resource *glhd_resource_src = galahad_resource(_src); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *dst = glhd_resource_dst->resource; + struct pipe_resource *src = glhd_resource_src->resource; + + if (_dst->format != _src->format) { + glhd_warn("Format mismatch: Source is %s, destination is %s", + util_format_short_name(_src->format), + util_format_short_name(_dst->format)); + } + + pipe->resource_copy_region(pipe, + dst, + subdst, + dstx, + dsty, + dstz, + src, + subsrc, + srcx, + srcy, + srcz, + width, + height); +} + +static void +galahad_clear(struct pipe_context *_pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->clear(pipe, + buffers, + rgba, + depth, + stencil); +} + +static void +galahad_clear_render_target(struct pipe_context *_pipe, + struct pipe_surface *_dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = 
glhd_surface_dst->surface; + + pipe->clear_render_target(pipe, + dst, + rgba, + dstx, + dsty, + width, + height); +} +static void +galahad_clear_depth_stencil(struct pipe_context *_pipe, + struct pipe_surface *_dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = glhd_surface_dst->surface; + + pipe->clear_depth_stencil(pipe, + dst, + clear_flags, + depth, + stencil, + dstx, + dsty, + width, + height); + +} + +static void +galahad_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->flush(pipe, + flags, + fence); +} + +static unsigned int +galahad_is_resource_referenced(struct pipe_context *_pipe, + struct pipe_resource *_resource, + unsigned face, + unsigned level) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + return pipe->is_resource_referenced(pipe, + resource, + face, + level); +} + +static struct pipe_sampler_view * +galahad_context_create_sampler_view(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) +{ + struct galahad_context *glhd_context = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_sampler_view *result; + + result = pipe->create_sampler_view(pipe, + resource, + templ); + + if (result) + return 
galahad_sampler_view_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_sampler_view_destroy(struct pipe_context *_pipe, + struct pipe_sampler_view *_view) +{ + galahad_sampler_view_destroy(galahad_context(_pipe), + galahad_sampler_view(_view)); +} + +static struct pipe_transfer * +galahad_context_get_transfer(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_transfer *result; + + result = context->get_transfer(context, + resource, + sr, + usage, + box); + + if (result) + return galahad_transfer_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_transfer_destroy(struct pipe_context *_pipe, + struct pipe_transfer *_transfer) +{ + galahad_transfer_destroy(galahad_context(_pipe), + galahad_transfer(_transfer)); +} + +static void * +galahad_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + return context->transfer_map(context, + transfer); +} + + + +static void +galahad_context_transfer_flush_region(struct pipe_context *_context, + struct pipe_transfer *_transfer, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = 
glhd_transfer->transfer; + + context->transfer_flush_region(context, + transfer, + box); +} + + +static void +galahad_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + context->transfer_unmap(context, + transfer); +} + + +static void +galahad_context_transfer_inline_write(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->transfer_inline_write(context, + resource, + sr, + usage, + box, + data, + stride, + slice_stride); +} + + +struct pipe_context * +galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) +{ + struct galahad_context *glhd_pipe; + (void)galahad_screen(_screen); + + glhd_pipe = CALLOC_STRUCT(galahad_context); + if (!glhd_pipe) { + return NULL; + } + + glhd_pipe->base.winsys = NULL; + glhd_pipe->base.screen = _screen; + glhd_pipe->base.priv = pipe->priv; /* expose wrapped data */ + glhd_pipe->base.draw = NULL; + + glhd_pipe->base.destroy = galahad_destroy; + glhd_pipe->base.draw_arrays = galahad_draw_arrays; + glhd_pipe->base.draw_elements = galahad_draw_elements; + glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.create_query = galahad_create_query; + glhd_pipe->base.destroy_query = galahad_destroy_query; + glhd_pipe->base.begin_query = galahad_begin_query; + 
glhd_pipe->base.end_query = galahad_end_query; + glhd_pipe->base.get_query_result = galahad_get_query_result; + glhd_pipe->base.create_blend_state = galahad_create_blend_state; + glhd_pipe->base.bind_blend_state = galahad_bind_blend_state; + glhd_pipe->base.delete_blend_state = galahad_delete_blend_state; + glhd_pipe->base.create_sampler_state = galahad_create_sampler_state; + glhd_pipe->base.bind_fragment_sampler_states = galahad_bind_fragment_sampler_states; + glhd_pipe->base.bind_vertex_sampler_states = galahad_bind_vertex_sampler_states; + glhd_pipe->base.delete_sampler_state = galahad_delete_sampler_state; + glhd_pipe->base.create_rasterizer_state = galahad_create_rasterizer_state; + glhd_pipe->base.bind_rasterizer_state = galahad_bind_rasterizer_state; + glhd_pipe->base.delete_rasterizer_state = galahad_delete_rasterizer_state; + glhd_pipe->base.create_depth_stencil_alpha_state = galahad_create_depth_stencil_alpha_state; + glhd_pipe->base.bind_depth_stencil_alpha_state = galahad_bind_depth_stencil_alpha_state; + glhd_pipe->base.delete_depth_stencil_alpha_state = galahad_delete_depth_stencil_alpha_state; + glhd_pipe->base.create_fs_state = galahad_create_fs_state; + glhd_pipe->base.bind_fs_state = galahad_bind_fs_state; + glhd_pipe->base.delete_fs_state = galahad_delete_fs_state; + glhd_pipe->base.create_vs_state = galahad_create_vs_state; + glhd_pipe->base.bind_vs_state = galahad_bind_vs_state; + glhd_pipe->base.delete_vs_state = galahad_delete_vs_state; + glhd_pipe->base.create_vertex_elements_state = galahad_create_vertex_elements_state; + glhd_pipe->base.bind_vertex_elements_state = galahad_bind_vertex_elements_state; + glhd_pipe->base.delete_vertex_elements_state = galahad_delete_vertex_elements_state; + glhd_pipe->base.set_blend_color = galahad_set_blend_color; + glhd_pipe->base.set_stencil_ref = galahad_set_stencil_ref; + glhd_pipe->base.set_clip_state = galahad_set_clip_state; + glhd_pipe->base.set_sample_mask = galahad_set_sample_mask; + 
glhd_pipe->base.set_constant_buffer = galahad_set_constant_buffer; + glhd_pipe->base.set_framebuffer_state = galahad_set_framebuffer_state; + glhd_pipe->base.set_polygon_stipple = galahad_set_polygon_stipple; + glhd_pipe->base.set_scissor_state = galahad_set_scissor_state; + glhd_pipe->base.set_viewport_state = galahad_set_viewport_state; + glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; + glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; + glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; + glhd_pipe->base.clear = galahad_clear; + glhd_pipe->base.clear_render_target = galahad_clear_render_target; + glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil; + glhd_pipe->base.flush = galahad_flush; + glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; + glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; + glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; + glhd_pipe->base.get_transfer = galahad_context_get_transfer; + glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy; + glhd_pipe->base.transfer_map = galahad_context_transfer_map; + glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; + glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; + glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + + glhd_pipe->pipe = pipe; + + return &glhd_pipe->base; +} diff --git a/src/gallium/drivers/galahad/glhd_context.h b/src/gallium/drivers/galahad/glhd_context.h new file mode 100644 index 00000000000..4e71753ac37 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_context.h @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_CONTEXT_H +#define GLHD_CONTEXT_H + +#include <stdio.h> + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + +struct galahad_context { + struct pipe_context base; /**< base class */ + + struct pipe_context *pipe; +}; + + +struct pipe_context * +galahad_context_create(struct pipe_screen *screen, struct pipe_context *pipe); + + +static INLINE struct galahad_context * +galahad_context(struct pipe_context *pipe) +{ + return (struct galahad_context *)pipe; +} + +#define glhd_warn(...) \ +do { \ + fprintf(stderr, "galahad: %s: ", __FUNCTION__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ +} while (0) + +#define glhd_error(...) 
\ + glhd_warn(__VA_ARGS__); + +#endif /* GLHD_CONTEXT_H */ diff --git a/src/gallium/drivers/galahad/glhd_drm.c b/src/gallium/drivers/galahad/glhd_drm.c new file mode 100644 index 00000000000..d62f6f4f7bc --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_drm.c @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "state_tracker/drm_api.h" + +#include "util/u_memory.h" +#include "glhd_drm.h" +#include "glhd_screen.h" +#include "glhd_public.h" + +struct galahad_drm_api +{ + struct drm_api base; + + struct drm_api *api; +}; + +static INLINE struct galahad_drm_api * +galahad_drm_api(struct drm_api *_api) +{ + return (struct galahad_drm_api *)_api; +} + +static struct pipe_screen * +galahad_drm_create_screen(struct drm_api *_api, int fd) +{ + struct galahad_drm_api *glhd_api = galahad_drm_api(_api); + struct drm_api *api = glhd_api->api; + struct pipe_screen *screen; + + screen = api->create_screen(api, fd); + + return galahad_screen_create(screen); +} + +static void +galahad_drm_destroy(struct drm_api *_api) +{ + struct galahad_drm_api *glhd_api = galahad_drm_api(_api); + struct drm_api *api = glhd_api->api; + api->destroy(api); + + FREE(glhd_api); +} + +struct drm_api * +galahad_drm_create(struct drm_api *api) +{ + struct galahad_drm_api *glhd_api; + + if (!api) + goto error; + + if (!debug_get_option("GALAHAD", FALSE)) + goto error; + + glhd_api = CALLOC_STRUCT(galahad_drm_api); + + if (!glhd_api) + goto error; + + glhd_api->base.name = api->name; + glhd_api->base.driver_name = api->driver_name; + glhd_api->base.create_screen = galahad_drm_create_screen; + glhd_api->base.destroy = galahad_drm_destroy; + glhd_api->api = api; + + return &glhd_api->base; + +error: + return api; +} diff --git a/src/gallium/drivers/galahad/glhd_drm.h b/src/gallium/drivers/galahad/glhd_drm.h new file mode 100644 index 00000000000..613ac24946d --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_drm.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_DRM_H +#define GLHD_DRM_H + +struct drm_api; + +struct drm_api* galahad_drm_create(struct drm_api *api); + +#endif /* GLHD_DRM_H */ diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c new file mode 100644 index 00000000000..6c5a21ae704 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_objects.c @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "glhd_screen.h" +#include "glhd_objects.h" +#include "glhd_context.h" + + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource) +{ + struct galahad_resource *glhd_resource; + + if(!resource) + goto error; + + assert(resource->screen == glhd_screen->screen); + + glhd_resource = CALLOC_STRUCT(galahad_resource); + if(!glhd_resource) + goto error; + + memcpy(&glhd_resource->base, resource, sizeof(struct pipe_resource)); + + pipe_reference_init(&glhd_resource->base.reference, 1); + glhd_resource->base.screen = &glhd_screen->base; + glhd_resource->resource = resource; + + return &glhd_resource->base; + +error: + pipe_resource_reference(&resource, NULL); + return NULL; +} + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource) +{ + pipe_resource_reference(&glhd_resource->resource, NULL); + FREE(glhd_resource); +} + + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface) +{ + struct galahad_surface *glhd_surface; + + if(!surface) + goto error; + + assert(surface->texture == glhd_resource->resource); + + glhd_surface = CALLOC_STRUCT(galahad_surface); + if(!glhd_surface) + goto error; + + memcpy(&glhd_surface->base, surface, sizeof(struct pipe_surface)); + + pipe_reference_init(&glhd_surface->base.reference, 1); + glhd_surface->base.texture = NULL; + pipe_resource_reference(&glhd_surface->base.texture, &glhd_resource->base); + glhd_surface->surface = surface; + + return &glhd_surface->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface) +{ + pipe_resource_reference(&glhd_surface->base.texture, NULL); + pipe_surface_reference(&glhd_surface->surface, NULL); + FREE(glhd_surface); +} + + 
+/**
+ * Wrap a driver sampler view in a galahad sampler view.
+ *
+ * \param glhd_context   galahad context; its wrapped pipe becomes the
+ *                       wrapper's context
+ * \param glhd_resource  galahad resource whose wrapped resource \p view
+ *                       was created from
+ * \param view           driver sampler view to wrap, may be NULL
+ * \return the wrapper's base view, or NULL if \p view is NULL or the
+ *         wrapper cannot be allocated
+ */
+struct pipe_sampler_view *
+galahad_sampler_view_create(struct galahad_context *glhd_context,
+                            struct galahad_resource *glhd_resource,
+                            struct pipe_sampler_view *view)
+{
+   struct galahad_sampler_view *glhd_view;
+
+   if (!view)
+      goto error;
+
+   assert(view->texture == glhd_resource->resource);
+
+   glhd_view = CALLOC_STRUCT(galahad_sampler_view);
+   /* Allocation can fail; without this check the copy below writes
+    * through a NULL pointer. */
+   if (!glhd_view)
+      goto error;
+
+   glhd_view->base = *view;
+   glhd_view->base.reference.count = 1;
+   /* Clear the copied pointer first so taking our own reference does not
+    * drop the one owned by the wrapped view. */
+   glhd_view->base.texture = NULL;
+   pipe_resource_reference(&glhd_view->base.texture, glhd_resource->resource);
+   glhd_view->base.context = glhd_context->pipe;
+   glhd_view->sampler_view = view;
+
+   return &glhd_view->base;
+error:
+   /* NOTE(review): unlike the resource/surface/transfer wrappers, the
+    * wrapped view is not released here -- confirm who owns it on failure. */
+   return NULL;
+}
+
+/**
+ * Destroy a galahad sampler view: drops the wrapper's texture reference,
+ * destroys the wrapped view through the wrapped pipe and frees the wrapper.
+ */
+void
+galahad_sampler_view_destroy(struct galahad_context *glhd_context,
+                             struct galahad_sampler_view *glhd_view)
+{
+   pipe_resource_reference(&glhd_view->base.texture, NULL);
+   glhd_context->pipe->sampler_view_destroy(glhd_context->pipe,
+                                            glhd_view->sampler_view);
+   FREE(glhd_view);
+}
+
+
+/**
+ * Wrap a driver transfer in a galahad transfer.
+ *
+ * \param glhd_context   galahad context whose wrapped pipe owns \p transfer
+ * \param glhd_resource  galahad resource the transfer maps; the wrapper
+ *                       holds a reference to it
+ * \param transfer       driver transfer to wrap, may be NULL
+ * \return the wrapper's base transfer, or NULL if \p transfer is NULL or
+ *         the wrapper cannot be allocated; in the latter case \p transfer
+ *         is destroyed
+ */
+struct pipe_transfer *
+galahad_transfer_create(struct galahad_context *glhd_context,
+                        struct galahad_resource *glhd_resource,
+                        struct pipe_transfer *transfer)
+{
+   struct galahad_transfer *glhd_transfer;
+
+   /* Nothing to wrap and nothing to clean up: do not hand a NULL transfer
+    * to the driver's transfer_destroy via the error path. */
+   if(!transfer)
+      return NULL;
+
+   assert(transfer->resource == glhd_resource->resource);
+
+   glhd_transfer = CALLOC_STRUCT(galahad_transfer);
+   if(!glhd_transfer)
+      goto error;
+
+   memcpy(&glhd_transfer->base, transfer, sizeof(struct pipe_transfer));
+
+   /* Clear the copied pointer so the reference taken below does not
+    * unreference the wrapped resource. */
+   glhd_transfer->base.resource = NULL;
+   glhd_transfer->transfer = transfer;
+
+   pipe_resource_reference(&glhd_transfer->base.resource, &glhd_resource->base);
+   assert(glhd_transfer->base.resource == &glhd_resource->base);
+
+   return &glhd_transfer->base;
+
+error:
+   /* The wrapper could not be allocated; release the driver transfer. */
+   glhd_context->pipe->transfer_destroy(glhd_context->pipe, transfer);
+   return NULL;
+}
+
+/**
+ * Destroy a galahad transfer: drops the wrapper's resource reference,
+ * destroys the wrapped transfer through the wrapped pipe and frees the
+ * wrapper.
+ */
+void
+galahad_transfer_destroy(struct galahad_context *glhd_context,
+                         struct galahad_transfer *glhd_transfer)
+{
+   pipe_resource_reference(&glhd_transfer->base.resource, NULL);
+   glhd_context->pipe->transfer_destroy(glhd_context->pipe,
+                                        glhd_transfer->transfer);
+   FREE(glhd_transfer);
+}
diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h
new file mode 100644
index 00000000000..935803915db
--- /dev/null
+++ b/src/gallium/drivers/galahad/glhd_objects.h
@@ -0,0 +1,175 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +#ifndef GLHD_OBJECTS_H +#define GLHD_OBJECTS_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "glhd_screen.h" + +struct galahad_context; + + +struct galahad_resource +{ + struct pipe_resource base; + + struct pipe_resource *resource; +}; + + +struct galahad_sampler_view +{ + struct pipe_sampler_view base; + + struct pipe_sampler_view *sampler_view; +}; + + +struct galahad_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; +}; + + +struct galahad_transfer +{ + struct pipe_transfer base; + + struct pipe_transfer *transfer; +}; + + +static INLINE struct galahad_resource * +galahad_resource(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + (void)galahad_screen(_resource->screen); + return (struct galahad_resource *)_resource; +} + +static INLINE struct galahad_sampler_view * +galahad_sampler_view(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return (struct galahad_sampler_view *)_sampler_view; +} + +static INLINE struct galahad_surface * +galahad_surface(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + (void)galahad_resource(_surface->texture); + return (struct galahad_surface *)_surface; +} + +static INLINE struct galahad_transfer * +galahad_transfer(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + (void)galahad_resource(_transfer->resource); + return (struct galahad_transfer *)_transfer; +} + +static INLINE struct pipe_resource * +galahad_resource_unwrap(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + return galahad_resource(_resource)->resource; +} + +static INLINE struct pipe_sampler_view * +galahad_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return galahad_sampler_view(_sampler_view)->sampler_view; +} + +static INLINE struct pipe_surface * 
+galahad_surface_unwrap(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + return galahad_surface(_surface)->surface; +} + +static INLINE struct pipe_transfer * +galahad_transfer_unwrap(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + return galahad_transfer(_transfer)->transfer; +} + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource); + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource); + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface); + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface); + +struct pipe_sampler_view * +galahad_sampler_view_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_sampler_view *view); + +void +galahad_sampler_view_destroy(struct galahad_context *glhd_context, + struct galahad_sampler_view *glhd_sampler_view); + +struct pipe_transfer * +galahad_transfer_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_transfer *transfer); + +void +galahad_transfer_destroy(struct galahad_context *glhd_context, + struct galahad_transfer *glhd_transfer); + + +#endif /* GLHD_OBJECTS_H */ diff --git a/src/gallium/drivers/galahad/glhd_public.h b/src/gallium/drivers/galahad/glhd_public.h new file mode 100644 index 00000000000..77a380196a1 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_public.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_PUBLIC_H +#define GLHD_PUBLIC_H + +struct pipe_screen; +struct pipe_context; + +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen); + +#endif /* GLHD_PUBLIC_H */ diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c new file mode 100644 index 00000000000..bcc37cb633a --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -0,0 +1,330 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * 2010 Corbin Simpson <[email protected]> + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "glhd_public.h" +#include "glhd_screen.h" +#include "glhd_context.h" +#include "glhd_objects.h" + + +static void +galahad_screen_destroy(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->destroy(screen); + + FREE(glhd_screen); +} + +static const char * +galahad_screen_get_name(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_name(screen); +} + +static const char * +galahad_screen_get_vendor(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_vendor(screen); +} + +static int +galahad_screen_get_param(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_param(screen, + param); +} + +static float +galahad_screen_get_paramf(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_paramf(screen, + param); +} + +static boolean +galahad_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage, + unsigned geom_flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + glhd_warn("Received bogus texture target %d", target); + } + + return 
screen->is_format_supported(screen, + format, + target, + sample_count, + tex_usage, + geom_flags); +} + +static struct pipe_context * +galahad_screen_context_create(struct pipe_screen *_screen, + void *priv) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_context *result; + + result = screen->context_create(screen, priv); + if (result) + return galahad_context_create(_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->resource_create(screen, + templat); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_from_handle(struct pipe_screen *_screen, + const struct pipe_resource *templ, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + /* TODO trace call */ + + result = screen->resource_from_handle(screen, templ, handle); + + result = galahad_resource_create(galahad_screen(_screen), result); + + return result; +} + +static boolean +galahad_screen_resource_get_handle(struct pipe_screen *_screen, + struct pipe_resource *_resource, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + + /* TODO trace call */ + + return screen->resource_get_handle(screen, resource, handle); +} + + + +static void +galahad_screen_resource_destroy(struct pipe_screen 
*screen, + struct pipe_resource *_resource) +{ + galahad_resource_destroy(galahad_resource(_resource)); +} + +static struct pipe_surface * +galahad_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_resource *_resource, + unsigned face, + unsigned level, + unsigned zslice, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_surface *result; + + result = screen->get_tex_surface(screen, + resource, + face, + level, + zslice, + usage); + + if (result) + return galahad_surface_create(glhd_resource, result); + return NULL; +} + +static void +galahad_screen_tex_surface_destroy(struct pipe_surface *_surface) +{ + galahad_surface_destroy(galahad_surface(_surface)); +} + + + +static struct pipe_resource * +galahad_screen_user_buffer_create(struct pipe_screen *_screen, + void *ptr, + unsigned bytes, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->user_buffer_create(screen, + ptr, + bytes, + usage); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + + + +static void +galahad_screen_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_surface *_surface, + void *context_private) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_surface *glhd_surface = galahad_surface(_surface); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_surface *surface = glhd_surface->surface; + + screen->flush_frontbuffer(screen, + surface, + context_private); +} + +static void +galahad_screen_fence_reference(struct pipe_screen *_screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct 
galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->fence_reference(screen, + ptr, + fence); +} + +static int +galahad_screen_fence_signalled(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_signalled(screen, + fence, + flags); +} + +static int +galahad_screen_fence_finish(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_finish(screen, + fence, + flags); +} + +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen) +{ + struct galahad_screen *glhd_screen; + + glhd_screen = CALLOC_STRUCT(galahad_screen); + if (!glhd_screen) { + return NULL; + } + + glhd_screen->base.winsys = NULL; + + glhd_screen->base.destroy = galahad_screen_destroy; + glhd_screen->base.get_name = galahad_screen_get_name; + glhd_screen->base.get_vendor = galahad_screen_get_vendor; + glhd_screen->base.get_param = galahad_screen_get_param; + glhd_screen->base.get_paramf = galahad_screen_get_paramf; + glhd_screen->base.is_format_supported = galahad_screen_is_format_supported; + glhd_screen->base.context_create = galahad_screen_context_create; + glhd_screen->base.resource_create = galahad_screen_resource_create; + glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle; + glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle; + glhd_screen->base.resource_destroy = galahad_screen_resource_destroy; + glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface; + glhd_screen->base.tex_surface_destroy = galahad_screen_tex_surface_destroy; + glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create; + 
glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer; + glhd_screen->base.fence_reference = galahad_screen_fence_reference; + glhd_screen->base.fence_signalled = galahad_screen_fence_signalled; + glhd_screen->base.fence_finish = galahad_screen_fence_finish; + + glhd_screen->screen = screen; + + return &glhd_screen->base; +} diff --git a/src/gallium/drivers/galahad/glhd_screen.h b/src/gallium/drivers/galahad/glhd_screen.h new file mode 100644 index 00000000000..7862f4af2b3 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_screen.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GLHD_SCREEN_H +#define GLHD_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + +struct galahad_screen { + struct pipe_screen base; + + struct pipe_screen *screen; +}; + + +static INLINE struct galahad_screen * +galahad_screen(struct pipe_screen *screen) +{ + return (struct galahad_screen *)screen; +} + +#endif /* GLHD_SCREEN_H */ diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 7b69681096d..d6e7a8dbd37 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -2,6 +2,10 @@ Import('*') env = env.Clone() +if msvc: + print 'warning: not building i915g' + Return() + i915 = env.ConvenienceLibrary( target = 'i915', source = [ diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index c5b5979bf98..0a1b3e0d66b 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -31,7 +31,6 @@ #include "i915_batch.h" #include "i915_debug.h" -#define FILE_DEBUG_FLAG DEBUG_BLIT void i915_fill_blit(struct i915_context *i915, @@ -47,10 +46,8 @@ i915_fill_blit(struct i915_context *i915, unsigned BR13, CMD; - I915_DBG(i915, - "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h); + I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); switch (cpp) { case 1: @@ -100,11 +97,11 @@ i915_copy_blit(struct i915_context *i915, int dst_x2 = dst_x + w; - I915_DBG(i915, - "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + I915_DBG(DBG_BLIT, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + 
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); switch (cpp) { case 1: diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index acc0ffe037f..ac02ab23325 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -237,8 +237,6 @@ struct i915_context struct i915_state current; unsigned hardware_dirty; - - unsigned debug; }; /* A flag for each state_tracker state object: diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 663fac3055c..57d3390dea3 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -27,11 +27,37 @@ #include "i915_reg.h" #include "i915_context.h" +#include "i915_screen.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "i915_batch.h" #include "util/u_debug.h" + +static const struct debug_named_value debug_options[] = { + {"blit", DBG_BLIT, "Print when using the 2d blitter"}, + {"emit", DBG_EMIT, "State emit information"}, + {"atoms", DBG_ATOMS, "Print dirty state atoms"}, + {"flush", DBG_FLUSH, "Flushing information"}, + {"texture", DBG_TEXTURE, "Texture information"}, + {"constants", DBG_CONSTANTS, "Constant buffers"}, + DEBUG_NAMED_VALUE_END +}; + +unsigned i915_debug = 0; + +void i915_debug_init(struct i915_screen *screen) +{ + i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); +} + + + +/*********************************************************************** + * Batchbuffer dumping + */ + static void PRINTF( struct debug_stream *stream, @@ -896,3 +922,66 @@ i915_dump_batchbuffer( struct i915_winsys_batchbuffer *batch ) } + +/*********************************************************************** + * Dirty state atom dumping + */ + +void +i915_dump_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_NEW_VIEWPORT, "viewport"}, + {I915_NEW_RASTERIZER, 
"rasterizer"}, + {I915_NEW_FS, "fs"}, + {I915_NEW_BLEND, "blend"}, + {I915_NEW_CLIP, "clip"}, + {I915_NEW_SCISSOR, "scissor"}, + {I915_NEW_STIPPLE, "stipple"}, + {I915_NEW_FRAMEBUFFER, "framebuffer"}, + {I915_NEW_ALPHA_TEST, "alpha_test"}, + {I915_NEW_DEPTH_STENCIL, "depth_stencil"}, + {I915_NEW_SAMPLER, "sampler"}, + {I915_NEW_SAMPLER_VIEW, "sampler_view"}, + {I915_NEW_CONSTANTS, "constants"}, + {I915_NEW_VBO, "vbo"}, + {I915_NEW_VS, "vs"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} + +void +i915_dump_hardware_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_HW_STATIC, "static"}, + {I915_HW_DYNAMIC, "dynamic"}, + {I915_HW_SAMPLER, "sampler"}, + {I915_HW_MAP, "map"}, + {I915_HW_PROGRAM, "program"}, + {I915_HW_CONSTANTS, "constants"}, + {I915_HW_IMMEDIATE, "immediate"}, + {I915_HW_INVARIENT, "invarient"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->hardware_dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 67b8d9c2f63..8aa09f9c1f2 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -26,89 +26,51 @@ **************************************************************************/ /* Authors: Keith Whitwell <[email protected]> + * Jakob Bornecrantz <[email protected]> */ #ifndef I915_DEBUG_H #define I915_DEBUG_H -#include <stdarg.h> +#include "util/u_debug.h" +struct i915_screen; struct i915_context; +struct i915_winsys_batchbuffer; -struct debug_stream -{ - unsigned offset; /* current gtt offset */ - char *ptr; /* pointer to gtt offset zero */ - char *end; /* pointer to gtt offset zero */ - unsigned print_addresses; -}; - - -/* Internal 
functions - */ -void i915_disassemble_program(struct debug_stream *stream, - const unsigned *program, unsigned sz); - -void i915_print_ureg(const char *msg, unsigned ureg); - - -#define DEBUG_BATCH 0x1 -#define DEBUG_BLIT 0x2 -#define DEBUG_BUFFER 0x4 -#define DEBUG_CONSTANTS 0x8 -#define DEBUG_CONTEXT 0x10 -#define DEBUG_DRAW 0x20 -#define DEBUG_DYNAMIC 0x40 -#define DEBUG_FLUSH 0x80 -#define DEBUG_MAP 0x100 -#define DEBUG_PROGRAM 0x200 -#define DEBUG_REGIONS 0x400 -#define DEBUG_SAMPLER 0x800 -#define DEBUG_STATIC 0x1000 -#define DEBUG_SURFACE 0x2000 -#define DEBUG_WINSYS 0x4000 - -#include "pipe/p_compiler.h" +#define DBG_BLIT 0x1 +#define DBG_EMIT 0x2 +#define DBG_ATOMS 0x4 +#define DBG_FLUSH 0x8 +#define DBG_TEXTURE 0x10 +#define DBG_CONSTANTS 0x20 -#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) +extern unsigned i915_debug; -#include "util/u_simple_screen.h" +static INLINE boolean +I915_DBG_ON(unsigned flags) +{ + return i915_debug & flags; +} static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) +I915_DBG(unsigned flags, const char *fmt, ...) { - if ((i915)->debug & FILE_DEBUG_FLAG) { + if (I915_DBG_ON(flags)) { va_list args; - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); + va_start(args, fmt); + debug_vprintf(fmt, args); + va_end(args); } } -#else - -static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... 
) -{ - (void) i915; - (void) fmt; -} - -#endif - - -struct i915_winsys_batchbuffer; +void i915_debug_init(struct i915_screen *i915); -void i915_dump_batchbuffer( struct i915_winsys_batchbuffer *i915 ); +void i915_dump_batchbuffer(struct i915_winsys_batchbuffer *i915); -void i915_debug_init( struct i915_context *i915 ); +void i915_dump_dirty(struct i915_context *i915, const char *func); +void i915_dump_hardware_dirty(struct i915_context *i915, const char *func); #endif diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index f41c51f2991..50f49c540fe 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -28,6 +28,7 @@ #include "i915_reg.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_debug_private.h b/src/gallium/drivers/i915/i915_debug_private.h new file mode 100644 index 00000000000..b3668d08482 --- /dev/null +++ b/src/gallium/drivers/i915/i915_debug_private.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <[email protected]> + */ + +#ifndef I915_DEBUG_PRIVATE_H +#define I915_DEBUG_PRIVATE_H + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; +}; + +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +#endif diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 1582168eba5..967146479d1 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -35,6 +35,7 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" +#include "i915_debug.h" static void i915_flush( struct pipe_context *pipe, @@ -76,9 +77,9 @@ static void i915_flush( struct pipe_context *pipe, */ FLUSH_BATCH(fence); i915->vbo_flushed = 1; -} - + I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); +} void i915_init_flush_functions( struct i915_context *i915 ) { diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index f8665acbe18..bd046bd9058 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -52,8 +52,7 @@ #include "i915_state.h" -#undef VBUF_USE_FIFO -#undef VBUF_MAP_BUFFER +#define VBUF_MAP_BUFFER /** * Primitive renderer 
for i915. @@ -79,23 +78,18 @@ struct i915_vbuf_render { struct i915_winsys_buffer *vbo; size_t vbo_size; /**< current size of allocated buffer */ size_t vbo_alloc_size; /**< minimum buffer size to allocate */ - size_t vbo_offset; + size_t vbo_hw_offset; /**< offset that we program the hardware with */ + size_t vbo_sw_offset; /**< offset that we work with */ + size_t vbo_index; /**< index offset to be added to all indices */ void *vbo_ptr; size_t vbo_max_used; + size_t vbo_max_index; /**< index offset to be added to all indices */ #ifndef VBUF_MAP_BUFFER size_t map_used_start; size_t map_used_end; size_t map_size; #endif - -#ifdef VBUF_USE_FIFO - /* Stuff for the pool */ - struct util_fifo *pool_fifo; - unsigned pool_used; - unsigned pool_buffer_size; - boolean pool_not_used; -#endif }; @@ -109,6 +103,35 @@ i915_vbuf_render(struct vbuf_render *render) return (struct i915_vbuf_render *)render; } +/** + * If vbo state differs between renderer and context + * push state to the context. This function pushes + * hw_offset to i915->vbo_offset and vbo to i915->vbo. + * + * Side effects: + * May updates context vbo_offset and vbo fields. + */ +static void +i915_vbuf_update_vbo_state(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->vbo != i915_render->vbo || + i915->vbo_offset != i915_render->vbo_hw_offset) { + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_hw_offset; + i915->dirty |= I915_NEW_VBO; + } +} + +/** + * Callback exported to the draw module. + * Returns the current vertex_info. + * + * Side effects: + * If state is dirty update derived state. + */ static const struct vertex_info * i915_vbuf_render_get_vertex_info(struct vbuf_render *render) { @@ -123,12 +146,18 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) return &i915->current.vertex_info; } +/** + * Reserve space in the vbo for vertices. 
+ * + * Side effects: + * None. + */ static boolean i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; - if (i915_render->vbo_size < size + i915_render->vbo_offset) + if (i915_render->vbo_size < size + i915_render->vbo_sw_offset) return FALSE; if (i915->vbo_flushed) @@ -137,28 +166,28 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) return TRUE; } +/** + * Allocate a new vbo buffer should there not be enough space for + * the requested number of vertices by the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and allocates a new buffer. + */ static void i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; - if (i915_render->vbo) { -#ifdef VBUF_USE_FIFO - if (i915_render->pool_not_used) - iws->buffer_destroy(iws, i915_render->vbo); - else - u_fifo_add(i915_render->pool_fifo, i915_render->vbo); - i915_render->vbo = NULL; -#else + if (i915_render->vbo) iws->buffer_destroy(iws, i915_render->vbo); -#endif - } i915->vbo_flushed = 0; i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; + i915_render->vbo_index = 0; #ifndef VBUF_MAP_BUFFER if (i915_render->vbo_size > i915_render->map_size) { @@ -168,52 +197,51 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) } #endif -#ifdef VBUF_USE_FIFO - if (i915_render->vbo_size != i915_render->pool_buffer_size) { - i915_render->pool_not_used = TRUE; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - I915_NEW_VERTEX); - } else { - i915_render->pool_not_used = FALSE; - - if (i915_render->pool_used >= 2) { - FLUSH_BATCH(NULL); - i915->vbo_flushed = 0; - i915_render->pool_used = 0; - } - u_fifo_pop(i915_render->pool_fifo, 
(void**)&i915_render->vbo); - } -#else i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, I915_NEW_VERTEX); -#endif } +/** + * Callback exported to the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and may allocate + * a new buffer. Also updates may update the vbo state + * on the i915 context. + */ static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; size_t size = (size_t)vertex_size * (size_t)nr_vertices; + size_t offset; - /* FIXME: handle failure */ - assert(!i915->vbo); + /* + * Align sw_offset with first multiple of vertex size from hw_offset. + * Set index to be the multiples from from hw_offset to sw_offset. + * i915_vbuf_render_new_buf will reset index, sw_offset, hw_offset + * when it allocates a new buffer this is correct. + */ + { + offset = i915_render->vbo_sw_offset - i915_render->vbo_hw_offset; + offset = util_align_npot(offset, vertex_size); + i915_render->vbo_sw_offset = i915_render->vbo_hw_offset + offset; + i915_render->vbo_index = offset / vertex_size; + } - if (!i915_vbuf_render_reserve(i915_render, size)) { -#ifdef VBUF_USE_FIFO - /* incase we flushed reset the number of pool buffers used */ - if (i915->vbo_flushed) - i915_render->pool_used = 0; -#endif + if (!i915_vbuf_render_reserve(i915_render, size)) i915_vbuf_render_new_buf(i915_render, size); - } + + /* + * If a new buffer has been alocated sw_offset, + * hw_offset & index will be reset by new_buf + */ i915_render->vertex_size = vertex_size; - i915->vbo = i915_render->vbo; - i915->vbo_offset = i915_render->vbo_offset; - i915->dirty |= I915_NEW_VBO; + + i915_vbuf_update_vbo_state(render); if (!i915_render->vbo) return FALSE; @@ -232,7 +260,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) #ifdef VBUF_MAP_BUFFER i915_render->vbo_ptr = 
iws->buffer_map(iws, i915_render->vbo, TRUE); - return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset; #else (void)iws; return (unsigned char *)i915_render->vbo_ptr; @@ -248,6 +276,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; + i915_render->vbo_max_index = max_index; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); #ifdef VBUF_MAP_BUFFER iws->buffer_unmap(iws, i915_render->vbo); @@ -255,13 +284,36 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, i915_render->map_used_start = i915_render->vertex_size * min_index; i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); iws->buffer_write(iws, i915_render->vbo, - i915_render->map_used_start + i915_render->vbo_offset, + i915_render->map_used_start + i915_render->vbo_sw_offset, i915_render->map_used_end - i915_render->map_used_start, (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start); #endif } +/** + * Ensure that the given max_index given is not larger ushort max. + * If it is larger then ushort max it advanced the hw_offset to the + * same position in the vbo as sw_offset and set index to zero. + * + * Side effects: + * On failure update hw_offset and index. 
+ */ +static void +i915_vbuf_ensure_index_bounds(struct vbuf_render *render, + unsigned max_index) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (max_index + i915_render->vbo_index < ((1 << 17) - 1)) + return; + + i915_render->vbo_hw_offset = i915_render->vbo_sw_offset; + i915_render->vbo_index = 0; + + i915_vbuf_update_vbo_state(render); +} + static boolean i915_vbuf_render_set_primitive(struct vbuf_render *render, unsigned prim) @@ -327,7 +379,9 @@ draw_arrays_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; - unsigned end = start + nr; + unsigned end = start + nr + i915_render->vbo_index; + start += i915_render->vbo_index; + switch(type) { case 0: for (i = start; i+1 < end; i += 2) @@ -391,16 +445,18 @@ draw_arrays_fallback(struct vbuf_render *render, struct i915_context *i915 = i915_render->i915; unsigned nr_indices; + nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); + if (!nr_indices) + return; + + i915_vbuf_ensure_index_bounds(render, start + nr_indices); + if (i915->dirty) i915_update_derived(i915); if (i915->hardware_dirty) i915_emit_hardware_state(i915); - nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); - if (!nr_indices) - return; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { FLUSH_BATCH(NULL); @@ -415,6 +471,7 @@ draw_arrays_fallback(struct vbuf_render *render, goto out; } } + OUT_BATCH(_3DPRIMITIVE | PRIM_INDIRECT | i915_render->hwprim | @@ -440,6 +497,9 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, return; } + i915_vbuf_ensure_index_bounds(render, start + nr); + start += i915_render->vbo_index; + if (i915->dirty) i915_update_derived(i915); @@ -485,35 +545,36 @@ draw_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; + unsigned o = 
i915_render->vbo_index; switch(type) { case 0: for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH(indices[i] | indices[i+1] << 16); + OUT_BATCH((o+indices[i]) | (o+indices[i+1]) << 16); } if (i < nr_indices) { - OUT_BATCH(indices[i]); + OUT_BATCH((o+indices[i])); } break; case PIPE_PRIM_LINE_LOOP: if (nr_indices >= 2) { for (i = 1; i < nr_indices; i++) - OUT_BATCH(indices[i-1] | indices[i] << 16); - OUT_BATCH(indices[i-1] | indices[0] << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[i]) << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[0]) << 16); } break; case PIPE_PRIM_QUADS: for (i = 0; i + 3 < nr_indices; i += 4) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+1] << 16); - OUT_BATCH(indices[i+2] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+2]) | (o+indices[i+3]) << 16); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i + 3 < nr_indices; i += 2) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+2] << 16); - OUT_BATCH(indices[i+0] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+2]) << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+3]) << 16); } break; default: @@ -558,6 +619,8 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (!nr_indices) return; + i915_vbuf_ensure_index_bounds(render, i915_render->vbo_max_index); + if (i915->dirty) i915_update_derived(i915); @@ -597,14 +660,15 @@ static void i915_vbuf_render_release_vertices(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - assert(i915->vbo); - i915_render->vbo_offset += i915_render->vbo_max_used; + i915_render->vbo_sw_offset += i915_render->vbo_max_used; i915_render->vbo_max_used = 0; - i915->vbo = NULL; - i915->dirty |= 
I915_NEW_VBO; + + /* + * Micro optimization, by calling update here we the offset change + * will be picked up on the next pipe_context::draw_*. + */ + i915_vbuf_update_vbo_state(render); } static void @@ -652,7 +716,8 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->vbo = NULL; i915_render->vbo_ptr = NULL; i915_render->vbo_size = 0; - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; #ifdef VBUF_USE_POOL diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f82426520cd..255538ebaa4 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -31,6 +31,7 @@ #include "util/u_string.h" #include "i915_reg.h" +#include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" #include "i915_surface.h" @@ -330,5 +331,7 @@ i915_screen_create(struct i915_winsys *iws) i915_init_screen_resource_functions(is); i915_init_screen_surface_functions(is); + i915_debug_init(is); + return &is->base; } diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index 86c6b0027d5..7795046f06d 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -35,16 +35,21 @@ struct i915_context; struct i915_tracked_state { + const char *name; + void (*update)(struct i915_context *); unsigned dirty; - void (*update)( struct i915_context * ); }; -void i915_update_immediate( struct i915_context *i915 ); -void i915_update_dynamic( struct i915_context *i915 ); -void i915_update_derived( struct i915_context *i915 ); -void i915_update_samplers( struct i915_context *i915 ); -void i915_update_textures(struct i915_context *i915); +extern struct i915_tracked_state i915_update_vertex_layout; -void i915_emit_hardware_state( struct i915_context *i915 ); +extern struct i915_tracked_state i915_hw_samplers; 
+extern struct i915_tracked_state i915_hw_sampler_views; +extern struct i915_tracked_state i915_hw_immediate; +extern struct i915_tracked_state i915_hw_dynamic; +extern struct i915_tracked_state i915_hw_fs; +extern struct i915_tracked_state i915_hw_framebuffer; + +void i915_update_derived(struct i915_context *i915); +void i915_emit_hardware_state(struct i915_context *i915); #endif diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 4da46772b5d..c0595403571 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -32,15 +32,16 @@ #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_debug.h" #include "i915_reg.h" -/** +/*********************************************************************** * Determine the hardware vertex layout. * Depends on vertex/fragment shader state. */ -static void calculate_vertex_layout( struct i915_context *i915 ) +static void calculate_vertex_layout(struct i915_context *i915) { const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; @@ -146,37 +147,71 @@ static void calculate_vertex_layout( struct i915_context *i915 ) } } +struct i915_tracked_state i915_update_vertex_layout = { + "vertex_layout", + calculate_vertex_layout, + I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS +}; -/* Hopefully this will remain quite simple, otherwise need to pull in - * something like the state tracker mechanism. 
+/*********************************************************************** + * Update fragment state */ -void i915_update_derived( struct i915_context *i915 ) +static void update_fs(struct i915_context *i915) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) - calculate_vertex_layout( i915 ); - - if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW)) - i915_update_samplers(i915); - - if (i915->dirty & I915_NEW_SAMPLER_VIEW) - i915_update_textures(i915); + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ +} - if (i915->dirty) - i915_update_immediate( i915 ); +struct i915_tracked_state i915_hw_fs = { + "fs", + update_fs, + I915_NEW_FS +}; - if (i915->dirty) - i915_update_dynamic( i915 ); - if (i915->dirty & I915_NEW_FS) { - i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ - } +/*********************************************************************** + * Update framebuffer state + */ +static void update_framebuffer(struct i915_context *i915) +{ /* HW emit currently references framebuffer state directly: */ - if (i915->dirty & I915_NEW_FRAMEBUFFER) - i915->hardware_dirty |= I915_HW_STATIC; + i915->hardware_dirty |= I915_HW_STATIC; +} + +struct i915_tracked_state i915_hw_framebuffer = { + "framebuffer", + update_framebuffer, + I915_NEW_FRAMEBUFFER +}; + + + +/*********************************************************************** + */ +static struct i915_tracked_state *atoms[] = { + &i915_update_vertex_layout, + &i915_hw_samplers, + &i915_hw_sampler_views, + &i915_hw_immediate, + &i915_hw_dynamic, + &i915_hw_fs, + &i915_hw_framebuffer, + NULL, +}; + +void i915_update_derived(struct i915_context *i915) +{ + int i; + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_dirty(i915, __FUNCTION__); + + for (i = 0; atoms[i]; i++) + if (atoms[i]->dirty & i915->dirty) + atoms[i]->update(i915); i915->dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 
9c6723b3916..d964483ac7e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "i915_batch.h" @@ -34,10 +34,9 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" -#define FILE_DEBUG_FLAG DEBUG_STATE /* State that we have chosen to store in the DYNAMIC segment of the - * i915 indirect state mechanism. + * i915 indirect state mechanism. * * Can't cache these in the way we do the static state, as there is no * start/size in the command packet, instead an 'end' value that gets @@ -47,10 +46,10 @@ * (active) state every time a 4kb boundary is crossed. 
*/ -static INLINE void set_dynamic_indirect( struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords ) +static INLINE void set_dynamic_indirect(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; @@ -61,38 +60,41 @@ static INLINE void set_dynamic_indirect( struct i915_context *i915, } + /*********************************************************************** - * Modes4: stencil masks and logicop + * Modes4: stencil masks and logicop */ -static void upload_MODES4( struct i915_context *i915 ) +static void upload_MODES4(struct i915_context *i915) { unsigned modes4 = 0; - /* I915_NEW_STENCIL */ + /* I915_NEW_STENCIL + */ modes4 |= i915->depth_stencil->stencil_modes4; - /* I915_NEW_BLEND */ + + /* I915_NEW_BLEND + */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: + /* Always, so that we know when state is in-active: */ - set_dynamic_indirect( i915, - I915_DYNAMIC_MODES4, - &modes4, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_MODES4, + &modes4, + 1); } const struct i915_tracked_state i915_upload_MODES4 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, - upload_MODES4 + "MODES4", + upload_MODES4, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL }; - /*********************************************************************** */ - -static void upload_BFO( struct i915_context *i915 ) +static void upload_BFO(struct i915_context *i915) { unsigned bfo[2]; bfo[0] = i915->depth_stencil->bfo[0]; @@ -101,88 +103,89 @@ static void upload_BFO( struct i915_context *i915 ) if (bfo[0] & BFO_ENABLE_STENCIL_REF) { bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect( i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2 ); + + set_dynamic_indirect(i915, + I915_DYNAMIC_BFO_0, + &(bfo[0]), + 2); } const struct i915_tracked_state i915_upload_BFO = { - I915_NEW_DEPTH_STENCIL, - upload_BFO + "BFO", + upload_BFO, + I915_NEW_DEPTH_STENCIL }; + 
/*********************************************************************** */ - - -static void upload_BLENDCOLOR( struct i915_context *i915 ) +static void upload_BLENDCOLOR(struct i915_context *i915) { unsigned bc[2]; - memset( bc, 0, sizeof(bc) ); + memset(bc, 0, sizeof(bc)); - /* I915_NEW_BLEND {_COLOR} + /* I915_NEW_BLEND */ { const float *color = i915->blend_color.color; bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; - bc[1] = pack_ui32_float4( color[0], - color[1], - color[2], - color[3] ); + bc[1] = pack_ui32_float4(color[0], + color[1], + color[2], + color[3]); } - set_dynamic_indirect( i915, - I915_DYNAMIC_BC_0, - bc, - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_BC_0, + bc, + 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { - I915_NEW_BLEND, - upload_BLENDCOLOR + "BLENDCOLOR", + upload_BLENDCOLOR, + I915_NEW_BLEND }; -/*********************************************************************** - */ -static void upload_IAB( struct i915_context *i915 ) +/*********************************************************************** + */ +static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - - set_dynamic_indirect( i915, - I915_DYNAMIC_IAB, - &iab, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_IAB, + &iab, + 1); } const struct i915_tracked_state i915_upload_IAB = { - I915_NEW_BLEND, - upload_IAB + "IAB", + upload_IAB, + I915_NEW_BLEND }; + /*********************************************************************** */ - - - -static void upload_DEPTHSCALE( struct i915_context *i915 ) +static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect( i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { - I915_NEW_RASTERIZER, - upload_DEPTHSCALE + "DEPTHSCALE", + upload_DEPTHSCALE, + I915_NEW_RASTERIZER }; @@ -196,10 +199,9 @@ const 
struct i915_tracked_state i915_upload_DEPTHSCALE = { * XXX: does stipple pattern need to be adjusted according to * the window position? * - * XXX: possibly need workaround for conform paths test. + * XXX: possibly need workaround for conform paths test. */ - -static void upload_STIPPLE( struct i915_context *i915 ) +static void upload_STIPPLE(struct i915_context *i915) { unsigned st[2]; @@ -210,7 +212,6 @@ static void upload_STIPPLE( struct i915_context *i915 ) */ st[1] |= i915->rasterizer->st; - /* I915_NEW_STIPPLE */ { @@ -225,73 +226,75 @@ static void upload_STIPPLE( struct i915_context *i915 ) /* Not sure what to do about fallbacks, so for now just dont: */ st[1] |= ((p[0] << 0) | - (p[1] << 4) | - (p[2] << 8) | - (p[3] << 12)); + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); } - - set_dynamic_indirect( i915, - I915_DYNAMIC_STP_0, - &st[0], - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_STP_0, + &st[0], + 2); } - const struct i915_tracked_state i915_upload_STIPPLE = { - I915_NEW_RASTERIZER | I915_NEW_STIPPLE, - upload_STIPPLE + "STIPPLE", + upload_STIPPLE, + I915_NEW_RASTERIZER | I915_NEW_STIPPLE }; /*********************************************************************** - * Scissor. 
+ * Scissor enable */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { - I915_NEW_RASTERIZER, - upload_SCISSOR_ENABLE + "SCISSOR ENABLE", + upload_SCISSOR_ENABLE, + I915_NEW_RASTERIZER }; -static void upload_SCISSOR_RECT( struct i915_context *i915 ) +/*********************************************************************** + * Scissor rect + */ +static void upload_SCISSOR_RECT(struct i915_context *i915) { unsigned x1 = i915->scissor.minx; unsigned y1 = i915->scissor.miny; unsigned x2 = i915->scissor.maxx; unsigned y2 = i915->scissor.maxy; unsigned sc[3]; - + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3); } - const struct i915_tracked_state i915_upload_SCISSOR_RECT = { - I915_NEW_SCISSOR, - upload_SCISSOR_RECT + "SCISSOR RECT", + upload_SCISSOR_RECT, + I915_NEW_SCISSOR }; - - - +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_MODES4, &i915_upload_BFO, @@ -306,12 +309,17 @@ static const struct i915_tracked_state *atoms[] = { /* These will be dynamic indirect state commands, but for now just end * up on the batch buffer with everything else. 
*/ -void i915_update_dynamic( struct i915_context *i915 ) +static void update_dynamic(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } +struct i915_tracked_state i915_hw_dynamic = { + "dynamic", + update_dynamic, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 22082fece82..bbf9ff51f5f 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -29,6 +29,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_batch.h" +#include "i915_debug.h" #include "i915_reg.h" #include "i915_resource.h" @@ -111,15 +112,20 @@ i915_emit_hardware_state(struct i915_context *i915 ) 3 ) * 3/2; /* plus 50% margin */ -#if 0 - debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); -#endif - + uintptr_t save_ptr; + size_t save_relocs; + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + if(!BEGIN_BATCH(dwords, relocs)) { FLUSH_BATCH(NULL); assert(BEGIN_BATCH(dwords, relocs)); } + save_ptr = (uintptr_t)i915->batch->ptr; + save_relocs = i915->batch->relocs; + /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIENT) { @@ -399,6 +405,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(0); } + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, + ((uintptr_t)i915->batch->ptr - save_ptr) / 4, + i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 8cec699285c..f9ade7077f2 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -1,8 +1,8 @@ /************************************************************************** 
- * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,13 +22,13 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* * Authors: * Keith Whitwell <[email protected]> */ - + #include "i915_state_inlines.h" #include "i915_context.h" #include "i915_state.h" @@ -46,30 +46,31 @@ /*********************************************************************** - * S0,S1: Vertex buffer state. + * S0,S1: Vertex buffer state. */ static void upload_S0S1(struct i915_context *i915) { unsigned LIS0, LIS1; - /* I915_NEW_VBO */ - /* TODO: re-use vertex buffers here? */ + /* I915_NEW_VBO + */ LIS0 = i915->vbo_offset; - /* I915_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + /* I915_NEW_VERTEX_SIZE */ + /* XXX do this where the vertex size is calculated! 
*/ { unsigned vertex_size = i915->current.vertex_info.size; LIS1 = ((vertex_size << 24) | - (vertex_size << 16)); + (vertex_size << 16)); } - /* I915_NEW_VBO */ - /* TODO: use a vertex generation number to track vbo changes */ + /* I915_NEW_VBO + */ if (1 || i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) { i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; @@ -78,13 +79,13 @@ static void upload_S0S1(struct i915_context *i915) } const struct i915_tracked_state i915_upload_S0S1 = { - I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, - upload_S0S1 + "imm S0 S1", + upload_S0S1, + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT }; - /*********************************************************************** * S4: Vertex format, rasterization state */ @@ -92,7 +93,8 @@ static void upload_S2S4(struct i915_context *i915) { unsigned LIS2, LIS4; - /* I915_NEW_VERTEX_FORMAT */ + /* I915_NEW_VERTEX_FORMAT + */ { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; @@ -113,35 +115,38 @@ static void upload_S2S4(struct i915_context *i915) } } - const struct i915_tracked_state i915_upload_S2S4 = { - I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, - upload_S2S4 + "imm S2 S4", + upload_S2S4, + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT }; /*********************************************************************** - * */ -static void upload_S5( struct i915_context *i915 ) +static void upload_S5(struct i915_context *i915) { unsigned LIS5 = 0; + /* I915_NEW_DEPTH_STENCIL + */ LIS5 |= i915->depth_stencil->stencil_LIS5; /* hope it's safe to set stencil ref value even if stencil test is disabled? 
*/ LIS5 |= i915->stencil_ref.ref_value[0] << S5_STENCIL_REF_SHIFT; + /* I915_NEW_BLEND + */ LIS5 |= i915->blend->LIS5; #if 0 - /* I915_NEW_RASTERIZER */ + /* I915_NEW_RASTERIZER + */ if (i915->state.Polygon->OffsetFill) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; i915->hardware_dirty |= I915_HW_IMMEDIATE; @@ -149,14 +154,16 @@ static void upload_S5( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S5 = { - (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), - upload_S5 + "imm S5", + upload_S5, + I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER }; + /*********************************************************************** */ -static void upload_S6( struct i915_context *i915 ) +static void upload_S6(struct i915_context *i915) { unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); @@ -180,14 +187,16 @@ static void upload_S6( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, - upload_S6 + "imm s6", + upload_S6, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; + /*********************************************************************** */ -static void upload_S7( struct i915_context *i915 ) +static void upload_S7(struct i915_context *i915) { unsigned LIS7; @@ -202,11 +211,15 @@ static void upload_S7( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S7 = { - I915_NEW_RASTERIZER, - upload_S7 + "imm S7", + upload_S7, + I915_NEW_RASTERIZER }; + +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_S0S1, &i915_upload_S2S4, @@ -215,13 +228,17 @@ static const struct i915_tracked_state *atoms[] = { &i915_upload_S7 }; -/* - */ -void i915_update_immediate( struct i915_context *i915 ) +static void 
update_immediate(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } + +struct i915_tracked_state i915_hw_immediate = { + "immediate", + update_immediate, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index 77b9bccbb74..941259eb766 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -53,17 +53,23 @@ * * So we need to update the map state when we change samplers and * we need to be change the sampler state when map state is changed. - * The first part is done by calling i915_update_texture in - * i915_update_samplers and the second part is done else where in - * code tracking the state changes. + * The first part is done by calling update_texture in update_samplers + * and the second part is done else where in code tracking the state + * changes. + */ + +static void update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); + + + +/*********************************************************************** + * Samplers */ -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]); /** * Compute i915 texture sampling state. 
* @@ -74,16 +80,13 @@ i915_update_texture(struct i915_context *i915, */ static void update_sampler(struct i915_context *i915, uint unit, - const struct i915_sampler_state *sampler, - const struct i915_texture *tex, - unsigned state[3] ) + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3]) { const struct pipe_resource *pt = &tex->b.b; unsigned minlod, lastlod; - /* Need to do this after updating the maps, which call the - * intel_finalize_mipmap_tree and hence can update firstLevel: - */ state[0] = sampler->state[0]; state[1] = sampler->state[1]; state[2] = sampler->state[2]; @@ -118,7 +121,7 @@ static void update_sampler(struct i915_context *i915, wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { if (i915->conformance_mode > 0) { assert(0); - /* sampler->fallback = true; */ + /* sampler->fallback = true; */ /* TODO */ } } @@ -137,8 +140,7 @@ static void update_sampler(struct i915_context *i915, state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } - -void i915_update_samplers( struct i915_context *i915 ) +static void update_samplers(struct i915_context *i915) { uint unit; @@ -152,29 +154,38 @@ void i915_update_samplers( struct i915_context *i915 ) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - texture, /* texture */ - i915->current.sampler[unit] /* the result */ - ); - i915_update_texture( i915, - unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); + update_sampler(i915, + unit, + i915->sampler[unit], /* sampler state */ + texture, /* texture */ + i915->current.sampler[unit]); /* the result */ + update_texture(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); /* 
the result */ + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); } } i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; } +struct i915_tracked_state i915_hw_samplers = { + "sampler_views", + update_samplers, + I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW +}; -static uint -translate_texture_format(enum pipe_format pipeFormat) + + +/*********************************************************************** + * Sampler views + */ + +static uint translate_texture_format(enum pipe_format pipeFormat) { switch (pipeFormat) { case PIPE_FORMAT_L8_UNORM: @@ -226,19 +237,17 @@ translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", - pipeFormat); + pipeFormat); assert(0); return 0; } } - -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]) +static void update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]) { const struct pipe_resource *pt = &tex->b.b; uint format, pitch; @@ -287,9 +296,7 @@ i915_update_texture(struct i915_context *i915, | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); } - -void -i915_update_textures(struct i915_context *i915) +static void update_textures(struct i915_context *i915) { uint unit; @@ -300,13 +307,19 @@ i915_update_textures(struct i915_context *i915) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915_update_texture( i915, - unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); + update_texture(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); } } i915->hardware_dirty |= I915_HW_MAP; } + 
+struct i915_tracked_state i915_hw_sampler_views = { + "sampler_views", + update_textures, + I915_NEW_SAMPLER_VIEW +}; diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript index 019af682f68..119f914a16d 100644 --- a/src/gallium/drivers/i965/SConscript +++ b/src/gallium/drivers/i965/SConscript @@ -2,6 +2,10 @@ Import('*') env = env.Clone() +if msvc: + print 'warning: not building i965g' + Return(); + i965 = env.ConvenienceLibrary( target = 'i965', source = [ diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 4c8579365d5..28c83515ba9 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -239,7 +239,7 @@ char *imm_encoding[8] = { [2] = "UW", [3] = "W", [5] = "VF", - [5] = "V", + [6] = "V", [7] = "F" }; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index ca4743f9ef7..593928f399c 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -120,13 +120,14 @@ identity_sampler_view_create(struct identity_context *id_context, assert(view->texture == id_resource->resource); - id_view = MALLOC(sizeof(struct identity_sampler_view)); + id_view = CALLOC_STRUCT(identity_sampler_view); id_view->base = *view; id_view->base.reference.count = 1; id_view->base.texture = NULL; pipe_resource_reference(&id_view->base.texture, id_resource->resource); id_view->base.context = id_context->pipe; + id_view->sampler_view = view; return &id_view->base; error: @@ -180,8 +181,8 @@ identity_transfer_destroy(struct identity_context *id_context, struct identity_transfer *id_transfer) { pipe_resource_reference(&id_transfer->base.resource, NULL); - id_transfer->pipe->transfer_destroy(id_context->pipe, - id_transfer->transfer); + id_context->pipe->transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h 
b/src/gallium/drivers/identity/id_objects.h index 5eea10b0b5a..e8deabf4fc7 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -65,7 +65,6 @@ struct identity_transfer { struct pipe_transfer base; - struct pipe_context *pipe; struct pipe_transfer *transfer; }; diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index a1b6f56e0d2..4e0d4c3fc0c 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -3,3 +3,4 @@ lp_test_blend lp_test_conv lp_test_format lp_test_printf +lp_test_sincos diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index c79c8bd9d1e..ee28179c303 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ lp_state_gs.c \ lp_state_rasterizer.c \ lp_state_sampler.c \ + lp_state_so.c \ lp_state_surface.c \ lp_state_vertex.c \ lp_state_vs.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a0646692e7b..a1ef71da89d 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -57,6 +57,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', + 'lp_state_so.c', 'lp_state_surface.c', 'lp_state_vertex.c', 'lp_state_vs.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 9e88a6e09f4..3db4f12ebb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -36,6 +36,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" @@ -94,6 +95,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); + 
make_empty_list(&llvmpipe->fs_variants_list); + llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; @@ -110,6 +113,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe_init_sampler_funcs(llvmpipe); llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_so_funcs(llvmpipe); llvmpipe_init_fs_funcs(llvmpipe); llvmpipe_init_vs_funcs(llvmpipe); llvmpipe_init_gs_funcs(llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index cb04d4a4d54..986e604ce7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -38,6 +38,7 @@ #include "lp_tex_sample.h" #include "lp_jit.h" #include "lp_setup.h" +#include "lp_state_fs.h" struct llvmpipe_vbuf_render; @@ -62,6 +63,7 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; const struct lp_geometry_shader *gs; const struct lp_velems_state *velems; + const struct lp_so_state *so; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -75,6 +77,12 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct { + struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; + int offset[PIPE_MAX_SO_BUFFERS]; + int so_count[PIPE_MAX_SO_BUFFERS]; + int num_buffers; + } so_target; unsigned num_samplers; unsigned num_fragment_sampler_views; @@ -105,6 +113,8 @@ struct llvmpipe_context { unsigned tex_timestamp; boolean no_rast; + struct lp_fs_variant_list_item fs_variants_list; + unsigned nr_fs_variants; }; diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index 4102a9df67c..d1c431475d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -66,5 +66,10 @@ */ #define 
LP_MAX_SCENE_SIZE (512 * 1024 * 1024) +/** + * Max number of shader variants (for all shaders combined, + * per context) that will be kept around. + */ +#define LP_MAX_SHADER_VARIANTS 1024 #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 3f7a85b6827..05d1b937944 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -54,6 +54,9 @@ #define LP_NEW_QUERY 0x4000 #define LP_NEW_BLEND_COLOR 0x8000 #define LP_NEW_GS 0x10000 +#define LP_NEW_SO 0x20000 +#define LP_NEW_SO_BUFFERS 0x40000 + struct vertex_info; @@ -82,6 +85,10 @@ struct lp_velems_state struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; +struct lp_so_state { + struct pipe_stream_output_state base; +}; + void llvmpipe_set_framebuffer_state(struct pipe_context *, @@ -120,5 +127,9 @@ llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe); void llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe); + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2619e043fdf..65115052cdd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -68,6 +68,7 @@ #include "util/u_format.h" #include "util/u_dump.h" #include "util/u_string.h" +#include "util/u_simple_list.h" #include "os/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -95,6 +96,7 @@ #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" +#include "lp_flush.h" #include <llvm-c/Analysis.h> @@ -936,7 +938,10 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; - variant->no = shader->variant_no++; + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + variant->no = shader->variants_created++; memcpy(&variant->key, key, 
sizeof *key); @@ -962,10 +967,6 @@ generate_variant(struct llvmpipe_context *lp, !shader->info.uses_kill ? TRUE : FALSE; - /* insert new variant into linked list */ - variant->next = shader->variants; - shader->variants = variant; - return variant; } @@ -981,6 +982,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, return NULL; shader->no = fs_no++; + make_empty_list(&shader->variants); /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &shader->info); @@ -1024,14 +1026,40 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) llvmpipe->dirty |= LP_NEW_FS; } +static void +remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + unsigned i; + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n", + variant->shader->no, variant->no, variant->shader->variants_created, + variant->shader->variants_cached, lp->nr_fs_variants); + } + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } + } + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + lp->nr_fs_variants--; + FREE(variant); +} static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct pipe_fence_handle *fence = NULL; struct lp_fragment_shader *shader = fs; - struct lp_fragment_shader_variant *variant; + struct lp_fs_variant_list_item *li; assert(fs != llvmpipe->fs); (void) llvmpipe; @@ -1039,29 +1067,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) /* * 
XXX: we need to flush the context until we have some sort of reference * counting in fragment shaders as they may still be binned + * Flushing alone might not sufficient we need to wait on it too. */ - draw_flush(llvmpipe->draw); - lp_setup_flush(llvmpipe->setup, 0); - - variant = shader->variants; - while(variant) { - struct lp_fragment_shader_variant *next = variant->next; - unsigned i; - for (i = 0; i < Elements(variant->function); i++) { - if (variant->function[i]) { - if (variant->jit_function[i]) - LLVMFreeMachineCodeForFunction(screen->engine, - variant->function[i]); - LLVMDeleteFunction(variant->function[i]); - } - } + llvmpipe_flush(pipe, 0, &fence); - FREE(variant); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } - variant = next; + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct lp_fs_variant_list_item *next = next_elem(li); + remove_shader_variant(llvmpipe, li->base); + li = next; } + assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); } @@ -1088,9 +1111,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, /* note: reference counting */ pipe_resource_reference(&llvmpipe->constants[shader][index], constants); - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, index, - data, size); + if(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY) { + draw_set_mapped_constant_buffer(llvmpipe->draw, shader, + index, data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; @@ -1215,7 +1239,6 @@ make_variant_key(struct llvmpipe_context *lp, lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); } - /** * Update fragment state. This is called just prior to drawing * something when some fragment-related state has changed. 
@@ -1225,21 +1248,47 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) { struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; - struct lp_fragment_shader_variant *variant; + struct lp_fragment_shader_variant *variant = NULL; + struct lp_fs_variant_list_item *li; make_variant_key(lp, shader, &key); - variant = shader->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, sizeof key) == 0) { + variant = li->base; break; - - variant = variant->next; + } + li = next_elem(li); } - if (!variant) { + if (variant) { + move_to_head(&lp->fs_variants_list, &variant->list_item_global); + } + else { int64_t t0, t1; int64_t dt; + unsigned i; + if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { + struct pipe_context *pipe = &lp->pipe; + struct pipe_fence_handle *fence = NULL; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_flush(pipe, 0, &fence); + + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { + struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); + remove_shader_variant(lp, item->base); + } + } t0 = os_time_get(); variant = generate_variant(lp, shader, &key); @@ -1248,6 +1297,13 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) dt = t1 - t0; LP_COUNT_ADD(llvm_compile_time, dt); LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. 
omit in/out test */ + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&lp->fs_variants_list, &variant->list_item_global); + lp->nr_fs_variants++; + shader->variants_cached++; + } } lp_setup_set_fs_variant(lp->setup, variant); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 64ead2a9973..593cd4de6be 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -64,6 +64,11 @@ struct lp_fragment_shader_variant_key struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; +struct lp_fs_variant_list_item +{ + struct lp_fragment_shader_variant *base; + struct lp_fs_variant_list_item *next, *prev; +}; struct lp_fragment_shader_variant { @@ -75,7 +80,8 @@ struct lp_fragment_shader_variant lp_jit_frag_func jit_function[2]; - struct lp_fragment_shader_variant *next; + struct lp_fs_variant_list_item list_item_global, list_item_local; + struct lp_fragment_shader *shader; /* For debugging/profiling purposes */ unsigned no; @@ -89,11 +95,12 @@ struct lp_fragment_shader struct tgsi_shader_info info; - struct lp_fragment_shader_variant *variants; + struct lp_fs_variant_list_item variants; /* For debugging/profiling purposes */ unsigned no; - unsigned variant_no; + unsigned variants_created; + unsigned variants_cached; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 55d43368a3e..e94065fb6ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -105,6 +105,13 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, static void +llvmpipe_bind_geometry_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + /* XXX: implementation missing */ +} + +static void llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view 
**views) @@ -163,6 +170,14 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, } +static void +llvmpipe_set_geometry_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + /*XXX: implementation missing */ +} + static struct pipe_sampler_view * llvmpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, @@ -206,8 +221,10 @@ llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; + llvmpipe->pipe.bind_geometry_sampler_states = llvmpipe_bind_geometry_sampler_states; llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.set_geometry_sampler_views = llvmpipe_set_geometry_sampler_views; llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c new file mode 100644 index 00000000000..30b17c98816 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_texture.h" + +#include "util/u_memory.h" +#include "draw/draw_context.h" + + +static void * +llvmpipe_create_stream_output_state(struct pipe_context *pipe, + const struct pipe_stream_output_state *templ) +{ + struct lp_so_state *so; + so = (struct lp_so_state *) CALLOC_STRUCT(lp_so_state); + + if (so) { + so->base.num_outputs = templ->num_outputs; + so->base.stride = templ->stride; + memcpy(so->base.output_buffer, + templ->output_buffer, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_index, + templ->register_index, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_mask, + templ->register_mask, + sizeof(ubyte) * templ->num_outputs); + } + return so; +} + +static void +llvmpipe_bind_stream_output_state(struct pipe_context *pipe, + void *so) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct lp_so_state *lp_so = (struct lp_so_state *) so; + + lp->so = lp_so; + + lp->dirty |= LP_NEW_SO; + + if (lp_so) + draw_set_so_state(lp->draw, &lp_so->base); +} + +static void +llvmpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) +{ + FREE( so ); +} + +static void +llvmpipe_set_stream_output_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + int i; + void *map_buffers[PIPE_MAX_SO_BUFFERS]; + + assert(num_buffers <= PIPE_MAX_SO_BUFFERS); + if (num_buffers > PIPE_MAX_SO_BUFFERS) + num_buffers = PIPE_MAX_SO_BUFFERS; + + lp->dirty |= LP_NEW_SO_BUFFERS; + + for (i = 0; i < num_buffers; ++i) { + void *mapped; + struct llvmpipe_resource *res = llvmpipe_resource(buffers[i]); + + if (!res) { + /* the whole call is invalid, bail out */ + lp->so_target.num_buffers = 0; + draw_set_mapped_so_buffers(lp->draw, 0, 0); + return; + } + + lp->so_target.buffer[i] = res; + 
lp->so_target.offset[i] = offsets[i]; + lp->so_target.so_count[i] = 0; + + mapped = res->data; + if (offsets[i] >= 0) + map_buffers[i] = ((char*)mapped) + offsets[i]; + else { + /* this is a buffer append */ + assert(!"appending not implemented"); + map_buffers[i] = mapped; + } + } + lp->so_target.num_buffers = num_buffers; + + draw_set_mapped_so_buffers(lp->draw, map_buffers, num_buffers); +} + +void +llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_stream_output_state = + llvmpipe_create_stream_output_state; + llvmpipe->pipe.bind_stream_output_state = + llvmpipe_bind_stream_output_state; + llvmpipe->pipe.delete_stream_output_state = + llvmpipe_delete_stream_output_state; + + llvmpipe->pipe.set_stream_output_buffers = + llvmpipe_set_stream_output_buffers; +} diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 6bb82e5ed08..dd897f6072f 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -14,6 +14,8 @@ C_SOURCES = \ r300_hyperz.c \ r300_query.c \ r300_render.c \ + r300_render_stencilref.c \ + r300_render_translate.c \ r300_resource.c \ r300_screen.c \ r300_screen_buffer.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index eb3e1d365e9..ee19e9d2783 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -24,6 +24,8 @@ r300 = env.ConvenienceLibrary( 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', + 'r300_render_stencilref.c', + 'r300_render_translate.c', 'r300_resource.c', 'r300_screen.c', 'r300_screen_buffer.c', diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index cc64fc3d7a7..389354c4e4a 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -25,8 +25,23 @@ #include "util/u_format.h" -static void r300_blitter_save_states(struct r300_context* r300) +enum r300_blitter_op { + R300_CLEAR, + 
R300_CLEAR_SURFACE, + R300_COPY +}; + +static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) +{ + if (r300->query_current) { + r300->blitter_saved_query = r300->query_current; + r300_stop_query(r300); + } + + /* Yeah we have to save all those states to ensure the blitter operation + * is really transparent. The states will be restored by the blitter once + * copying is done. */ util_blitter_save_blend(r300->blitter, r300->blend_state.state); util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); @@ -34,10 +49,34 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_fragment_shader(r300->blitter, r300->fs.state); util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); util_blitter_save_viewport(r300->blitter, &r300->viewport); - util_blitter_save_clip(r300->blitter, &r300->clip); + util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, r300->vertex_buffer); + + if (op & (R300_CLEAR_SURFACE | R300_COPY)) + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + + if (op & R300_COPY) { + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; + + util_blitter_save_fragment_sampler_states( + r300->blitter, state->sampler_state_count, + (void**)state->sampler_states); + + util_blitter_save_fragment_sampler_views( + r300->blitter, state->sampler_view_count, + (struct pipe_sampler_view**)state->sampler_views); + } +} + +static void r300_blitter_end(struct r300_context *r300) +{ + if (r300->blitter_saved_query) { + r300_resume_query(r300, r300->blitter_saved_query); + r300->blitter_saved_query = NULL; + } } /* Clear currently bound buffers. 
*/ @@ -73,13 +112,45 @@ static void r300_clear(struct pipe_context* pipe, struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - r300_blitter_save_states(r300); - + r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); + r300_blitter_end(r300); +} + +/* Clear a region of a color surface to a constant value. */ +static void r300_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_clear_render_target(r300->blitter, dst, rgba, + dstx, dsty, width, height); + r300_blitter_end(r300); +} + +/* Clear a region of a depth stencil surface. */ +static void r300_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, + dstx, dsty, width, height); + r300_blitter_end(r300); } /* Copy a block of pixels from one surface to another using HW. */ @@ -93,27 +164,12 @@ static void r300_hw_copy_region(struct pipe_context* pipe, unsigned width, unsigned height) { struct r300_context* r300 = r300_context(pipe); - struct r300_textures_state* state = - (struct r300_textures_state*)r300->textures_state.state; - - /* Yeah we have to save all those states to ensure this blitter operation - * is really transparent. The states will be restored by the blitter once - * copying is done. 
*/ - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_save_fragment_sampler_states( - r300->blitter, state->sampler_state_count, - (void**)state->sampler_states); - - util_blitter_save_fragment_sampler_views( - r300->blitter, state->sampler_view_count, - (struct pipe_sampler_view**)state->sampler_views); - /* Do a copy */ + r300_blitter_begin(r300, R300_COPY); util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height, TRUE); + r300_blitter_end(r300); } /* Copy a block of pixels from one surface to another. */ @@ -129,14 +185,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - if (dst->format != src->format) { - debug_printf("r300: Implementation error: Format mismatch in %s\n" - " : src: %s dst: %s\n", __FUNCTION__, - util_format_short_name(src->format), - util_format_short_name(dst->format)); - debug_assert(0); - } - if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -187,40 +235,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } -/* Clear a region of a color surface to a constant value. */ -static void r300_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r300_context *r300 = r300_context(pipe); - - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_clear_render_target(r300->blitter, dst, rgba, - dstx, dsty, width, height); -} - -/* Clear a region of a depth stencil surface. 
*/ -static void r300_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r300_context *r300 = r300_context(pipe); - - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); -} - void r300_init_blit_functions(struct r300_context *r300) { r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h new file mode 100644 index 00000000000..69874712442 --- /dev/null +++ b/src/gallium/drivers/r300/r300_cb.h @@ -0,0 +1,142 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * This file contains macros for building command buffers in memory. + * + * Use NEW_CB for buffers with a varying size and it will also allocate + * the buffer. + * Use BEGIN_CB for arrays with a static size. + * + * Example: + * + * uint32_t cb[3]; + * CB_LOCALS; + * + * BEGIN_CB(cb, 3); + * OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); + * OUT_CB(blend_color_red_alpha); + * OUT_CB(blend_color_green_blue); + * END_CB; + * + * And later: + * + * CS_LOCALS; + * WRITE_CS_TABLE(cb, 3); + * + * Or using a little slower variant: + * + * CS_LOCALS; + * BEGIN_CS(cb, 3); + * OUT_CS_TABLE(cb, 3); + * END_CS; + */ + +#ifndef R300_CB_H +#define R300_CB_H + +#include "r300_reg.h" + +/* Yes, I know macros are ugly. However, they are much prettier than the code + * that they neatly hide away, and don't have the cost of function setup, so + * we're going to use them. */ + +#ifdef DEBUG +#define CB_DEBUG(x) x +#else +#define CB_DEBUG(x) +#endif + + +/** + * Command buffer setup. 
+ */ + +#define CB_LOCALS \ + CB_DEBUG(int cs_count = 0;) \ + uint32_t *cs_ptr = NULL; \ + CB_DEBUG((void) cs_count;) (void) cs_ptr; + +#define NEW_CB(ptr, size) do { \ + assert(sizeof(*ptr) == sizeof(uint32_t)); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ + CB_DEBUG(cs_count = size;) \ +} while (0) + +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*ptr) == sizeof(uint32_t)); \ + cs_ptr = ptr; \ + CB_DEBUG(cs_count = size;) \ +} while (0) + +#define BEGIN_CS_AS_CB(r300, size) \ + BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords) + +#define END_CB do { \ + CB_DEBUG(if (cs_count != 0) \ + debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ + cs_count, __FUNCTION__, __FILE__, __LINE__);) \ +} while (0) + + +/** + * Storing pure DWORDs. + */ + +#define OUT_CB(value) do { \ + *cs_ptr = (value); \ + cs_ptr++; \ + CB_DEBUG(cs_count--;) \ +} while (0) + +#define OUT_CB_TABLE(values, count) do { \ + memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ + cs_ptr += count; \ + CB_DEBUG(cs_count -= count;) \ +} while (0) + +#define OUT_CB_32F(value) \ + OUT_CB(fui(value)); + +#define OUT_CB_REG(register, value) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, 0)); \ + OUT_CB(value); \ +} while (0) + +/* Note: This expects count to be the number of registers, + * not the actual packet0 count! 
*/ +#define OUT_CB_REG_SEQ(register, count) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, (count) - 1)); \ +} while (0) + +#define OUT_CB_ONE_REG(register, count) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, (count) - 1) | RADEON_ONE_REG_WR); \ +} while (0) + +#define OUT_CB_PKT3(op, count) \ + OUT_CB(CP_PACKET3(op, count)) + +#endif /* R300_CB_H */ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index e6dca66d4a0..511aa7ee8a2 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->has_hiz = TRUE; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -76,6 +77,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -106,6 +108,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index ab649c38573..65750f54e71 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -42,6 +42,8 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Some chipsets do not have HiZ RAM. */ + boolean has_hiz; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 88ce1867982..46d1ed9dbff 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -23,9 +23,11 @@ #include "draw/draw_context.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "util/u_simple_list.h" #include "util/u_upload_mgr.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_screen.h" @@ -38,9 +40,15 @@ static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); - struct r300_query* query, * temp; + struct r300_query *query, *temp; struct r300_atom *atom; + if (r300->texkill_sampler) { + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&r300->texkill_sampler, + NULL); + } + util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); @@ -54,9 +62,6 @@ static void r300_destroy_context(struct pipe_context* context) } } - /* Free the OQ BO. */ - context->screen->resource_destroy(context->screen, r300->oqbo); - /* If there are any queries pending or not destroyed, remove them now. */ foreach_s(query, temp, &r300->query_list) { remove_from_list(query); @@ -66,9 +71,13 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + translate_cache_destroy(r300->tran.translate_cache); + + FREE(r300->aa_state.state); FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); + FREE(r300->gpu_flush.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); @@ -110,27 +119,36 @@ static void r300_setup_atoms(struct r300_context* r300) * Some atoms never change size, others change every emit - those have * the size of 0 here. 
*/ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant_state, 71); - R300_INIT_ATOM(query_start, 4); + /* RB3D (unpipelined), ZB (unpipelined), US, SC. */ + R300_INIT_ATOM(gpu_flush, 9); + R300_INIT_ATOM(aa_state, 4); + R300_INIT_ATOM(fb_state, 0); R300_INIT_ATOM(ztop_state, 2); + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); - R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); - R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); - R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(rs_state, 0); R300_INIT_ATOM(scissor_state, 3); + /* All sorts of things. */ + R300_INIT_ATOM(invariant_state, 22); + /* VAP. */ R300_INIT_ATOM(viewport_state, 9); - R300_INIT_ATOM(rs_block_state, 0); - R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); - R300_INIT_ATOM(texture_cache_inval, 2); - R300_INIT_ATOM(textures_state, 0); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + /* VAP, RS, GA, GB. */ + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(rs_state, 0); + /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); R300_INIT_ATOM(fs_constants, 0); + /* TX. */ + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); + /* ZB (unpipelined), SU. */ + R300_INIT_ATOM(query_start, 4); /* Replace emission functions for r500. */ if (r300->screen->caps.is_r500) { @@ -140,9 +158,11 @@ static void r300_setup_atoms(struct r300_context* r300) } /* Some non-CSO atoms need explicit space to store the state locally. 
*/ + r300->aa_state.state = CALLOC_STRUCT(r300_aa_state); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); + r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); @@ -162,6 +182,52 @@ static void r300_setup_atoms(struct r300_context* r300) r300->texture_cache_inval.allow_null_state = TRUE; } +/* Not every state tracker calls every driver function before the first draw + * call and we must initialize the command buffers somehow. */ +static void r300_init_states(struct pipe_context *pipe) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_blend_color bc = {{0}}; + struct pipe_clip_state cs = {{{0}}}; + struct pipe_scissor_state ss = {0}; + struct r300_clip_state *clip = + (struct r300_clip_state*)r300->clip_state.state; + struct r300_gpu_flush *gpuflush = + (struct r300_gpu_flush*)r300->gpu_flush.state; + CB_LOCALS; + + pipe->set_blend_color(pipe, &bc); + pipe->set_scissor_state(pipe, &ss); + + /* Initialize the clip state. */ + if (r300_context(pipe)->screen->caps.has_tcl) { + pipe->set_clip_state(pipe, &cs); + } else { + BEGIN_CB(clip->cb, 2); + OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + END_CB; + } + + /* Initialize the GPU flush. */ + { + BEGIN_CB(gpuflush->cb_flush_clean, 6); + + /* Flush and free renderbuffer caches. */ + OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Wait until the GPU is idle. 
+ * This fixes random pixels sometimes appearing probably caused + * by incomplete rendering. */ + OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + END_CB; + } +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv) { @@ -195,9 +261,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_setup_atoms(r300); - /* Open up the OQ BO. */ - r300->oqbo = pipe_buffer_create(screen, - R300_BIND_OQBO, 4096); make_empty_list(&r300->query_list); r300_init_blit_functions(r300); @@ -227,6 +290,39 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (r300->upload_vb == NULL) goto no_upload_vb; + r300->tran.translate_cache = translate_cache_create(); + + r300_init_states(&r300->context); + + /* The KIL opcode needs the first texture unit to be enabled + * on r3xx-r4xx. In order to calm down the CS checker, we bind this + * dummy texture there. */ + if (!r300->screen->caps.is_r500) { + struct pipe_resource *tex; + struct pipe_resource rtempl = {{0}}; + struct pipe_sampler_view vtempl = {{0}}; + + rtempl.target = PIPE_TEXTURE_2D; + rtempl.format = PIPE_FORMAT_I8_UNORM; + rtempl.bind = PIPE_BIND_SAMPLER_VIEW; + rtempl.width0 = 1; + rtempl.height0 = 1; + rtempl.depth0 = 1; + tex = screen->resource_create(screen, &rtempl); + + u_sampler_view_default_template(&vtempl, tex, tex->format); + + r300->texkill_sampler = (struct r300_sampler_view*) + r300->context.create_sampler_view(&r300->context, tex, &vtempl); + + pipe_resource_reference(&tex, NULL); + + /* This will make sure that the dummy texture is set up + * from the beginning even if an application does not use + * textures. 
*/ + r300->textures_state.dirty = TRUE; + } + return &r300->context; no_upload_ib: @@ -238,10 +334,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, boolean r300_check_cs(struct r300_context *r300, unsigned size) { - struct r300_cs_info cs_info; - - r300->rws->get_cs_info(r300->rws, &cs_info); - return size <= cs_info.free; + return size <= r300->rws->get_cs_free_dwords(r300->rws); } void r300_finish(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index cca11f80452..976ef20510c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -31,6 +31,8 @@ #include "util/u_inlines.h" #include "util/u_transfer.h" +#include "translate/translate_cache.h" + #include "r300_defines.h" #include "r300_screen.h" @@ -59,36 +61,54 @@ struct r300_atom { boolean allow_null_state; }; +struct r300_aa_state { + struct r300_surface *dest; + + uint32_t aa_config; + uint32_t aaresolve_ctl; +}; + struct r300_blend_state { - uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ - uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ - uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ - uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */ - uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */ + uint32_t cb[8]; + uint32_t cb_no_readwrite[8]; }; struct r300_blend_color_state { - /* RV515 and earlier */ - uint32_t blend_color; /* R300_RB3D_BLEND_COLOR: 0x4e10 */ - /* R520 and newer */ - uint32_t blend_color_red_alpha; /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */ - uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */ + uint32_t cb[3]; +}; + +struct r300_clip_state { + struct pipe_clip_state clip; + + uint32_t cb[29]; }; struct r300_dsa_state { + struct pipe_depth_stencil_alpha_state dsa; + + /* This is actually a command buffer with named dwords. 
*/ + uint32_t cb_begin; uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */ - uint32_t alpha_reference; /* R500_FG_ALPHA_VALUE: 0x4be0 */ + uint32_t cb_reg_seq; uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ + uint32_t cb_reg; uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + /* The second command buffer disables zbuffer reads and writes. */ + uint32_t cb_no_readwrite[8]; + /* Whether a two-sided stencil is enabled. */ boolean two_sided; /* Whether a fallback should be used for a two-sided stencil ref value. */ boolean two_sided_stencil_ref; }; +struct r300_gpu_flush { + uint32_t cb_flush_clean[6]; +}; + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; @@ -96,7 +116,8 @@ struct r300_rs_state { struct pipe_rasterizer_state rs_draw; uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ - uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ + uint32_t multisample_position_0;/* R300_GB_MSPOS0: 0x4010 */ + uint32_t multisample_position_1;/* R300_GB_MSPOS1: 0x4014 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ @@ -163,16 +184,14 @@ struct r300_sampler_view { /* Copy of r300_texture::texture_format_state with format-specific bits * added. */ struct r300_texture_format_state format; + + /* The texture cache region for this texture. */ + uint32_t texcache_region; }; struct r300_texture_fb_state { - /* Colorbuffer. */ - uint32_t colorpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ - uint32_t us_out_fmt; /* R300_US_OUT_FMT[0-3] */ - - /* Zbuffer. 
*/ - uint32_t depthpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ - uint32_t zb_format; /* R300_ZB_FORMAT */ + uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ + uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ }; struct r300_texture_sampler_state { @@ -224,7 +243,7 @@ struct r300_ztop_state { struct r300_constant_buffer { /* Buffer of constants */ - float constants[256][4]; + uint32_t constants[256][4]; /* Total number of constants */ unsigned count; }; @@ -239,14 +258,23 @@ struct r300_constant_buffer { struct r300_query { /* The kind of query. Currently only OQ is supported. */ unsigned type; - /* The current count of this query. Required to be at least 32 bits. */ - unsigned int count; - /* The offset of this query into the query buffer, in bytes. */ - unsigned offset; + /* The number of pipes where query results are stored. */ + unsigned num_pipes; + /* How many results have been written, in dwords. It's incremented + * after end_query and flush. */ + unsigned num_results; /* if we've flushed the query */ boolean flushed; /* if begin has been emitted */ boolean begin_emitted; + + /* The buffer where query results are stored. */ + struct r300_winsys_buffer *buffer; + /* The size of the buffer. */ + unsigned buffer_size; + /* The domain of the buffer. */ + enum r300_buffer_domain domain; + /* Linked list members. */ struct r300_query* prev; struct r300_query* next; @@ -268,6 +296,19 @@ struct r300_fence { boolean signalled; }; +struct r300_surface { + struct pipe_surface base; + + /* Winsys buffer backing the texture. */ + struct r300_winsys_buffer *buffer; + + enum r300_buffer_domain domain; + + uint32_t offset; + uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. 
*/ +}; + struct r300_texture { /* Parent class */ struct u_resource b; @@ -332,6 +373,9 @@ struct r300_vertex_element_state { enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + /* The size of the vertex, in dwords. */ + unsigned vertex_size_dwords; + /* This might mean two things: * - src_format != hw_format, as discussed above. * - src_offset % 4 != 0. */ @@ -340,6 +384,17 @@ struct r300_vertex_element_state { struct r300_vertex_stream_state vertex_stream; }; +struct r300_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -354,21 +409,30 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; + /* For translating vertex buffers having incompatible vertex layout. */ + struct r300_translate_context tran; /* Vertex buffer for rendering. */ struct pipe_resource* vbo; + /* The KIL opcode needs the first texture unit to be enabled + * on r3xx-r4xx. In order to calm down the CS checker, we bind this + * dummy texture there. */ + struct r300_sampler_view *texkill_sampler; /* Offset into the VBO. */ size_t vbo_offset; - /* Occlusion query buffer. */ - struct pipe_resource* oqbo; - /* Query list. */ + /* The currently active query. */ struct r300_query *query_current; + /* The saved query for blitter operations. */ + struct r300_query *blitter_saved_query; + /* Query list. */ struct r300_query query_list; /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; + /* Anti-aliasing (MSAA) state. */ + struct r300_atom aa_state; /* Blend state. 
*/ struct r300_atom blend_state; /* Blend color state. */ @@ -409,6 +473,8 @@ struct r300_context { struct r300_atom pvs_flush; /* Texture cache invalidate. */ struct r300_atom texture_cache_inval; + /* GPU flush. */ + struct r300_atom gpu_flush; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -425,9 +491,6 @@ struct r300_context { struct vertex_info vertex_info; struct pipe_stencil_ref stencil_ref; - - struct pipe_clip_state clip; - struct pipe_viewport_state viewport; /* Stream locations for SWTCL. */ @@ -462,6 +525,11 @@ static INLINE struct r300_query* r300_query(struct pipe_query* q) return (struct r300_query*)q; } +static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) +{ + return (struct r300_surface*)surf; +} + static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) { return (struct r300_texture*)tex; @@ -480,6 +548,9 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); +boolean r300_check_cs(struct r300_context *r300, unsigned size); +void r300_finish(struct r300_context *r300); + /* Context initialization. 
*/ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_blit_functions(struct r300_context *r300); @@ -489,10 +560,29 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); -boolean r300_check_cs(struct r300_context *r300, unsigned size); -void r300_finish(struct r300_context *r300); +/* r300_query.c */ +void r300_resume_query(struct r300_context *r300, + struct r300_query *query); +void r300_stop_query(struct r300_context *r300); + +/* r300_render_translate.c */ +void r300_begin_vertex_translate(struct r300_context *r300); +void r300_end_vertex_translate(struct r300_context *r300); +void r300_translate_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count); + +/* r300_render_stencilref.c */ +void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); + +/* r300_state.c */ +void r300_mark_fs_code_dirty(struct r300_context *r300); + +/* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); + static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { return SCREEN_DBG_ON(ctx->screen, flags); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 9c8c273902b..1db7da642bd 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -20,171 +20,133 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * This file contains macros for immediate command submission. + */ + #ifndef R300_CS_H #define R300_CS_H -#include "util/u_math.h" - #include "r300_reg.h" +#include "r300_context.h" #include "r300_winsys.h" /* Yes, I know macros are ugly. 
However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#define MAX_CS_SIZE 64 * 1024 / 4 - -#define VERY_VERBOSE_CS 1 -#define VERY_VERBOSE_REGISTERS 1 +#ifdef DEBUG +#define CS_DEBUG(x) x +#else +#define CS_DEBUG(x) +#endif -/* XXX stolen from radeon_reg.h */ -#define RADEON_CP_PACKET0 0x0 - -#define CP_PACKET0(register, count) \ - (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) +/** + * Command submission setup. + */ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ - int cs_count = 0; (void) cs_count; - -#define CHECK_CS(size) \ - assert(r300_check_cs(cs_context_copy, (size))) + CS_DEBUG(int cs_count = 0; (void) cs_count;) #define BEGIN_CS(size) do { \ - CHECK_CS(size); \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ - size, __FUNCTION__, __FILE__, __LINE__); \ - } \ - cs_winsys->begin_cs(cs_winsys, (size), \ - __FILE__, __FUNCTION__, __LINE__); \ - cs_count = size; \ + assert(r300_check_cs(cs_context_copy, (size))); \ + CS_DEBUG(cs_count = size;) \ +} while (0) + +#ifdef DEBUG +#define END_CS do { \ + if (cs_count != 0) \ + debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ + cs_count = 0; \ } while (0) +#else +#define END_CS +#endif + +/** + * Writing pure DWORDs. 
+ */ #define OUT_CS(value) do { \ - if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ - DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \ - } \ cs_winsys->write_cs_dword(cs_winsys, (value)); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_32F(value) do { \ - if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ - DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \ - } \ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_REG(register, value) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ - value, register); \ assert(register); \ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ cs_winsys->write_cs_dword(cs_winsys, value); \ - cs_count -= 2; \ + CS_DEBUG(cs_count -= 2;) \ } while (0) /* Note: This expects count to be the number of registers, * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ - count, register); \ assert(register); \ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_TABLE(values, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \ cs_winsys->write_cs_table(cs_winsys, values, count); \ - cs_count -= count; \ + CS_DEBUG(cs_count -= count;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ - "domains (%d, %d, %d)\n", \ - bo, offset, rd, wd, flags); \ - assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ - cs_count -= 3; \ +#define OUT_CS_ONE_REG(register, 
count) do { \ + assert(register); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ + CS_DEBUG(cs_count--;) \ } while (0) - -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \ - "domains (%d, %d, %d)\n", \ - tex, offset, rd, wd, flags); \ - assert(tex); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \ - cs_count -= 3; \ +#define OUT_CS_PKT3(op, count) do { \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ + CS_DEBUG(cs_count--;) \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ - "domains (%d, %d, %d)\n", \ - bo, rd, wd, flags); \ +/** + * Writing relocations. + */ + +#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ assert(bo); \ - r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ - cs_count -= 2; \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + CS_DEBUG(cs_count -= 3;) \ } while (0) -#define END_CS do { \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ - __FILE__, __LINE__); \ - } \ - if (cs_count != 0) \ - debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \ - cs_winsys->end_cs(cs_winsys, __FILE__, __FUNCTION__, __LINE__); \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ + assert(bo); \ + OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \ } while (0) -#define FLUSH_CS do { \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ - __FILE__, __LINE__); \ - } \ - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \ - r300->flush_counter++; \ - } \ - cs_winsys->flush_cs(cs_winsys); \ +#define 
OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ + assert(tex); \ + OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \ } while (0) -#define RADEON_ONE_REG_WR (1 << 15) - -#define OUT_CS_ONE_REG(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ - count, register); \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ - cs_count--; \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + assert(bo); \ + cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \ + CS_DEBUG(cs_count -= 2;) \ } while (0) -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) -#define OUT_CS_PKT3(op, count) do { \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ - cs_count--; \ -} while (0) +/** + * Command buffer emission. + */ -#define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ - "offset %d\n", bo, offset); \ - assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_dword(cs_winsys, count); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 4; \ +#define WRITE_CS_TABLE(values, count) do { \ + CS_DEBUG(assert(cs_count == 0);) \ + cs_winsys->write_cs_table(cs_winsys, values, count); \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 297791f3ffe..a6cd86e3920 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,7 +29,6 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, - { "cs", DBG_CS, "Command submissions (for debugging)" }, { "draw", DBG_DRAW, 
"Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, @@ -39,6 +38,7 @@ static const struct debug_named_value debug_options[] = { { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, + { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" }, { "stats", DBG_STATS, "Gather statistics (for lulz)" }, /* must be last */ diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index 565a2f3e0e9..d510d80a7bb 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -32,12 +32,6 @@ #define R300_INVALID_FORMAT 0xffff -/* XXX: this is just a bandaid on larger problems in - * r300_screen_buffer.h which doesn't seem to be fully ported to - * gallium-resources. - */ -#define R300_BIND_OQBO (1<<21) - /* Tiling flags. 
*/ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index dd9bca88f10..16cb16895ec 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -43,21 +43,11 @@ void r300_emit_blend_state(struct r300_context* r300, (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); - OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (fb->nr_cbufs) { - OUT_CS(blend->blend_control); - OUT_CS(blend->alpha_blend_control); - OUT_CS(blend->color_channel_mask); + WRITE_CS_TABLE(blend->cb, size); } else { - OUT_CS(0); - OUT_CS(0); - OUT_CS(0); - /* XXX also disable fastfill here once it's supported */ + WRITE_CS_TABLE(blend->cb_no_readwrite, size); } - OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); - END_CS; } void r300_emit_blend_color_state(struct r300_context* r300, @@ -66,40 +56,16 @@ void r300_emit_blend_color_state(struct r300_context* r300, struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; CS_LOCALS(r300); - if (r300->screen->caps.is_r500) { - BEGIN_CS(size); - OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); - OUT_CS(bc->blend_color_red_alpha); - OUT_CS(bc->blend_color_green_blue); - END_CS; - } else { - BEGIN_CS(size); - OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); - END_CS; - } + WRITE_CS_TABLE(bc->cb, size); } void r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { - struct pipe_clip_state* clip = (struct pipe_clip_state*)state; + struct r300_clip_state* clip = (struct r300_clip_state*)state; CS_LOCALS(r300); - if (r300->screen->caps.has_tcl) { - BEGIN_CS(size); - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? 
- R500_PVS_UCP_START : R300_PVS_UCP_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - OUT_CS_TABLE(clip->ucp, 6 * 4); - OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); - END_CS; - } else { - BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); - END_CS; - } + WRITE_CS_TABLE(clip->cb, size); } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -107,27 +73,13 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct pipe_stencil_ref stencil_ref = r300->stencil_ref; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - if (fb->zsbuf) { - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + WRITE_CS_TABLE(&dsa->cb_begin, size); } else { - OUT_CS(0); - OUT_CS(0); + WRITE_CS_TABLE(dsa->cb_no_readwrite, size); } - - OUT_CS(dsa->stencil_ref_mask | stencil_ref.ref_value[0]); - - if (r300->screen->caps.is_r500) { - OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf | stencil_ref.ref_value[1]); - } - END_CS; } static const float * get_rc_constant_state( @@ -175,7 +127,7 @@ static const float * get_rc_constant_state( /* Convert a normal single-precision float into the 7.16 format * used by the R300 fragment shader. */ -static uint32_t pack_float24(float f) +uint32_t pack_float24(float f) { union { float fl; @@ -206,101 +158,27 @@ static uint32_t pack_float24(float f) return float24; } -unsigned r300_get_fs_atom_size(struct r300_context *r300) -{ - struct r300_fragment_shader *fs = r300_fs(r300); - unsigned imm_count = fs->shader->immediates_count; - struct r300_fragment_program_code *code = &fs->shader->code.code.r300; - - return 19 + - code->alu.length * 4 + - (code->tex.length ? 
(1 + code->tex.length) : 0) + - (imm_count ? imm_count * 5 : 0); -} - void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rX00_fragment_program_code* generic_code = &fs->shader->code; - struct r300_fragment_program_code * code = &generic_code->code.r300; - unsigned i; - unsigned imm_count = fs->shader->immediates_count; - unsigned imm_first = fs->shader->externals_count; - unsigned imm_end = generic_code->constants.Count; - struct rc_constant *constants = generic_code->constants.Constants; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_US_CONFIG, code->config); - OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); - OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); - - OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); - OUT_CS_TABLE(code->code_addr, 4); - - OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].rgb_inst); - - OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].rgb_addr); - - OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].alpha_inst); - - OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].alpha_addr); - - if (code->tex.length) { - OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CS_TABLE(code->tex.inst, code->tex.length); - } - - /* Emit immediates. 
*/ - if (imm_count) { - for(i = imm_first; i < imm_end; ++i) { - if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { - const float *data = constants[i].u.Immediate; - - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } - } - } - - OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src); - OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w); - END_CS; + WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); } void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rc_constant_list *constants = &fs->shader->code.constants; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned i, count = fs->shader->externals_count; + unsigned count = fs->shader->externals_count * 4; CS_LOCALS(r300); if (count == 0) return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); - for(i = 0; i < count; ++i) { - const float *data; - assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); + OUT_CS_TABLE(buf->constants, count); END_CS; } @@ -312,6 +190,8 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo unsigned count = fs->shader->rc_state_count; unsigned first = fs->shader->externals_count; unsigned end = constants->Count; + uint32_t cdata[4]; + unsigned j; CS_LOCALS(r300); if (count == 0) @@ -323,85 +203,29 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo const float *data = get_rc_constant_state(r300, &constants->Constants[i]); + for (j = 0; j < 4; j++) + cdata[j] = pack_float24(data[j]); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - 
OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); + OUT_CS_TABLE(cdata, 4); } } END_CS; } -unsigned r500_get_fs_atom_size(struct r300_context *r300) -{ - struct r300_fragment_shader *fs = r300_fs(r300); - unsigned imm_count = fs->shader->immediates_count; - struct r500_fragment_program_code *code = &fs->shader->code.code.r500; - - return 17 + - ((code->inst_end + 1) * 6) + - (imm_count ? imm_count * 7 : 0); -} - void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rX00_fragment_program_code* generic_code = &fs->shader->code; - struct r500_fragment_program_code * code = &generic_code->code.r500; - unsigned i; - unsigned imm_count = fs->shader->immediates_count; - unsigned imm_first = fs->shader->externals_count; - unsigned imm_end = generic_code->constants.Count; - struct rc_constant *constants = generic_code->constants.Constants; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); - OUT_CS_REG(R500_US_CODE_RANGE, - R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); - OUT_CS_REG(R500_US_CODE_OFFSET, 0); - OUT_CS_REG(R500_US_CODE_ADDR, - R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); - - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); - for (i = 0; i <= code->inst_end; i++) { - OUT_CS(code->inst[i].inst0); - OUT_CS(code->inst[i].inst1); - OUT_CS(code->inst[i].inst2); - OUT_CS(code->inst[i].inst3); - OUT_CS(code->inst[i].inst4); - OUT_CS(code->inst[i].inst5); - } - - /* Emit immediates. 
*/ - if (imm_count) { - for(i = imm_first; i < imm_end; ++i) { - if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { - const float *data = constants[i].u.Immediate; - - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, - R500_GA_US_VECTOR_INDEX_TYPE_CONST | - (i & R500_GA_US_VECTOR_INDEX_MASK)); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_TABLE(data, 4); - } - } - } - - OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src); - OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w); - END_CS; + WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); } void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rc_constant_list *constants = &fs->shader->code.constants; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned i, count = fs->shader->externals_count; + unsigned count = fs->shader->externals_count * 4; CS_LOCALS(r300); if (count == 0) @@ -409,11 +233,8 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); - for(i = 0; i < count; ++i) { - assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - } - OUT_CS_TABLE(buf->constants, count * 4); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); + OUT_CS_TABLE(buf->constants, count); END_CS; } @@ -446,83 +267,117 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo END_CS; } +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + CS_LOCALS(r300); + + BEGIN_CS(size); + + /* Set up scissors. + * By writing to the SC registers, SC & US assert idle. 
*/ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + if (r300->screen->caps.is_r500) { + OUT_CS(0); + OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | + ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + } else { + OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | + (1440 << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ + OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); + END_CS; +} + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_aa_state *aa = (struct r300_aa_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); + + if (aa->dest) { + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain, 0); + + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain, 0); + } + + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); + END_CS; +} + void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; - struct r300_texture* tex; - struct pipe_surface* surf; - int i; + struct r300_surface* surf; + unsigned i; CS_LOCALS(r300); BEGIN_CS(size); - /* Flush and free renderbuffer caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - /* Set the number of colorbuffers. 
*/ - if (fb->nr_cbufs > 1) { - if (r300->screen->caps.is_r500) { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs) | - R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); - } else { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs)); - } + /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not + * what we usually want. */ + if (r300->screen->caps.is_r500) { + OUT_CS_REG(R300_RB3D_CCTL, + R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); } else { - OUT_CS_REG(R300_RB3D_CCTL, 0x0); + OUT_CS_REG(R300_RB3D_CCTL, 0); } /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { - surf = fb->cbufs[i]; - tex = r300_texture(surf->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], - 0, tex->domain, 0); - - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); - } - for (; i < 4; i++) { - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); } /* Set up a zbuffer. 
*/ if (fb->zsbuf) { - surf = fb->zsbuf; - tex = r300_texture(surf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + surf = r300_surface(fb->zsbuf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); + OUT_CS_REG(R300_ZB_FORMAT, surf->format); + OUT_CS_REG(R300_ZB_BW_CNTL, 0); - OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], - 0, tex->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + + OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0); + + /* HiZ RAM. */ + if (r300->screen->caps.has_hiz) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + + /* Z Mask RAM. (compressed zbuffer) */ + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (r300->screen->caps.is_r500) { - OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); - } else { - OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | - (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + /* Colorbuffer format in the US block. 
+ * (must be written after unpipelined regs) */ + OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); + for (i = 0; i < fb->nr_cbufs; i++) { + OUT_CS(r300_surface(fb->cbufs[i])->format); + } + for (; i < 4; i++) { + OUT_CS(R300_US_OUT_FMT_UNUSED); } END_CS; } @@ -544,13 +399,14 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; + query->flushed = FALSE; } - static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -569,28 +425,28 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 3) * 4, + 0, query->domain, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 2) * 4, + 0, query->domain, 0); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 
3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, + 0, query->domain, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, + 0, query->domain, 0); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -606,12 +462,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -619,15 +476,16 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, r300_buffer(r300->oqbo)->domain, 0); + 
OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -652,18 +510,63 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_end_frag_pipes(r300, query); query->begin_emitted = FALSE; + query->num_results += query->num_pipes; + + /* XXX grab all the results and reset the counter. */ + if (query->num_results >= query->buffer_size / 4 - 4) { + query->num_results = (query->buffer_size / 4) / 2; + fprintf(stderr, "r300: Rewinding OQBO...\n"); + } } void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { - struct r300_rs_state* rs = (struct r300_rs_state*)state; + struct r300_rs_state* rs = state; + struct pipe_framebuffer_state* fb = r300->fb_state.state; float scale, offset; + unsigned mspos0, mspos1; CS_LOCALS(r300); BEGIN_CS(size); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); - OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); + /* Multisampling. Depends on framebuffer sample count. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. 
*/ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + mspos0 = rs->multisample_position_0; + mspos1 = rs->multisample_position_1; + break; + } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); + } else { + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(rs->multisample_position_0); + OUT_CS(rs->multisample_position_1); + } + } OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); @@ -940,6 +843,17 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); + /* Amount of time to wait for vertex fetches in PVS */ + OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); + + OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + OUT_CS_32F(1.0); + + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -1004,12 +918,7 @@ void r300_emit_viewport_state(struct r300_context* r300, BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); + OUT_CS_TABLE(&viewport->xscale, 6); OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); END_CS; } @@ -1091,10 +1000,9 @@ validate: } } /* ...occlusion query buffer... 
*/ - if (r300->query_start.dirty || - (r300->query_current && r300->query_current->begin_emitted)) { - if (!r300_add_buffer(r300->rws, r300->oqbo, - 0, r300_buffer(r300->oqbo)->domain)) { + if (r300->query_current) { + if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer, + 0, r300->query_current->domain)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 90660883423..0d4e1f7a23b 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,6 +29,8 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; +uint32_t pack_float24(float f); + void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, @@ -43,16 +45,12 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); -unsigned r300_get_fs_atom_size(struct r300_context *r300); - void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state); -unsigned r500_get_fs_atom_size(struct r300_context *r300); - void r500_emit_fs(struct r300_context* r300, unsigned size, void *state); void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -61,6 +59,10 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); + void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_end(struct 
r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 360b19a0c1e..ba840bfff81 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -39,8 +39,6 @@ static void r300_flush(struct pipe_context* pipe, struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; - CS_LOCALS(r300); - (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -52,7 +50,10 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_query_end(r300); - FLUSH_CS; + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + r300->flush_counter++; + } + r300->rws->flush_cs(r300->rws); r300->dirty_hw = 0; /* New kitchen sink, baby. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index a434808046c..424f831731d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -28,7 +28,9 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_ureg.h" +#include "r300_cb.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_screen.h" #include "r300_fs.h" #include "r300_reg.h" @@ -229,6 +231,122 @@ static void r300_dummy_fragment_shader( ureg_destroy(ureg); } +static void r300_emit_fs_code_to_buffer( + struct r300_context *r300, + struct r300_fragment_shader_code *shader) +{ + struct rX00_fragment_program_code *generic_code = &shader->code; + unsigned imm_count = shader->immediates_count; + unsigned imm_first = shader->externals_count; + unsigned imm_end = generic_code->constants.Count; + struct rc_constant *constants = generic_code->constants.Constants; + unsigned i; + CB_LOCALS; + + if (r300->screen->caps.is_r500) { + struct r500_fragment_program_code *code = &generic_code->code.r500; + + shader->cb_code_size = 17 + + ((code->inst_end + 1) * 6) + + imm_count * 7; + + 
NEW_CB(shader->cb_code, shader->cb_code_size); + OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CB_REG(R500_US_CODE_RANGE, + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); + OUT_CB_REG(R500_US_CODE_OFFSET, 0); + OUT_CB_REG(R500_US_CODE_ADDR, + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); + + OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); + OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); + for (i = 0; i <= code->inst_end; i++) { + OUT_CB(code->inst[i].inst0); + OUT_CB(code->inst[i].inst1); + OUT_CB(code->inst[i].inst2); + OUT_CB(code->inst[i].inst3); + OUT_CB(code->inst[i].inst4); + OUT_CB(code->inst[i].inst5); + } + + /* Emit immediates. */ + if (imm_count) { + for(i = imm_first; i < imm_end; ++i) { + if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { + const float *data = constants[i].u.Immediate; + + OUT_CB_REG(R500_GA_US_VECTOR_INDEX, + R500_GA_US_VECTOR_INDEX_TYPE_CONST | + (i & R500_GA_US_VECTOR_INDEX_MASK)); + OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4); + OUT_CB_TABLE(data, 4); + } + } + } + } else { /* r300 */ + struct r300_fragment_program_code *code = &generic_code->code.r300; + + shader->cb_code_size = 19 + + (r300->screen->caps.is_r400 ? 2 : 0) + + code->alu.length * 4 + + (code->tex.length ? 
(1 + code->tex.length) : 0) + + imm_count * 5; + + NEW_CB(shader->cb_code, shader->cb_code_size); + + if (r300->screen->caps.is_r400) + OUT_CB_REG(R400_US_CODE_BANK, 0); + + OUT_CB_REG(R300_US_CONFIG, code->config); + OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); + OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); + OUT_CB_TABLE(code->code_addr, 4); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].rgb_addr); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].alpha_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].alpha_addr); + + if (code->tex.length) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); + OUT_CB_TABLE(code->tex.inst, code->tex.length); + } + + /* Emit immediates. */ + if (imm_count) { + for(i = imm_first; i < imm_end; ++i) { + if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { + const float *data = constants[i].u.Immediate; + + OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); + OUT_CB(pack_float24(data[0])); + OUT_CB(pack_float24(data[1])); + OUT_CB(pack_float24(data[2])); + OUT_CB(pack_float24(data[3])); + } + } + } + } + + OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src); + OUT_CB_REG(R300_US_W_FMT, shader->us_out_w); + END_CB; +} + static void r300_translate_fragment_shader( struct r300_context* r300, struct r300_fragment_shader_code* shader, @@ -338,6 +456,9 @@ static void r300_translate_fragment_shader( /* And, finally... */ rc_destroy(&compiler.Base); + + /* Build the command buffer. 
*/ + r300_emit_fs_code_to_buffer(r300, shader); } boolean r300_pick_fragment_shader(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 1cc43553595..51bfa88c5ef 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -50,6 +50,9 @@ struct r300_fragment_shader_code { struct r300_fragment_program_external_state compare_state; struct rX00_fragment_program_code code; + unsigned cb_code_size; + uint32_t *cb_code; + struct r300_fragment_shader_code* next; }; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index b41b6b1508d..e5c76589528 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -31,33 +31,46 @@ /* The ZTOP state */ /*****************************************************************************/ -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +static boolean r300_dsa_writes_stencil( + struct pipe_stencil_state *s) { - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ + return s->enabled && s->writemask && + (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP || + s->zpass_op != PIPE_STENCIL_OP_KEEP); +} + +static boolean r300_dsa_writes_depth_stencil( + struct pipe_depth_stencil_alpha_state *dsa) +{ + /* We are interested only in the cases when a depth or stencil value + * can be changed. */ + + if (dsa->depth.enabled && dsa->depth.writemask && + dsa->depth.func != PIPE_FUNC_NEVER) + return TRUE; + + if (r300_dsa_writes_stencil(&dsa->stencil[0]) || + r300_dsa_writes_stencil(&dsa->stencil[1])) + return TRUE; - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. 
*/ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); + return FALSE; } -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +static boolean r300_dsa_alpha_test_enabled( + struct pipe_depth_stencil_alpha_state *dsa) { /* We are interested only in the cases when alpha testing can kill * a fragment. */ - uint32_t af = dsa->alpha_function; - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; + return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS; } static void r300_update_ztop(struct r300_context* r300) { struct r300_ztop_state* ztop_state = (struct r300_ztop_state*)r300->ztop_state.state; + uint32_t old_ztop = ztop_state->z_buffer_top; /* This is important enough that I felt it warranted a comment. * @@ -99,7 +112,8 @@ static void r300_update_ztop(struct r300_context* r300) ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - r300->ztop_state.dirty = TRUE; + if (ztop_state->z_buffer_top != old_ztop) + r300->ztop_state.dirty = TRUE; } void r300_update_hyperz_state(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 7c088063683..10086ee9256 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_screen.h" #include "r300_emit.h" +#include "r300_winsys.h" #include <stdio.h> @@ -34,30 +35,29 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300->screen; - unsigned query_size; - struct r300_query *q, *qptr; + struct r300_query *q; + + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) { + return NULL; + } q = CALLOC_STRUCT(r300_query); + if 
(!q) + return NULL; q->type = query_type; - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + q->domain = R300_DOMAIN_GTT; + q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - query_size = r300screen->caps.num_z_pipes * sizeof(uint32_t); + q->num_pipes = r300screen->caps.num_z_pipes; else - query_size = r300screen->caps.num_frag_pipes * sizeof(uint32_t); + q->num_pipes = r300screen->caps.num_frag_pipes; - if (!is_empty_list(&r300->query_list)) { - qptr = last_elem(&r300->query_list); - q->offset = qptr->offset + query_size; - } insert_at_tail(&r300->query_list, q); - /* XXX */ - if (q->offset >= 4096) { - q->offset = 0; - fprintf(stderr, "r300: Rewinding OQBO...\n"); - } + /* Open up the occlusion query buffer. */ + q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size); return (struct pipe_query*)q; } @@ -65,18 +65,26 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_query* q = (struct r300_query*)query; + struct r300_context *r300 = r300_context(pipe); + struct r300_query* q = r300_query(query); + r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); remove_from_list(q); FREE(query); } +void r300_resume_query(struct r300_context *r300, + struct r300_query *query) +{ + r300->query_current = query; + r300->query_start.dirty = TRUE; +} + static void r300_begin_query(struct pipe_context* pipe, struct pipe_query* query) { - uint32_t value = ~0U; struct r300_context* r300 = r300_context(pipe); - struct r300_query* q = (struct r300_query*)query; + struct r300_query* q = r300_query(query); if (r300->query_current != NULL) { fprintf(stderr, "r300: begin_query: " @@ -85,30 +93,29 @@ static void r300_begin_query(struct pipe_context* pipe, return; } - pipe_buffer_write(pipe, - r300->oqbo, - q->offset, - sizeof value, - &value); + q->num_results = 0; + r300_resume_query(r300, q); +} - q->flushed = 
FALSE; - r300->query_current = q; - r300->query_start.dirty = TRUE; +void r300_stop_query(struct r300_context *r300) +{ + r300_emit_query_end(r300); + r300->query_current = NULL; } static void r300_end_query(struct pipe_context* pipe, struct pipe_query* query) { struct r300_context* r300 = r300_context(pipe); + struct r300_query *q = r300_query(query); - if ((struct r300_query*)query != r300->query_current) { + if (q != r300->query_current) { fprintf(stderr, "r300: end_query: Got invalid query.\n"); assert(0); return; } - r300_emit_query_end(r300); - r300->query_current = NULL; + r300_stop_query(r300); } static boolean r300_get_query_result(struct pipe_context* pipe, @@ -117,54 +124,28 @@ static boolean r300_get_query_result(struct pipe_context* pipe, void* vresult) { struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300->screen; - struct r300_query *q = (struct r300_query*)query; - struct pipe_transfer *transfer; - unsigned flags = PIPE_TRANSFER_READ; - uint32_t* map; - uint32_t temp = 0; - unsigned i, num_results; + struct r300_query *q = r300_query(query); + unsigned flags, i; + uint32_t temp, *map; uint64_t *result = (uint64_t*)vresult; - if (q->flushed == FALSE) + if (!q->flushed) pipe->flush(pipe, 0, NULL); - if (!wait) { - flags |= PIPE_TRANSFER_DONTBLOCK; - } - map = pipe_buffer_map(pipe, r300->oqbo, flags, &transfer); + flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); + + map = r300->rws->buffer_map(r300->rws, q->buffer, flags); if (!map) return FALSE; - map += q->offset / 4; - if (r300screen->caps.family == CHIP_FAMILY_RV530) - num_results = r300screen->caps.num_z_pipes; - else - num_results = r300screen->caps.num_frag_pipes; - - for (i = 0; i < num_results; i++) { - if (*map == ~0U) { - /* Looks like our results aren't ready yet. */ - if (wait) { - fprintf(stderr, "r300: Despite waiting, OQ results haven't " - "come in yet. 
This is a driver bug.\n" - "r300: Returning bogus results to avoid " - "a possible infinite loop...\n"); - temp = 987654321; - } else { - temp = ~0U; - } - break; - } + /* Sum up the results. */ + temp = 0; + for (i = 0; i < q->num_results; i++) { temp += *map; map++; } - pipe_buffer_unmap(pipe, r300->oqbo, transfer); - if (temp == ~0U) { - /* Our results haven't been written yet... */ - return FALSE; - } + r300->rws->buffer_unmap(r300->rws, q->buffer); *result = temp; return TRUE; @@ -192,11 +173,61 @@ static void r300_render_condition(struct pipe_context *pipe, } } +/*************************************************************************** + * Fake occlusion queries (for debugging) + ***************************************************************************/ + +static unsigned r300_fake_query; + +static struct pipe_query *r300_fake_create_query(struct pipe_context *pipe, + unsigned query_type) +{ + return (struct pipe_query*)&r300_fake_query; +} + +static void r300_fake_destroy_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static void r300_fake_begin_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static void r300_fake_end_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static boolean r300_fake_get_query_result(struct pipe_context* pipe, + struct pipe_query* query, + boolean wait, void* vresult) +{ + uint64_t *result = (uint64_t*)vresult; + *result = 1000000; + return TRUE; +} + +static void r300_fake_render_condition(struct pipe_context *pipe, + struct pipe_query *query, uint mode) +{ +} + void r300_init_query_functions(struct r300_context* r300) { - r300->context.create_query = r300_create_query; - r300->context.destroy_query = r300_destroy_query; - r300->context.begin_query = r300_begin_query; - r300->context.end_query = r300_end_query; - r300->context.get_query_result = r300_get_query_result; - r300->context.render_condition = r300_render_condition; + if (DBG_ON(r300, DBG_FAKE_OCC)) { + 
r300->context.create_query = r300_fake_create_query; + r300->context.destroy_query = r300_fake_destroy_query; + r300->context.begin_query = r300_fake_begin_query; + r300->context.end_query = r300_fake_end_query; + r300->context.get_query_result = r300_fake_get_query_result; + r300->context.render_condition = r300_fake_render_condition; + } else { + r300->context.create_query = r300_create_query; + r300->context.destroy_query = r300_destroy_query; + r300->context.begin_query = r300_begin_query; + r300->context.end_query = r300_end_query; + r300->context.get_query_result = r300_get_query_result; + r300->context.render_condition = r300_render_condition; + } } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index c4fa19ace53..180560175a4 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1630,6 +1630,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_GAMMA (1 << 21) # define R300_TX_FORMAT_YUV_TO_RGB (1 << 22) +# define R300_TX_CACHE(x) ((x) << 27) +# define R300_TX_CACHE_WHOLE 0 +/* reserved */ +# define R300_TX_CACHE_HALF_0 2 +# define R300_TX_CACHE_HALF_1 3 +# define R300_TX_CACHE_FOURTH_0 4 +# define R300_TX_CACHE_FOURTH_1 5 +# define R300_TX_CACHE_FOURTH_2 6 +# define R300_TX_CACHE_FOURTH_3 7 +# define R300_TX_CACHE_EIGHTH_0 8 +# define R300_TX_CACHE_EIGHTH_1 9 +# define R300_TX_CACHE_EIGHTH_2 10 +# define R300_TX_CACHE_EIGHTH_3 11 +# define R300_TX_CACHE_EIGHTH_4 12 +# define R300_TX_CACHE_EIGHTH_5 13 +# define R300_TX_CACHE_EIGHTH_6 14 +# define R300_TX_CACHE_EIGHTH_7 15 +# define R300_TX_CACHE_SIXTEENTH_0 16 +# define R300_TX_CACHE_SIXTEENTH_1 17 +# define R300_TX_CACHE_SIXTEENTH_2 18 +# define R300_TX_CACHE_SIXTEENTH_3 19 +# define R300_TX_CACHE_SIXTEENTH_4 20 +# define R300_TX_CACHE_SIXTEENTH_5 21 +# define R300_TX_CACHE_SIXTEENTH_6 22 +# define R300_TX_CACHE_SIXTEENTH_7 23 +# define R300_TX_CACHE_SIXTEENTH_8 24 +# define R300_TX_CACHE_SIXTEENTH_9 25 +# define 
R300_TX_CACHE_SIXTEENTH_10 26 +# define R300_TX_CACHE_SIXTEENTH_11 27 +# define R300_TX_CACHE_SIXTEENTH_12 28 +# define R300_TX_CACHE_SIXTEENTH_13 29 +# define R300_TX_CACHE_SIXTEENTH_14 30 +# define R300_TX_CACHE_SIXTEENTH_15 31 + #define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ # define R300_TX_PITCHMASK_SHIFT 0 # define R300_TX_PITCHMASK_MASK (2047 << 0) @@ -2639,6 +2673,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +/* Z Mask RAM is a Z compression buffer. + * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks, + * that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different pipe. + */ + +/* The dword offset into Z mask RAM (bits 18:4) */ +#define R300_ZB_ZMASK_OFFSET 0x4f30 + +/* Z Mask Pitch. */ +#define R300_ZB_ZMASK_PITCH 0x4f34 + +/* Access to Z Mask RAM in a manner similar to HiZ RAM. + * The indices are autoincrementing. */ +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3437,9 +3489,18 @@ enum { # define RADEON_WAIT_3D_IDLECLEAN (1 << 17) # define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) +#define R200_3D_DRAW_IMMD_2 0xC0003500 + +#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */ #define RADEON_CP_PACKET3 0xC0000000 -#define R200_3D_DRAW_IMMD_2 0xC0003500 +#define RADEON_ONE_REG_WR (1 << 15) + +#define CP_PACKET0(register, count) \ + (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) + +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) #endif /* _R300_REG_H */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 80dea8be989..99ad162504c 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -35,6 +35,7 @@ #include "util/u_prim.h" #include 
"r300_cs.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_screen_buffer.h" #include "r300_emit.h" @@ -43,6 +44,8 @@ #include <limits.h> +#define IMMD_DWORDS 32 + static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -269,7 +272,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, return FALSE; } - if (count > 10) { + if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) { return FALSE; } @@ -308,10 +311,10 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->velems->count; - unsigned i, v, vbi, dw, elem_offset, dwords; + unsigned i, v, vbi, dwords; /* Size of the vertex, in dwords. */ - unsigned vertex_size = 0; + unsigned vertex_size = r300->velems->vertex_size_dwords; /* Offsets of the attribute, in dwords, from the start of the vertex. */ unsigned offset[PIPE_MAX_ATTRIBS]; @@ -327,14 +330,13 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; - CS_LOCALS(r300); + CB_LOCALS; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; offset[i] = velem->src_offset / 4; size[i] = r300->velems->hw_format_size[i] / 4; - vertex_size += size[i]; vbi = velem->vertex_buffer_index; /* Map the buffer. 
*/ @@ -344,8 +346,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); - map[vbi] += vbuf->buffer_offset / 4; stride[vbi] = vbuf->stride / 4; + map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start; } } @@ -353,30 +355,26 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); - BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, + BEGIN_CS_AS_CB(r300, dwords); + OUT_CB_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CB(count - 1); + OUT_CB(0); + OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - velem = &r300->velems->velem[i]; - vbi = velem->vertex_buffer_index; - elem_offset = offset[i] + stride[vbi] * (v + start); + vbi = r300->velems->velem[i].vertex_buffer_index; - for (dw = 0; dw < size[i]; dw++) { - OUT_CS(map[vbi][elem_offset + dw]); - } + OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]); } } - END_CS; + END_CB; /* Unmap buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { @@ -482,111 +480,6 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } -static void r300_shorten_ubyte_elts(struct r300_context* r300, - struct pipe_resource** elts, - int index_bias, - unsigned start, - unsigned count) -{ - struct pipe_context* context = &r300->context; - struct pipe_screen* screen = r300->context.screen; - struct pipe_resource* new_elts; - unsigned char *in_map; - unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; - - new_elts = pipe_buffer_create(screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); - - in_map += start; - - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_ushort_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned short *in_map; - unsigned short *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - -static void 
r300_rebuild_uint_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned int *in_map; - unsigned int *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - /* This is the fast-path drawing & emission for HW TCL. */ static void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_resource* indexBuffer, @@ -605,46 +498,34 @@ static void r300_draw_range_elements(struct pipe_context* pipe, r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ + boolean translate = FALSE; if (r300->skip_rendering) { return; } - if (r300->incompatible_vb_layout || - r300->velems->incompatible_layout) { + if (!u_trim_pipe_prim(mode, &count)) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + /* Index buffer range checking. */ + if ((start + count) * indexSize > indexBuffer->width0) { + fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); return; } + /* Set up fallback for incompatible vertex layout if needed. 
*/ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300); + translate = TRUE; + } + if (indexBias && !index_bias_supported(r300)) { r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } - /* Rebuild the index buffer if needed. */ - switch (indexSize) { - case 1: - r300_shorten_ubyte_elts(r300, &indexBuffer, index_offset, start, count); - indexSize = 2; - start = 0; - break; - - case 2: - if (start % 2 != 0 || index_offset) { - r300_rebuild_ushort_elts(r300, &indexBuffer, index_offset, start, count); - start = 0; - } - break; - - case 4: - if (index_offset) { - r300_rebuild_uint_elts(r300, &indexBuffer, index_offset, start, count); - start = 0; - } - break; - } + r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, + &start, count); r300_update_derived_state(r300); r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); @@ -681,6 +562,10 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (indexBuffer != orgIndexBuffer) { pipe_resource_reference( &indexBuffer, NULL ); } + + if (translate) { + r300_end_vertex_translate(r300); + } } /* Simple helpers for context setup. Should probably be moved to util. */ @@ -704,18 +589,20 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, count > 65536 && r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; + boolean translate = FALSE; if (r300->skip_rendering) { return; } - if (r300->incompatible_vb_layout || - r300->velems->incompatible_layout) { + if (!u_trim_pipe_prim(mode, &count)) { return; } - if (!u_trim_pipe_prim(mode, &count)) { - return; + /* Set up fallback for incompatible vertex layout if needed. 
*/ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300); + translate = TRUE; } r300_update_derived_state(r300); @@ -747,6 +634,10 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } u_upload_flush(r300->upload_vb); } + + if (translate) { + r300_end_vertex_translate(r300); + } } /**************************************************************************** @@ -1026,7 +917,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) / (r300render->r300->vertex_info.size * 4) - 1; unsigned short_count; - struct r300_cs_info cs_info; + unsigned free_dwords; CS_LOCALS(r300); @@ -1039,9 +930,9 @@ static void r300_render_draw_elements(struct vbuf_render* render, NULL, 256, 0, 0, &end_cs_dwords); while (count) { - r300->rws->get_cs_info(r300->rws, &cs_info); + free_dwords = r300->rws->get_cs_free_dwords(r300->rws); - short_count = MIN2(count, (cs_info.free - end_cs_dwords - 6) * 2); + short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); BEGIN_CS(6 + (short_count+1)/2); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -1127,132 +1018,45 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) } /**************************************************************************** - * Two-sided stencil reference value fallback. It's designed to be as much - * separate from rest of the driver as possible. 
+ * End of SW TCL functions * ***************************************************************************/ -struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); - - uint32_t rs_cull_mode; - uint32_t zb_stencilrefmask; - ubyte ref_value_front; -}; - -static boolean r300_stencilref_needed(struct r300_context *r300) +static void r300_resource_resolve(struct pipe_context* pipe, + struct pipe_resource* dest, + struct pipe_subresource subdest, + struct pipe_resource* src, + struct pipe_subresource subsrc) { - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - return dsa->two_sided_stencil_ref || - (dsa->two_sided && - r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); -} - -/* Set drawing for front faces. */ -static void r300_stencilref_begin(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - /* Save state. */ - sr->rs_cull_mode = rs->cull_mode; - sr->zb_stencilrefmask = dsa->stencil_ref_mask; - sr->ref_value_front = r300->stencil_ref.ref_value[0]; - - /* We *cull* pixels, therefore no need to mask out the bits. */ - rs->cull_mode |= R300_CULL_BACK; - - r300->rs_state.dirty = TRUE; -} - -/* Set drawing for back faces. 
*/ -static void r300_stencilref_switch_side(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; - dsa->stencil_ref_mask = dsa->stencil_ref_bf; - r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; - - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; -} - -/* Restore the original state. */ -static void r300_stencilref_end(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - /* Restore state. */ - rs->cull_mode = sr->rs_cull_mode; - dsa->stencil_ref_mask = sr->zb_stencilrefmask; - r300->stencil_ref.ref_value[0] = sr->ref_value_front; - - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; -} - -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if 
(!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) -{ - r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; - - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; + struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen, + src, subsrc.face, subsrc.level, 0, 0); + float color[] = {0, 0, 0, 0}; + + DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); + + /* Enable AA resolve. */ + aa->dest = r300_surface( + dest->screen->get_tex_surface(dest->screen, dest, subdest.face, + subdest.level, 0, 0)); + + aa->aaresolve_ctl = + R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; + r300->aa_state.size = 12; + r300->aa_state.dirty = TRUE; + + /* Resolve the surface. */ + r300->context.clear_render_target(pipe, + srcsurf, color, 0, 0, src->width0, src->height0); + + /* Disable AA resolve. 
*/ + aa->aaresolve_ctl = 0; + r300->aa_state.size = 4; + r300->aa_state.dirty = TRUE; + + pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); + pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); } void r300_init_render_functions(struct r300_context *r300) @@ -1269,7 +1073,9 @@ void r300_init_render_functions(struct r300_context *r300) r300->context.draw_range_elements = r300_swtcl_draw_range_elements; } - /* Plug in two-sided stencil reference value fallback if needed. */ + r300->context.resource_resolve = r300_resource_resolve; + + /* Plug in the two-sided stencil reference value fallback if needed. */ if (!r300->screen->caps.is_r500) r300_plug_in_stencil_ref_fallback(r300); } diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c new file mode 100644 index 00000000000..d509ded3ec8 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -0,0 +1,158 @@ +/* + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * The two-sided stencil reference value fallback for r3xx-r4xx chips. + * These chips support two-sided stencil functions but they do not support + * a two-sided reference value. + * + * The functions below split every draw call which uses the two-sided + * reference value into two draw calls -- the first one renders front faces + * and the second renders back faces with the other reference value. + */ + +#include "r300_context.h" +#include "r300_reg.h" + +struct r300_stencilref_context { + void (*draw_arrays)(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); + + void (*draw_range_elements)( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count); + + uint32_t rs_cull_mode; + uint32_t zb_stencilrefmask; + ubyte ref_value_front; +}; + +static boolean r300_stencilref_needed(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + return dsa->two_sided_stencil_ref || + (dsa->two_sided && + r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); +} + +/* Set drawing for front faces. */ +static void r300_stencilref_begin(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Save state. 
*/ + sr->rs_cull_mode = rs->cull_mode; + sr->zb_stencilrefmask = dsa->stencil_ref_mask; + sr->ref_value_front = r300->stencil_ref.ref_value[0]; + + /* We *cull* pixels, therefore no need to mask out the bits. */ + rs->cull_mode |= R300_CULL_BACK; + + r300->rs_state.dirty = TRUE; +} + +/* Set drawing for back faces. */ +static void r300_stencilref_switch_side(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; + dsa->stencil_ref_mask = dsa->stencil_ref_bf; + r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +/* Restore the original state. */ +static void r300_stencilref_end(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Restore state. 
*/ + rs->cull_mode = sr->rs_cull_mode; + dsa->stencil_ref_mask = sr->zb_stencilrefmask; + r300->stencil_ref.ref_value[0] = sr->ref_value_front; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_arrays(pipe, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_end(r300); + } +} + +static void r300_stencilref_draw_range_elements( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_end(r300); + } +} + +/** + * Install the two-sided stencil reference fallback: remember the context's + * current draw_arrays/draw_range_elements and replace them with the + * wrappers above. If the bookkeeping struct cannot be allocated, the + * context keeps its original draw functions. + */ +void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) +{ + r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); + + /* The wrappers read the saved function pointers from this struct, so + * do not install them when the allocation failed. */ + if (!r300->stencilref_fallback) + return; + + /* Save original draw functions. */ + r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; + r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + + /* Override the draw functions. 
*/ + r300->context.draw_arrays = r300_stencilref_draw_arrays; + r300->context.draw_range_elements = r300_stencilref_draw_range_elements; +} diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c new file mode 100644 index 00000000000..0ea11e5bfc2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -0,0 +1,322 @@ +/* + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * The functions below translate vertex and index buffers to the layout + * compatible with the hardware, so that all vertex and index fetches are + * DWORD-aligned and all used vertex and index formats are supported. + * For indices, an optional index offset is added to each index. 
+ */ + +#include "r300_context.h" +#include "translate/translate.h" + +void r300_begin_vertex_translate(struct r300_context *r300) +{ + struct pipe_context *pipe = &r300->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r300_vertex_element_state *ve = r300->velems; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->velem[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. 
*/ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->velem[i].vertex_buffer_index; + te->input_format = ve->velem[i].src_format; + te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(r300->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < r300->vertex_buffer_count; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = r300->vertex_buffer_max_index + 1; + + out_buffer = pipe_buffer_create(&r300->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < r300->vertex_buffer_count; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + r300->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. 
*/ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + r300->tran.saved_velems = r300->velems; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->velem[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->velem[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = r300->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->velem[i], + sizeof(struct pipe_vertex_element)); + } + } + + r300->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +void r300_end_vertex_translate(struct r300_context *r300) +{ + struct pipe_context *pipe = &r300->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); + + /* Delete the now-unused VBO. 
*/ + pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer, + NULL); +} + +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_resource** elts, + int index_bias, + unsigned start, + unsigned count) +{ + struct pipe_context* context = &r300->context; + struct pipe_screen* screen = r300->context.screen; + struct pipe_resource* new_elts; + unsigned char *in_map; + unsigned short *out_map; + struct pipe_transfer *src_transfer, *dst_transfer; + unsigned i; + + new_elts = pipe_buffer_create(screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); + out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); + + in_map += start; + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, src_transfer); + pipe_buffer_unmap(context, new_elts, dst_transfer); + + *elts = new_elts; +} + +static void r300_rebuild_ushort_elts(struct r300_context *r300, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_context *context = &r300->context; + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned short *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} + +/* Copy "count" 32-bit indices, beginning at element "start" of *elts, into a + * newly created index buffer, adding index_bias to each one, and make *elts + * point at the new buffer. */ +static void r300_rebuild_uint_elts(struct r300_context *r300, + 
struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_context *context = &r300->context; + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned int *in_map; + unsigned int *out_map; + unsigned i; + + /* The loop below stores "count" unsigned ints, so the buffer must be + * sized by the output element size, not by the 2 bytes of a ushort. */ + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + sizeof(*out_map) * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned int)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} + +void r300_translate_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0 || index_offset) { + r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + + case 4: + if (index_offset) { + r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + } +} diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index f0c562b76b3..d3d36a782c5 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -269,15 +269,24 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - fprintf(stderr, "r300: Implementation error: Received bogus texture " - "target 
%d in %s\n", target, __FUNCTION__); - return FALSE; + switch (sample_count) { + case 0: + case 1: + break; + case 2: + case 3: + case 4: + case 6: + if (usage != PIPE_BIND_RENDER_TARGET || + !util_format_is_rgba8_variant( + util_format_description(format))) { + return FALSE; + } + break; + default: + return FALSE; } - if (sample_count > 1) - return FALSE; - /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && /* Z24 cannot be sampled from on non-r5xx. */ diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 29492024fe3..29cd5dbe267 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -65,7 +65,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { /* Logging. */ #define DBG_FP (1 << 1) #define DBG_VP (1 << 2) -#define DBG_CS (1 << 3) +/* The bit (1 << 3) is unused. */ #define DBG_DRAW (1 << 4) #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) @@ -76,6 +76,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) #define DBG_NO_IMMD (1 << 18) +#define DBG_FAKE_OCC (1 << 19) /* Statistics. 
*/ #define DBG_STATS (1 << 24) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 44179f19ed4..7959e6a2f9e 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -93,25 +93,28 @@ int r300_upload_user_buffers(struct r300_context *r300) enum pipe_error ret = PIPE_OK; int i, nr; - nr = r300->vertex_buffer_count; + nr = r300->velems->count; for (i = 0; i < nr; i++) { - if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/ - unsigned size = r300->vertex_buffer[i].buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(r300->upload_vb, - offset, size, - r300->vertex_buffer[i].buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - r300->vertex_buffer[i].buffer = upload_buffer; - r300->vertex_buffer[i].buffer_offset = upload_offset; - } + struct pipe_vertex_buffer *vb = + &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index]; + + if (r300_buffer_is_user_buffer(vb->buffer)) { + struct pipe_resource *upload_buffer = NULL; + unsigned offset = 0; /*vb->buffer_offset * 4;*/ + unsigned size = vb->buffer->width0; + unsigned upload_offset; + ret = u_upload_buffer(r300->upload_vb, + offset, size, + vb->buffer, + &upload_offset, &upload_buffer); + if (ret) + return ret; + + pipe_resource_reference(&vb->buffer, NULL); + vb->buffer = upload_buffer; + vb->buffer_offset = upload_offset; + } } return ret; } @@ -261,10 +264,6 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.vtbl = &r300_buffer_vtbl; pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.b.screen = screen; - - if (rbuf->b.b.bind & R300_BIND_OQBO) - alignment = 4096; - rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = 
r300screen->rws->buffer_create(r300screen->rws, diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 87b42b94122..ff355858704 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -116,25 +116,4 @@ static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, return rws->add_buffer(rws, tex->buffer, rd, wr); } -static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, - struct r300_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) -{ - if (!buf->buf) - return; - - rws->write_cs_reloc(rws, buf->buf, rd, wd, flags); -} - -static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, - struct r300_texture *texture, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) -{ - rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d19563c18ae..9c0f877e814 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -31,12 +31,12 @@ #include "pipe/p_config.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_state.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_texture.h" @@ -183,6 +183,12 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ + uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ + uint32_t dither = 0; /* 
R300_RB3D_DITHER_CTL: 0x4e50 */ + CB_LOCALS; if (state->rt[0].blend_enable) { @@ -196,7 +202,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ - blend->blend_control = R300_ALPHA_BLEND_ENABLE | + blend_control = R300_ALPHA_BLEND_ENABLE | r300_translate_blend_function(eqRGB) | ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); @@ -220,7 +226,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { /* Enable reading from the colorbuffer. */ - blend->blend_control |= R300_READ_ENABLE; + blend_control |= R300_READ_ENABLE; if (r300screen->caps.is_r500) { /* Optimization: Depending on incoming pixels, we can @@ -233,7 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (dstA == PIPE_BLENDFACTOR_SRC_COLOR || dstA == PIPE_BLENDFACTOR_SRC_ALPHA || dstA == PIPE_BLENDFACTOR_ZERO)) { - blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + blend_control |= R500_SRC_ALPHA_0_NO_READ; } /* Disable reading if SRC_ALPHA == 1. */ @@ -242,7 +248,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstA == PIPE_BLENDFACTOR_ZERO)) { - blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + blend_control |= R500_SRC_ALPHA_1_NO_READ; } } } @@ -272,31 +278,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe, * pixels. 
*/ if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; } else if (blend_discard_if_src_color_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; } else if (blend_discard_if_src_color_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; } } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE; - blend->alpha_blend_control = + blend_control |= R300_SEPARATE_ALPHA_ENABLE; + alpha_blend_control = r300_translate_blend_function(eqA) | (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); @@ -305,21 +311,21 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ if (state->logicop_enable) { - blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | + rop = R300_RB3D_ROPCNTL_ROP_ENABLE | (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } /* Color channel masks for all MRTs. 
*/ - blend->color_channel_mask = bgra_cmask(state->rt[0].colormask); + color_channel_mask = bgra_cmask(state->rt[0].colormask); if (r300screen->caps.is_r500 && state->independent_blend_enable) { if (state->rt[1].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; + color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; } if (state->rt[2].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; + color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; } if (state->rt[3].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; + color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; } } @@ -330,11 +336,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe, * This could be revisited if we ever get quality or conformance hints. * if (state->dither) { - blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | + dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; } */ + /* Build a command buffer. */ + BEGIN_CB(blend->cb, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(blend_control); + OUT_CB(alpha_blend_control); + OUT_CB(color_channel_mask); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + + /* The same as above, but with no colorbuffer reads and writes. 
*/ + BEGIN_CB(blend->cb_no_readwrite, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + return (void*)blend; } @@ -368,20 +394,26 @@ static void r300_set_blend_color(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; - union util_color uc; + CB_LOCALS; - util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - state->blend_color = uc.ui; + if (r300->screen->caps.is_r500) { + /* XXX if FP16 blending is enabled, we should use the FP16 format */ + BEGIN_CB(state->cb, 3); + OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); + OUT_CB(float_to_fixed10(color->color[0]) | + (float_to_fixed10(color->color[3]) << 16)); + OUT_CB(float_to_fixed10(color->color[2]) | + (float_to_fixed10(color->color[1]) << 16)); + END_CB; + } else { + union util_color uc; + util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - /* XXX if FP16 blending is enabled, we should use the FP16 format */ - state->blend_color_red_alpha = - float_to_fixed10(color->color[0]) | - (float_to_fixed10(color->color[3]) << 16); - state->blend_color_green_blue = - float_to_fixed10(color->color[2]) | - (float_to_fixed10(color->color[1]) << 16); + BEGIN_CB(state->cb, 2); + OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui); + END_CB; + } - r300->blend_color_state.size = r300->screen->caps.is_r500 ? 
3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -389,18 +421,27 @@ static void r300_set_clip_state(struct pipe_context* pipe, const struct pipe_clip_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_clip_state *clip = + (struct r300_clip_state*)r300->clip_state.state; + CB_LOCALS; - r300->clip = *state; + clip->clip = *state; if (r300->screen->caps.has_tcl) { - memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.size = 29; + BEGIN_CB(clip->cb, 29); + OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_UCP_START : R300_PVS_UCP_START)); + OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); + OUT_CB_TABLE(state->ucp, 6 * 4); + OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + END_CB; r300->clip_state.dirty = TRUE; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); - r300->clip_state.size = 2; } } @@ -422,6 +463,9 @@ static void* { struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps; struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); + CB_LOCALS; + + dsa->dsa = *state; /* Depth test setup. 
*/ if (state->depth.enabled) { @@ -494,9 +538,43 @@ static void* dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; } + BEGIN_CB(&dsa->cb_begin, 8); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(dsa->z_buffer_control); + OUT_CB(dsa->z_stencil_control); + OUT_CB(dsa->stencil_ref_mask); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + END_CB; + + BEGIN_CB(dsa->cb_no_readwrite, 8); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + END_CB; + return (void*)dsa; } +static void r300_dsa_inject_stencilref(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = + (struct r300_dsa_state*)r300->dsa_state.state; + + if (!dsa) + return; + + dsa->stencil_ref_mask = + (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) | + r300->stencil_ref.ref_value[0]; + dsa->stencil_ref_bf = + (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) | + r300->stencil_ref.ref_value[1]; +} + /* Bind DSA state. */ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) @@ -508,6 +586,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, } UPDATE_STATE(state, r300->dsa_state); + + r300_dsa_inject_stencilref(r300); } /* Free DSA state. 
*/ @@ -523,6 +603,8 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300->stencil_ref = *sr; + + r300_dsa_inject_stencilref(r300); r300->dsa_state.dirty = TRUE; } @@ -582,16 +664,11 @@ static void const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - if (state->nr_cbufs > 4) { - fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, " - "refusing to bind framebuffer state!\n", __FUNCTION__); - return; - } - if (r300->screen->caps.is_r500) { max_width = max_height = 4096; } else if (r300->screen->caps.is_r400) { @@ -610,6 +687,8 @@ static void draw_flush(r300->draw); } + r300->gpu_flush.dirty = TRUE; + r300->aa_state.dirty = TRUE; r300->fb_state.dirty = TRUE; /* If nr_cbufs is changed from zero to non-zero or vice versa... */ @@ -626,8 +705,10 @@ static void memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); - r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + - (state->zsbuf ? 10 : 0) + 9; + r300->fb_state.size = + 7 + + (8 * state->nr_cbufs) + + (state->zsbuf ? (r300->screen->caps.has_hiz ? 22 : 18) : 0); /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -646,6 +727,30 @@ static void } } + /* Set up AA config. 
*/ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; + } + } else { + aa->aa_config = 0; + } + } + if (DBG_ON(r300, DBG_FB)) { fprintf(stderr, "r300: set_framebuffer_state:\n"); for (i = 0; i < state->nr_cbufs; i++) { @@ -679,13 +784,12 @@ void r300_mark_fs_code_dirty(struct r300_context *r300) r300->fs.dirty = TRUE; r300->fs_rc_constant_state.dirty = TRUE; r300->fs_constants.dirty = TRUE; + r300->fs.size = fs->shader->cb_code_size; if (r300->screen->caps.is_r500) { - r300->fs.size = r500_get_fs_atom_size(r300); r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7; r300->fs_constants.size = fs->shader->externals_count * 4 + 3; } else { - r300->fs.size = r300_get_fs_atom_size(r300); r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5; r300->fs_constants.size = fs->shader->externals_count * 4 + 1; } @@ -719,6 +823,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) tmp = ptr; ptr = ptr->next; rc_constants_destroy(&tmp->code.constants); + FREE(tmp->cb_code); FREE(tmp); } FREE((void*)fs->state.tokens); @@ -870,6 +975,11 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } } + if (state->gl_rasterization_rules) { + rs->multisample_position_0 = 0x66666666; + rs->multisample_position_1 = 0x6666666; + } + return (void*)rs; } @@ -899,7 +1009,8 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 27 + (r300->polygon_offset_enabled ? 
5 : 0); + r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0) + + (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0) ? 3 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -925,10 +1036,34 @@ static void* sampler->state = *state; + /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG + * or MIN filter is NEAREST. Since texwrap produces same results + * for CLAMP and CLAMP_TO_EDGE, we use them instead. */ + if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST || + sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) { + /* Wrap S. */ + if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + + /* Wrap T. */ + if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + + /* Wrap R. 
*/ + if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + } + sampler->filter0 |= - (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | - (r300_translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | - (r300_translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); + (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) | + (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) | + (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT); sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, @@ -995,6 +1130,31 @@ static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) FREE(state); } +static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num) +{ + /* This looks like a hack, but I believe it's supposed to work like + * that. To illustrate how this works, let's assume you have 5 textures. + * From docs, 5 and the successive numbers are: + * + * FOURTH_1 = 5 + * FOURTH_2 = 6 + * FOURTH_3 = 7 + * EIGHTH_0 = 8 + * EIGHTH_1 = 9 + * + * First 3 textures will get 3/4 of size of the cache, divided evenly + * between them. The last 1/4 of the cache must be divided between + * the last 2 textures, each will therefore get 1/8 of the cache. + * Why not just use "5 + texture_index"? + * + * This simple trick works for all "num" <= 16. 
+ */ + if (num <= 1) + return R300_TX_CACHE(R300_TX_CACHE_WHOLE); + else + return R300_TX_CACHE(num + index); +} + static void r300_set_fragment_sampler_views(struct pipe_context* pipe, unsigned count, struct pipe_sampler_view** views) @@ -1003,7 +1163,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; struct r300_texture *texture; - unsigned i; + unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1011,6 +1171,12 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, return; } + /* Calculate the real number of views. */ + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } + for (i = 0; i < count; i++) { if (&state->sampler_views[i]->base != views[i]) { pipe_sampler_view_reference( @@ -1030,6 +1196,10 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, if (texture->uses_pitch) { r300->fs_rc_constant_state.dirty = TRUE; } + + state->sampler_views[i]->texcache_region = + r300_assign_texture_cache_region(view_index, real_num_views); + view_index++; } } @@ -1296,7 +1466,6 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, if (velems != NULL) { velems->count = count; memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); - velems->incompatible_layout = FALSE; if (r300_screen(pipe->screen)->caps.has_tcl) { /* Set the best hw format in case the original format is not @@ -1354,11 +1523,13 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, /* Align the formats to the size of DWORD. * We only care about the blocksizes of the formats since - * swizzles are already set up. */ + * swizzles are already set up. + * Also compute the vertex size. */ for (i = 0; i < count; i++) { /* This is OK because we check for aligned strides too. 
*/ velems->hw_format_size[i] = align(util_format_get_blocksize(velems->hw_format[i]), 4); + velems->vertex_size_dwords += velems->hw_format_size[i] / 4; } } } @@ -1433,7 +1604,7 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) if (r300->screen->caps.has_tcl) { r300->vs_state.dirty = TRUE; r300->vs_state.size = - vs->code.length + 9 + + vs->code.length + 18 + (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0); if (vs->externals_count) { @@ -1474,7 +1645,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; struct pipe_transfer *tr; - void *mapped; + float *mapped; int max_size = 0, max_size_bytes = 0, clamped_size = 0; switch (shader) { @@ -1513,10 +1684,20 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, fprintf(stderr, "r300: Max size of the constant buffer is " "%i*4 floats.\n", max_size); } - clamped_size = MIN2(buf->width0, max_size_bytes); - memcpy(cbuf->constants, mapped, clamped_size); + clamped_size = MIN2(buf->width0, max_size_bytes); cbuf->count = clamped_size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) { + unsigned i,j; + + /* Convert constants to float24. 
*/ + for (i = 0; i < cbuf->count; i++) + for (j = 0; j < 4; j++) + cbuf->constants[i][j] = pack_float24(mapped[i*4+j]); + } else { + memcpy(cbuf->constants, mapped, clamped_size); + } } if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/r300/r300_state.h b/src/gallium/drivers/r300/r300_state.h deleted file mode 100644 index 1d557506cf3..00000000000 --- a/src/gallium/drivers/r300/r300_state.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_STATE_H -#define R300_STATE_H - -struct r300_context; - -void r300_mark_fs_code_dirty(struct r300_context *r300); - -#endif /* R300_STATE_H */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index cc75fad3bb9..3aa8deb63c8 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -31,7 +31,6 @@ #include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" @@ -537,6 +536,10 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) UTIL_FORMAT_SWIZZLE_X }; + /* The KIL opcode fix, see below. */ + if (!count && !r300->screen->caps.is_r500) + count = 1; + state->tx_enable = 0; state->count = 0; size = 2; @@ -555,6 +558,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter1 = sampler->filter1; texstate->border_color = sampler->border_color; + /* Assign a texture cache region. */ + texstate->format.format1 |= view->texcache_region; + /* If compare mode is disabled, the sampler view swizzles * are stored in the format. * Otherwise, swizzles must be applied after the compare mode @@ -613,6 +619,36 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) size += 16; state->count = i+1; + } else { + /* For the KIL opcode to work on r3xx-r4xx, the texture unit + * assigned to this opcode (it's always the first one) must be + * enabled. Otherwise the opcode doesn't work. + * + * In order to not depend on the fragment shader, we just make + * the first unit enabled all the time. 
*/ + if (i == 0 && !r300->screen->caps.is_r500) { + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&state->sampler_views[i], + &r300->texkill_sampler->base); + + state->tx_enable |= 1 << i; + + texstate = &state->regs[i]; + + /* Just set some valid state. */ + texstate->format = r300->texkill_sampler->format; + texstate->filter0 = + r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST, + PIPE_TEX_FILTER_NEAREST, + PIPE_TEX_FILTER_NEAREST, + FALSE); + texstate->filter1 = 0; + texstate->border_color = 0; + + texstate->filter0 |= i << 28; + size += 16; + state->count = i+1; + } } } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 34d3a169d57..acd20974a00 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,79 +38,22 @@ void r300_emit_invariant_state(struct r300_context* r300, { CS_LOCALS(r300); - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - /* Subpixel multisampling for AA. */ - BEGIN_CS(4); - OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); - END_CS; - } - - BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0)); - - /*** Graphics Backend (GB) ***/ - /* Source of fog depth */ - OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - - /*** Fog (FG) ***/ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - - /*** VAP ***/ - /* Sign/normalize control */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); - /* TCL-only stuff */ - if (r300->screen->caps.has_tcl) { - /* Amount of time to wait for vertex fetches in PVS */ - OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); - } - - END_CS; + BEGIN_CS(18 + (r300->screen->caps.is_rv350 ? 4 : 0)); - /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 
7 : 0) + - (r300->screen->caps.is_rv350 ? 4 : 0)); - - if (r300->screen->caps.has_tcl) { - /*Flushing PVS is required before the VAP_GB registers can be changed*/ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); - OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - } - /* XXX line tex stuffing */ - OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); - OUT_CS_32F(0.0); - OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); - OUT_CS_32F(1.0); - OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | - (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); - /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); - OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); - OUT_CS_REG(R300_GA_OFFSET, 0x00000000); - OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); - OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); - OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); + OUT_CS_REG(R300_GB_SELECT, 0); + OUT_CS_REG(R300_FG_FOG_BLEND, 0); + OUT_CS_REG(R300_GA_ROUND_MODE, 1); + OUT_CS_REG(R300_GA_OFFSET, 0); + OUT_CS_REG(R300_SU_TEX_WRAP, 0); OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); - OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); - OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); + OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0); + OUT_CS_REG(R300_SC_HYPERZ, 0x1C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); if (r300->screen->caps.is_rv350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } - OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); - OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); END_CS; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 4ca8ce037bf..ddb66000561 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ 
b/src/gallium/drivers/r300/r300_texture.c @@ -21,30 +21,26 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "pipe/p_screen.h" - -#include "util/u_format.h" -#include "util/u_format_s3tc.h" -#include "util/u_math.h" -#include "util/u_memory.h" +/* Always include headers in the reverse order!! ~ M. */ +#include "r300_texture.h" #include "r300_context.h" #include "r300_reg.h" -#include "r300_texture.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" -#define TILE_WIDTH 0 -#define TILE_HEIGHT 1 +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "pipe/p_screen.h" +#include "state_tracker/drm_api.h" -static const unsigned microblock_table[5][3][2] = { - /*linear tiled square-tiled */ - {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ - {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 }; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -596,21 +592,21 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, /* Set framebuffer state. 
*/ if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { - tex->fb_state.depthpitch[i] = + tex->fb_state.pitch[i] = tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } - tex->fb_state.zb_format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); } else { for (i = 0; i <= tex->b.b.last_level; i++) { - tex->fb_state.colorpitch[i] = + tex->fb_state.pitch[i] = tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); } - tex->fb_state.us_out_fmt = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); } } @@ -620,8 +616,10 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, { struct r300_screen *r300screen = r300_screen(screen); - SCREEN_DBG(r300screen, DBG_TEX, "r300: texture_reinterpret_format: %s -> %s\n", - util_format_short_name(tex->format), util_format_short_name(new_format)); + SCREEN_DBG(r300screen, DBG_TEX, + "r300: texture_reinterpret_format: %s -> %s\n", + util_format_short_name(tex->format), + util_format_short_name(new_format)); tex->format = new_format; @@ -648,36 +646,65 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } } -/** - * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile - * of the given texture. - */ -static unsigned r300_texture_get_tile_size(struct r300_texture* tex, - int dim, boolean macrotile) +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. 
*/ +static unsigned r300_get_pixel_alignment(struct r300_texture *tex, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) { - unsigned pixsize, tile_size; - - pixsize = util_format_get_blocksize(tex->b.b.format); - tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim]; - - if (macrotile) { - tile_size *= 8; + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned res = 0; + unsigned pixsize = util_format_get_blocksize(tex->b.b.format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(tex->microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (tex->b.b.nr_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + res = aa_block[dim]; + } else { + /* Standard alignment. */ + res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; } - assert(tile_size); - return tile_size; + assert(res); + return res; } /* Return true if macrotiling should be enabled on the miplevel. 
*/ static boolean r300_texture_macro_switch(struct r300_texture *tex, unsigned level, boolean rv350_mode, - int dim) + enum r300_dim dim) { unsigned tile, texdim; - tile = r300_texture_get_tile_size(tex, dim, TRUE); - if (dim == TILE_WIDTH) { + tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { texdim = u_minify(tex->b.b.width0, level); } else { texdim = u_minify(tex->b.b.height0, level); @@ -713,8 +740,8 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, width = u_minify(tex->b.b.width0, level); if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH, - tex->mip_macrotile[level]); + tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_WIDTH); width = align(width, tile_width); stride = util_format_get_stride(tex->b.b.format, width); @@ -743,8 +770,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, height = u_minify(tex->b.b.height0, level); if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT, - tex->mip_macrotile[level]); + tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_HEIGHT); height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. */ @@ -784,21 +811,26 @@ static void r300_setup_miptree(struct r300_screen* screen, unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.is_rv350; - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. 
*/ tex->mip_macrotile[i] = (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, TILE_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, TILE_HEIGHT)) ? + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, tex, i); nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else @@ -864,8 +896,8 @@ static void r300_setup_tiling(struct pipe_screen *screen, } /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, TILE_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, TILE_HEIGHT)) { + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { tex->macrotile = R300_BUFFER_TILED; } } @@ -899,17 +931,14 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; struct r300_texture* tex = (struct r300_texture*)texture; - unsigned stride; if (!tex) { return FALSE; } - stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - - rws->buffer_get_handle(rws, tex->buffer, stride, whandle); + whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - return TRUE; + return rws->buffer_get_handle(rws, tex->buffer, whandle); } struct u_resource_vtbl r300_texture_vtbl = @@ -1003,26 +1032,27 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, unsigned flags) { struct r300_texture* tex = r300_texture(texture); - struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); - unsigned offset; - - offset = r300_texture_get_offset(tex, level, zslice, face); + struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if 
(surface) { - pipe_reference_init(&surface->reference, 1); - pipe_resource_reference(&surface->texture, texture); - surface->format = texture->format; - surface->width = u_minify(texture->width0, level); - surface->height = u_minify(texture->height0, level); - surface->offset = offset; - surface->usage = flags; - surface->zslice = zslice; - surface->texture = texture; - surface->face = face; - surface->level = level; + pipe_reference_init(&surface->base.reference, 1); + pipe_resource_reference(&surface->base.texture, texture); + surface->base.format = texture->format; + surface->base.width = u_minify(texture->width0, level); + surface->base.height = u_minify(texture->height0, level); + surface->base.usage = flags; + surface->base.zslice = zslice; + surface->base.face = face; + surface->base.level = level; + + surface->buffer = tex->buffer; + surface->domain = tex->domain; + surface->offset = r300_texture_get_offset(tex, level, zslice, face); + surface->pitch = tex->fb_state.pitch[level]; + surface->format = tex->fb_state.format; } - return surface; + return &surface->base; } /* Not required to implement u_resource_vtbl, consider moving to another file: @@ -1042,7 +1072,6 @@ r300_texture_from_handle(struct pipe_screen* screen, struct r300_screen* rscreen = r300_screen(screen); struct r300_winsys_buffer *buffer; struct r300_texture* tex; - unsigned stride; boolean override_zb_flags; /* Support only 2D textures without mipmaps */ @@ -1052,7 +1081,7 @@ r300_texture_from_handle(struct pipe_screen* screen, return NULL; } - buffer = rws->buffer_from_handle(rws, screen, whandle, &stride); + buffer = rws->buffer_from_handle(rws, whandle->handle); if (!buffer) { return NULL; } @@ -1068,7 +1097,7 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->b.b.screen = screen; tex->domain = R300_DOMAIN_VRAM; - tex->stride_override = stride; + tex->stride_override = whandle->stride; /* one ref already taken */ tex->buffer = buffer; @@ -1080,7 +1109,7 @@ 
r300_texture_from_handle(struct pipe_screen* screen, "Pitch: % 4i, Dim: %ix%i, Format: %s\n", tex->macrotile ? "YES" : " NO", tex->microtile ? "YES" : " NO", - stride / util_format_get_blocksize(base->format), + whandle->stride / util_format_get_blocksize(base->format), base->width0, base->height0, util_format_short_name(base->format)); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index ff640c56eed..99e7694254e 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -23,8 +23,11 @@ #ifndef R300_TEXTURE_H #define R300_TEXTURE_H -#include "util/u_format.h" +#include "pipe/p_format.h" +struct pipe_screen; +struct pipe_resource; +struct winsys_handle; struct r300_texture; struct r300_screen; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 89f39af9761..5394e04f727 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -105,12 +105,12 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */ /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */ case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; - /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ + case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; case TGSI_OPCODE_IF: return RC_OPCODE_IF; - /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ + case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP; case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; - /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ + case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 4f37fabb289..02421a58b83 100644 --- 
a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -57,22 +57,11 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - /* XXX if we don't flush before copying the texture and mapping it, - * we get wrong pixels, i.e. it's like latest draw calls didn't happen, - * including this blit. Tests: e.g. piglit/provoking-vertex - * - * Since the flush immediately before mapping is implicit (the buffer is - * always referenced in resource_copy_region), every read transfer costs - * 2 flushes. That sucks. */ - ctx->flush(ctx, 0, NULL); - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, transfer->box.width, transfer->box.height); - - /* Flushing after the copy is implicit, issued by winsys. */ } /* Copy a detiled texture to a tiled one. */ @@ -92,7 +81,6 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, 0, 0, 0, transfer->box.width, transfer->box.height); - /* XXX this flush fixes a few piglit tests (e.g. glean/pixelFormats). */ ctx->flush(ctx, 0, NULL); } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 6ce218923b1..77c1c13ef9a 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -47,13 +47,6 @@ enum r300_reference_domain { /* bitfield */ R300_REF_HW = 2 }; -struct r300_cs_info { - /* In DWORDs. */ - unsigned used; - unsigned free; - unsigned capacity; -}; - struct r300_winsys_screen { void (*destroy)(struct r300_winsys_screen *ws); @@ -109,16 +102,13 @@ struct r300_winsys_screen { * Returns TRUE if a flush is required. */ boolean (*validate)(struct r300_winsys_screen* winsys); - /* Return current CS info. */ - void (*get_cs_info)(struct r300_winsys_screen *winsys, - struct r300_cs_info *info); + /* Return the number of free dwords in CS. 
*/ + unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys); - /* Start a command emit. */ - void (*begin_cs)(struct r300_winsys_screen* winsys, - int size, - const char* file, - const char* function, - int line); + /* Return the pointer to the first free dword in CS and assume a pipe + * driver wants to fill "count" dwords. */ + uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys, + unsigned count); /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); @@ -134,12 +124,6 @@ struct r300_winsys_screen { enum r300_buffer_domain wd, uint32_t flags); - /* Finish a command emit. */ - void (*end_cs)(struct r300_winsys_screen* winsys, - const char* file, - const char* function, - int line); - /* Flush the CS. */ void (*flush_cs)(struct r300_winsys_screen* winsys); @@ -164,12 +148,10 @@ struct r300_winsys_screen { enum r300_value_id vid); struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, - struct pipe_screen *screen, - struct winsys_handle *whandle, - unsigned *stride); + unsigned handle); + boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buffer, - unsigned stride, struct winsys_handle *whandle); boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 401a28ad312..12ef98aac75 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -114,6 +114,11 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_sampler_view_reference(&softpipe->vertex_sampler_views[i], NULL); } + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + sp_destroy_tex_tile_cache(softpipe->geometry_tex_cache[i]); + pipe_sampler_view_reference(&softpipe->geometry_sampler_views[i], NULL); + } + for (i = 0; i < PIPE_SHADER_TYPES; i++) { uint j; @@ -174,7 +179,12 @@ 
softpipe_is_resource_referenced( struct pipe_context *pipe, softpipe->vertex_tex_cache[i]->texture == texture) return PIPE_REFERENCED_FOR_READ; } - + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + if (softpipe->geometry_tex_cache[i] && + softpipe->geometry_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } + return PIPE_UNREFERENCED; } @@ -225,6 +235,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; + softpipe->pipe.bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; @@ -265,6 +276,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; softpipe->pipe.set_fragment_sampler_views = softpipe_set_sampler_views; softpipe->pipe.set_vertex_sampler_views = softpipe_set_vertex_sampler_views; + softpipe->pipe.set_geometry_sampler_views = softpipe_set_geometry_sampler_views; softpipe->pipe.create_sampler_view = softpipe_create_sampler_view; softpipe->pipe.sampler_view_destroy = softpipe_sampler_view_destroy; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; @@ -301,6 +313,9 @@ softpipe_create_context( struct pipe_screen *screen, for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); } + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + softpipe->geometry_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + } softpipe->fs_machine = tgsi_exec_machine_create(); @@ -319,10 +334,17 @@ softpipe_create_context( struct pipe_screen *screen, goto fail; draw_texture_samplers(softpipe->draw, + 
PIPE_SHADER_VERTEX, PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); + draw_texture_samplers(softpipe->draw, + PIPE_SHADER_GEOMETRY, + PIPE_MAX_GEOMETRY_SAMPLERS, + (struct tgsi_sampler **) + softpipe->tgsi.geom_samplers_list); + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index e641a81d1fb..53115a827d0 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -60,6 +60,7 @@ struct softpipe_context { struct pipe_blend_state *blend; struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; @@ -78,6 +79,7 @@ struct softpipe_context { struct pipe_scissor_state scissor; struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct { @@ -92,6 +94,8 @@ struct softpipe_context { unsigned num_sampler_views; unsigned num_vertex_samplers; unsigned num_vertex_sampler_views; + unsigned num_geometry_samplers; + unsigned num_geometry_sampler_views; unsigned num_vertex_buffers; unsigned dirty; /**< Mask of SP_NEW_x flags */ @@ -148,6 +152,7 @@ struct softpipe_context { /** TGSI exec things */ struct { + struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; @@ -169,6 +174,7 @@ struct 
softpipe_context { unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; + struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 5024fc8a819..4a53ef048f3 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -56,6 +56,9 @@ softpipe_flush( struct pipe_context *pipe, for (i = 0; i < softpipe->num_vertex_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); } + for (i = 0; i < softpipe->num_geometry_sampler_views; i++) { + sp_flush_tex_tile_cache(softpipe->geometry_tex_cache[i]); + } } if (flags & PIPE_FLUSH_SWAPBUFFERS) { diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index ddfe56f73a4..c60249dbfbc 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -543,15 +543,17 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } static void -sp_vbuf_so_info(struct vbuf_render *vbr, uint buffer, uint vertices) +sp_vbuf_so_info(struct vbuf_render *vbr, uint primitives, uint vertices) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; + unsigned i; - softpipe->so_target.so_count[buffer] += vertices; + for (i = 0; i < softpipe->so_target.num_buffers; ++i) { + softpipe->so_target.so_count[i] += vertices; + } - softpipe->so_stats.num_primitives_written = - vertices / u_vertices_per_prim(cvbr->prim); + softpipe->so_stats.num_primitives_written = primitives; softpipe->so_stats.primitives_storage_needed = vertices * 4 /*sizeof(float|int32)*/ * 4 /*x,y,z,w*/; } diff --git a/src/gallium/drivers/softpipe/sp_query.c 
b/src/gallium/drivers/softpipe/sp_query.c index 245f1b554c9..4ae69c1c2bd 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -58,7 +58,9 @@ softpipe_create_query(struct pipe_context *pipe, assert(type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_TIME_ELAPSED || - type == PIPE_QUERY_SO_STATISTICS); + type == PIPE_QUERY_SO_STATISTICS || + type == PIPE_QUERY_GPU_FINISHED || + type == PIPE_QUERY_TIMESTAMP_DISJOINT); sq = CALLOC_STRUCT( softpipe_query ); sq->type = type; @@ -78,7 +80,7 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct softpipe_context *softpipe = softpipe_context( pipe ); struct softpipe_query *sq = softpipe_query(q); - + switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: sq->start = softpipe->occlusion_count; @@ -90,6 +92,9 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) sq->so.num_primitives_written = 0; sq->so.primitives_storage_needed = 0; break; + case PIPE_QUERY_GPU_FINISHED: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; default: assert(0); break; @@ -119,6 +124,9 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed; break; + case PIPE_QUERY_GPU_FINISHED: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; default: assert(0); break; @@ -141,6 +149,18 @@ softpipe_get_query_result(struct pipe_context *pipe, memcpy(vresult, &sq->so, sizeof(struct pipe_query_data_so_statistics)); break; + case PIPE_QUERY_GPU_FINISHED: + *result = TRUE; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: { + struct pipe_query_data_timestamp_disjoint td; + /*os_get_time is in microseconds*/ + td.frequency = 1000000; + td.disjoint = FALSE; + memcpy(vresult, &td, + sizeof(struct pipe_query_data_timestamp_disjoint)); + } + break; default: *result = sq->end - sq->start; break; diff --git a/src/gallium/drivers/softpipe/sp_state.h
b/src/gallium/drivers/softpipe/sp_state.h index d0b73cc850c..7d6b86dce04 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -100,6 +100,7 @@ struct sp_vertex_shader { struct sp_geometry_shader { struct pipe_shader_state shader; struct draw_geometry_shader *draw_data; + int max_sampler; }; struct sp_velems_state { @@ -128,6 +129,10 @@ void softpipe_bind_vertex_sampler_states(struct pipe_context *, unsigned num_samplers, void **samplers); +void +softpipe_bind_geometry_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void softpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -195,6 +200,11 @@ softpipe_set_vertex_sampler_views(struct pipe_context *, unsigned num, struct pipe_sampler_view **); +void +softpipe_set_geometry_sampler_views(struct pipe_context *, + unsigned num, + struct pipe_sampler_view **); + struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 4c6d4909f5b..3ba4d934fd2 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -225,6 +225,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } } } + + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->geometry_tex_cache[i]; + + if (tc->texture) { + struct softpipe_resource *spt = softpipe_resource(tc->texture); + + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture(tc); + tc->timestamp = spt->timestamp; + } + } + } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2fff80c4385..3fbf1f25781 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -35,6 +35,7 @@ #include 
"util/u_inlines.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_scan.h" @@ -223,6 +224,8 @@ softpipe_create_gs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + state->max_sampler = state->draw_data->info.file_max[TGSI_FILE_SAMPLER]; + return state; fail: diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 2692f06c927..79d9516ad9c 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -121,6 +121,33 @@ softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_SAMPLER; } +void +softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_GEOMETRY_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == softpipe->num_geometry_samplers && + !memcmp(softpipe->geometry_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num_samplers; ++i) + softpipe->geometry_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_GEOMETRY_SAMPLERS; ++i) + softpipe->geometry_samplers[i] = NULL; + + softpipe->num_geometry_samplers = num_samplers; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, @@ -210,6 +237,36 @@ softpipe_set_vertex_sampler_views(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_TEXTURE; } +void +softpipe_set_geometry_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num <= PIPE_MAX_GEOMETRY_SAMPLERS); + + /* Check for no-op */ + if (num 
== softpipe->num_geometry_sampler_views && + !memcmp(softpipe->geometry_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) { + return; + } + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + struct pipe_sampler_view *view = i < num ? views[i] : NULL; + + pipe_sampler_view_reference(&softpipe->geometry_sampler_views[i], view); + sp_tex_tile_cache_set_sampler_view(softpipe->geometry_tex_cache[i], view); + } + + softpipe->num_geometry_sampler_views = num; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + /** * Find/create an sp_sampler_varient object for sampling the given texture, @@ -293,6 +350,30 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } } + if (softpipe->gs) { + for (i = 0; i <= softpipe->gs->max_sampler; i++) { + if (softpipe->geometry_samplers[i]) { + struct pipe_resource *texture = NULL; + + if (softpipe->geometry_sampler_views[i]) { + texture = softpipe->geometry_sampler_views[i]->texture; + } + + softpipe->tgsi.geom_samplers_list[i] = + get_sampler_varient( + i, + sp_sampler(softpipe->geometry_samplers[i]), + texture, + TGSI_PROCESSOR_GEOMETRY ); + + sp_sampler_varient_bind_texture( + softpipe->tgsi.geom_samplers_list[i], + softpipe->geometry_tex_cache[i], + texture ); + } + } + } + for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->sampler[i]) { struct pipe_resource *texture = NULL; diff --git a/src/gallium/drivers/softpipe/sp_state_so.c b/src/gallium/drivers/softpipe/sp_state_so.c index 27acd3dfc12..cfe23f9e846 100644 --- a/src/gallium/drivers/softpipe/sp_state_so.c +++ b/src/gallium/drivers/softpipe/sp_state_so.c @@ -89,6 +89,8 @@ softpipe_set_stream_output_buffers(struct pipe_context *pipe, void *map_buffers[PIPE_MAX_SO_BUFFERS]; assert(num_buffers <= PIPE_MAX_SO_BUFFERS); + if (num_buffers > PIPE_MAX_SO_BUFFERS) + num_buffers = PIPE_MAX_SO_BUFFERS; softpipe->dirty |= SP_NEW_SO_BUFFERS; @@ -99,7 +101,7 @@ softpipe_set_stream_output_buffers(struct 
pipe_context *pipe, if (!res) { /* the whole call is invalid, bail out */ softpipe->so_target.num_buffers = 0; - draw_set_mapped_so_buffers(softpipe->draw, map_buffers, 0); + draw_set_mapped_so_buffers(softpipe->draw, 0, 0); return; } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 72afad60ba9..7ec3d63a3fd 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -158,6 +158,9 @@ struct pipe_context { void (*bind_vertex_sampler_states)(struct pipe_context *, unsigned num_samplers, void **samplers); + void (*bind_geometry_sampler_states)(struct pipe_context *, + unsigned num_samplers, + void **samplers); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -238,6 +241,10 @@ struct pipe_context { unsigned num_views, struct pipe_sampler_view **); + void (*set_geometry_sampler_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **); + void (*set_vertex_buffers)( struct pipe_context *, unsigned num_buffers, const struct pipe_vertex_buffer * ); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 85551cac25a..3b87d998ceb 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -382,7 +382,9 @@ enum pipe_transfer_usage { #define PIPE_QUERY_PRIMITIVES_EMITTED 2 #define PIPE_QUERY_TIME_ELAPSED 3 #define PIPE_QUERY_SO_STATISTICS 5 -#define PIPE_QUERY_TYPES 6 +#define PIPE_QUERY_GPU_FINISHED 6 +#define PIPE_QUERY_TIMESTAMP_DISJOINT 7 +#define PIPE_QUERY_TYPES 8 /** @@ -507,6 +509,11 @@ struct pipe_query_data_so_statistics uint64_t num_primitives_written; uint64_t primitives_storage_needed; }; +struct pipe_query_data_timestamp_disjoint +{ + uint64_t frequency; + boolean disjoint; +}; #ifdef __cplusplus } diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 
c46c7e3d14e..9df20ea8581 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -65,16 +65,18 @@ struct tgsi_token }; enum tgsi_file_type { - TGSI_FILE_NULL =0, - TGSI_FILE_CONSTANT =1, - TGSI_FILE_INPUT =2, - TGSI_FILE_OUTPUT =3, - TGSI_FILE_TEMPORARY =4, - TGSI_FILE_SAMPLER =5, - TGSI_FILE_ADDRESS =6, - TGSI_FILE_IMMEDIATE =7, - TGSI_FILE_PREDICATE =8, - TGSI_FILE_SYSTEM_VALUE =9, + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_PREDICATE =8, + TGSI_FILE_SYSTEM_VALUE =9, + TGSI_FILE_IMMEDIATE_ARRAY =10, + TGSI_FILE_TEMPORARY_ARRAY =11, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -159,9 +161,9 @@ struct tgsi_declaration_semantic struct tgsi_immediate { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */ - unsigned NrTokens : 8; /**< UINT */ + unsigned NrTokens : 14; /**< UINT */ unsigned DataType : 4; /**< one of TGSI_IMM_x */ - unsigned Padding : 16; + unsigned Padding : 10; }; union tgsi_immediate_data diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 5ed1cca67a5..6231f06ec71 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -60,6 +60,7 @@ extern "C" { #define PIPE_MAX_CONSTANT_BUFFERS 32 #define PIPE_MAX_SAMPLERS 16 #define PIPE_MAX_VERTEX_SAMPLERS 16 +#define PIPE_MAX_GEOMETRY_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 16 #define PIPE_MAX_SHADER_OUTPUTS 16 #define PIPE_MAX_TEXTURE_LEVELS 16 diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 8933890323a..fec178ffb30 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -31,15 +31,20 @@ kms_SOURCES = $(wildcard kms/*.c) kms_OBJECTS = $(kms_SOURCES:.c=.o) -ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) 
$(kms_INCLUDES) -ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) -ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) +fbdev_INCLUDES = -I$(TOP)/src/gallium/winsys/sw -I$(TOP)/src/gallium/drivers +fbdev_SOURCES = $(wildcard fbdev/*.c) +fbdev_OBJECTS = $(fbdev_SOURCES:.c=.o) + + +ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) $(fbdev_INCLUDES) +ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) $(fbdev_SOURCES) +ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) $(fbdev_OBJECTS) ##### TARGETS ##### -EGL_DISPLAYS_MODS = $(foreach dpy, $(EGL_DISPLAYS), libegl$(dpy).a) +EGL_PLATFORMS_MODS = $(foreach plat, $(EGL_PLATFORMS), libegl$(plat).a) -default: depend $(EGL_DISPLAYS_MODS) +default: depend $(EGL_PLATFORMS_MODS) libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile @@ -48,6 +53,9 @@ libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile libeglkms.a: $(kms_OBJECTS) $(common_OBJECTS) Makefile $(MKLIB) -o eglkms -static $(kms_OBJECTS) $(common_OBJECTS) +libeglfbdev.a: $(fbdev_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglfbdev -static $(fbdev_OBJECTS) $(common_OBJECTS) + depend: rm -f depend touch depend @@ -55,7 +63,7 @@ depend: clean: rm -f $(ALL_OBJECTS) - rm -f $(EGL_DISPLAYS_MODS) + rm -f $(EGL_PLATFORMS_MODS) rm -f depend depend.bak # Dummy target @@ -73,4 +81,7 @@ $(x11_OBJECTS): %.o: %.c $(kms_OBJECTS): %.o: %.c $(CC) -c $(common_INCLUDES) $(kms_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ +$(fbdev_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(fbdev_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + sinclude depend diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 361cc7960bd..8c7d2cb33e7 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -74,10 +74,10 @@ egl_g3d_get_probe(_EGLDriver *drv, _EGLDisplay *dpy) struct native_probe *nprobe; nprobe = (struct native_probe 
*) _eglGetProbeCache(gdrv->probe_key); - if (!nprobe || nprobe->display != dpy->NativeDisplay) { + if (!nprobe || nprobe->display != dpy->PlatformDisplay) { if (nprobe) nprobe->destroy(nprobe); - nprobe = native_create_probe(dpy->NativeDisplay); + nprobe = native_create_probe(dpy->PlatformDisplay); _eglSetProbeCache(gdrv->probe_key, (void *) nprobe); } @@ -96,7 +96,7 @@ egl_g3d_destroy_probe(_EGLDriver *drv, _EGLDisplay *dpy) struct native_probe *nprobe; nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); - if (nprobe && (!dpy || nprobe->display == dpy->NativeDisplay)) { + if (nprobe && (!dpy || nprobe->display == dpy->PlatformDisplay)) { nprobe->destroy(nprobe); _eglSetProbeCache(gdrv->probe_key, NULL); } @@ -479,7 +479,7 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, } dpy->DriverData = gdpy; - gdpy->native = native_create_display(dpy->NativeDisplay, + gdpy->native = native_create_display(dpy->PlatformDisplay, &egl_g3d_native_event_handler); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 3f60348c489..494becb61f2 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -211,7 +211,6 @@ const char * native_get_name(void); struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *handler); +native_create_display(void *dpy, struct native_event_handler *handler); #endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl/common/native_probe.h b/src/gallium/state_trackers/egl/common/native_probe.h index aeed9f85dd5..539c4aa70d2 100644 --- a/src/gallium/state_trackers/egl/common/native_probe.h +++ b/src/gallium/state_trackers/egl/common/native_probe.h @@ -43,7 +43,7 @@ enum native_probe_result { */ struct native_probe { int magic; - EGLNativeDisplayType display; + void 
*display; void *data; void (*destroy)(struct native_probe *nprobe); @@ -57,7 +57,7 @@ struct native_probe { * same display. */ struct native_probe * -native_create_probe(EGLNativeDisplayType dpy); +native_create_probe(void *dpy); /** * Probe the probe object. diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c new file mode 100644 index 00000000000..399c1251ef6 --- /dev/null +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -0,0 +1,469 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/fb.h> + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_pointer.h" + +#include "common/native.h" +#include "common/native_helper.h" +#include "fbdev/fbdev_sw_winsys.h" + +struct fbdev_display { + struct native_display base; + + int fd; + struct native_event_handler *event_handler; + + struct fb_fix_screeninfo finfo; + struct fb_var_screeninfo vinfo; + + struct native_config config; + struct native_connector connector; + struct native_mode mode; + + struct fbdev_surface *current_surface; +}; + +struct fbdev_surface { + struct native_surface base; + + struct fbdev_display *fbdpy; + struct resource_surface *rsurf; + int width, height; + + unsigned int sequence_number; + + boolean is_current; +}; + +static INLINE struct fbdev_display * +fbdev_display(const struct native_display *ndpy) +{ + return (struct fbdev_display *) ndpy; +} + +static INLINE struct fbdev_surface * +fbdev_surface(const struct native_surface *nsurf) +{ + return (struct fbdev_surface *) nsurf; +} + +static boolean +fbdev_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (!resource_surface_add_resources(fbsurf->rsurf, attachment_mask)) + return FALSE; + if (textures) + resource_surface_get_resources(fbsurf->rsurf, textures, attachment_mask); + + if (seq_num) + *seq_num = fbsurf->sequence_number; + if (width) + *width = fbsurf->width; + if (height) + *height = fbsurf->height; + + return TRUE; +} + +static boolean +fbdev_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (!fbsurf->is_current) + return TRUE; + + return 
resource_surface_present(fbsurf->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NULL); +} + +static boolean +fbdev_surface_swap_buffers(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + struct fbdev_display *fbdpy = fbsurf->fbdpy; + boolean ret = TRUE; + + if (fbsurf->is_current) { + ret = resource_surface_present(fbsurf->rsurf, + NATIVE_ATTACHMENT_BACK_LEFT, NULL); + } + + resource_surface_swap_buffers(fbsurf->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, TRUE); + /* the front/back textures are swapped */ + fbsurf->sequence_number++; + fbdpy->event_handler->invalid_surface(&fbdpy->base, + &fbsurf->base, fbsurf->sequence_number); + + return ret; +} + +static void +fbdev_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +fbdev_surface_destroy(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + resource_surface_destroy(fbsurf->rsurf); + FREE(fbsurf); +} + +static struct native_surface * +fbdev_display_create_scanout_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct fbdev_surface *fbsurf; + + fbsurf = CALLOC_STRUCT(fbdev_surface); + if (!fbsurf) + return NULL; + + fbsurf->fbdpy = fbdpy; + fbsurf->width = width; + fbsurf->height = height; + + fbsurf->rsurf = resource_surface_create(fbdpy->base.screen, + nconf->color_format, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT); + if (!fbsurf->rsurf) { + FREE(fbsurf); + return NULL; + } + + resource_surface_set_size(fbsurf->rsurf, fbsurf->width, fbsurf->height); + + fbsurf->base.destroy = fbdev_surface_destroy; + fbsurf->base.swap_buffers = fbdev_surface_swap_buffers; + fbsurf->base.flush_frontbuffer = fbdev_surface_flush_frontbuffer; + fbsurf->base.validate = fbdev_surface_validate; + fbsurf->base.wait = fbdev_surface_wait; + + return &fbsurf->base; +} + +static 
boolean +fbdev_display_program(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (x || y) + return FALSE; + + if (fbdpy->current_surface) { + if (fbdpy->current_surface == fbsurf) + return TRUE; + fbdpy->current_surface->is_current = FALSE; + } + + if (fbsurf) + fbsurf->is_current = TRUE; + fbdpy->current_surface = fbsurf; + + return TRUE; +} + +static const struct native_mode ** +fbdev_display_get_modes(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_mode **modes; + + modes = MALLOC(sizeof(*modes)); + if (modes) { + modes[0] = &fbdpy->mode; + if (num_modes) + *num_modes = 1; + } + + return modes; +} + +static const struct native_connector ** +fbdev_display_get_connectors(struct native_display *ndpy, int *num_connectors, + int *num_crtc) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_connector **connectors; + + connectors = MALLOC(sizeof(*connectors)); + if (connectors) { + connectors[0] = &fbdpy->connector; + if (num_connectors) + *num_connectors = 1; + } + + return connectors; +} + +static struct native_display_modeset fbdev_display_modeset = { + .get_connectors = fbdev_display_get_connectors, + .get_modes = fbdev_display_get_modes, + .create_scanout_surface = fbdev_display_create_scanout_surface, + .program = fbdev_display_program +}; + +static const struct native_config ** +fbdev_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_config **configs; + + configs = MALLOC(sizeof(*configs)); + if (configs) { + configs[0] = &fbdpy->config; + if (num_configs) + *num_configs = 1; + } 
+ + return configs; +} + +static int +fbdev_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + default: + val = 0; + break; + } + + return val; +} + +static void +fbdev_display_destroy(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + + fbdpy->base.screen->destroy(fbdpy->base.screen); + close(fbdpy->fd); + FREE(fbdpy); +} + +static boolean +fbdev_display_init_modes(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct native_mode *nmode = &fbdpy->mode; + + nmode->desc = "Current Mode"; + nmode->width = fbdpy->vinfo.xres; + nmode->height = fbdpy->vinfo.yres; + nmode->refresh_rate = 60 * 1000; /* dummy */ + + return TRUE; +} + +static boolean +fbdev_display_init_connectors(struct native_display *ndpy) +{ + return TRUE; +} + +static enum pipe_format +vinfo_to_format(const struct fb_var_screeninfo *vinfo) +{ + enum pipe_format format = PIPE_FORMAT_NONE; + + switch (vinfo->bits_per_pixel) { + case 32: + if (vinfo->red.length == 8 && + vinfo->green.length == 8 && + vinfo->blue.length == 8) { + format = (vinfo->transp.length == 8) ? 
+ PIPE_FORMAT_B8G8R8A8_UNORM : PIPE_FORMAT_B8G8R8X8_UNORM; + } + break; + case 16: + if (vinfo->red.length == 5 && + vinfo->green.length == 6 && + vinfo->blue.length == 5 && + vinfo->transp.length == 0) + format = PIPE_FORMAT_B5G6R5_UNORM; + break; + default: + break; + } + + return format; +} + +static boolean +fbdev_display_init_configs(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct native_config *nconf = &fbdpy->config; + + nconf->color_format = vinfo_to_format(&fbdpy->vinfo); + if (nconf->color_format == PIPE_FORMAT_NONE) + return FALSE; + + nconf->buffer_mask = + (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | + (1 << NATIVE_ATTACHMENT_BACK_LEFT); + + nconf->scanout_bit = TRUE; + + return TRUE; +} + +static boolean +fbdev_display_init(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct sw_winsys *ws; + + if (ioctl(fbdpy->fd, FBIOGET_FSCREENINFO, &fbdpy->finfo)) + return FALSE; + + if (ioctl(fbdpy->fd, FBIOGET_VSCREENINFO, &fbdpy->vinfo)) + return FALSE; + + if (fbdpy->finfo.visual != FB_VISUAL_TRUECOLOR || + fbdpy->finfo.type != FB_TYPE_PACKED_PIXELS) + return FALSE; + + if (!fbdev_display_init_configs(&fbdpy->base) || + !fbdev_display_init_connectors(&fbdpy->base) || + !fbdev_display_init_modes(&fbdpy->base)) + return FALSE; + + ws = fbdev_create_sw_winsys(fbdpy->fd, fbdpy->config.color_format); + if (ws) + fbdpy->base.screen = native_create_sw_screen(ws); + + if (fbdpy->base.screen) { + if (!fbdpy->base.screen->is_format_supported(fbdpy->base.screen, + fbdpy->config.color_format, PIPE_TEXTURE_2D, 0, + PIPE_BIND_RENDER_TARGET, 0)) { + fbdpy->base.screen->destroy(fbdpy->base.screen); + fbdpy->base.screen = NULL; + } + } + + return (fbdpy->base.screen != NULL); +} + +static struct native_display * +fbdev_display_create(int fd, struct native_event_handler *event_handler) +{ + struct fbdev_display *fbdpy; + + fbdpy = CALLOC_STRUCT(fbdev_display); + if (!fbdpy) + return NULL; + + 
fbdpy->fd = fd; + fbdpy->event_handler = event_handler; + + if (!fbdev_display_init(&fbdpy->base)) { + FREE(fbdpy); + return NULL; + } + + fbdpy->base.destroy = fbdev_display_destroy; + fbdpy->base.get_param = fbdev_display_get_param; + fbdpy->base.get_configs = fbdev_display_get_configs; + + fbdpy->base.modeset = &fbdev_display_modeset; + + return &fbdpy->base; +} + +struct native_probe * +native_create_probe(void *dpy) +{ + return NULL; +} + +enum native_probe_result +native_get_probe_result(struct native_probe *nprobe) +{ + return NATIVE_PROBE_UNKNOWN; +} + +const char * +native_get_name(void) +{ + return "FBDEV"; +} + +struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler) +{ + struct native_display *ndpy; + int fd; + + /* well, this makes fd 0 being ignored */ + if (!dpy) { + fd = open("/dev/fb0", O_RDWR); + } + else { + fd = dup((int) pointer_to_intptr(dpy)); + } + if (fd < 0) + return NULL; + + ndpy = fbdev_display_create(fd, event_handler); + if (!ndpy) + close(fd); + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c index 1791d198d50..56f190de002 100644 --- a/src/gallium/state_trackers/egl/gdi/native_gdi.c +++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c @@ -367,7 +367,7 @@ gdi_create_display(HDC hDC, struct pipe_screen *screen, } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { return NULL; } @@ -385,8 +385,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct sw_winsys *winsys; struct pipe_screen *screen; diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c index bfb4a9d2588..f90b8714c99 100644 --- 
a/src/gallium/state_trackers/egl/kms/native_kms.c +++ b/src/gallium/state_trackers/egl/kms/native_kms.c @@ -779,7 +779,7 @@ kms_create_display(int fd, struct native_event_handler *event_handler, } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { return NULL; } @@ -810,8 +810,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct native_display *ndpy = NULL; int fd; diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index 3f802dd713f..e90c33b824d 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -741,7 +741,7 @@ dri2_display_hash_table_compare(void *key1, void *key2) } struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler, struct drm_api *api) { diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c index b6d51bbf9fb..bfa12b26a77 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.c +++ b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -46,7 +46,7 @@ x11_probe_destroy(struct native_probe *nprobe) } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { struct native_probe *nprobe; struct x11_screen *xscr; @@ -127,8 +127,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct native_display *ndpy = NULL; boolean force_sw; @@ -138,14 +137,14 @@ native_create_display(EGLNativeDisplayType dpy, force_sw = 
debug_get_bool_option("EGL_SOFTWARE", FALSE); if (api && !force_sw) { - ndpy = x11_create_dri2_display(dpy, event_handler, api); + ndpy = x11_create_dri2_display((Display *) dpy, event_handler, api); } if (!ndpy) { EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; _eglLog(level, "use software fallback"); - ndpy = x11_create_ximage_display(dpy, event_handler); + ndpy = x11_create_ximage_display((Display *) dpy, event_handler); } return ndpy; diff --git a/src/gallium/state_trackers/egl/x11/native_x11.h b/src/gallium/state_trackers/egl/x11/native_x11.h index 1678403b459..f1fea7f3de4 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.h +++ b/src/gallium/state_trackers/egl/x11/native_x11.h @@ -30,11 +30,11 @@ #include "common/native.h" struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, +x11_create_ximage_display(Display *dpy, struct native_event_handler *event_handler); struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler, struct drm_api *api); diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index 45679fc9b4e..ee10a04cfb2 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -441,7 +441,7 @@ ximage_display_destroy(struct native_display *ndpy) } struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, +x11_create_ximage_display(Display *dpy, struct native_event_handler *event_handler) { struct ximage_display *xdpy; diff --git a/src/gallium/targets/Makefile.egl b/src/gallium/targets/Makefile.egl index 4fa13e85ce4..315856014b7 100644 --- a/src/gallium/targets/Makefile.egl +++ b/src/gallium/targets/Makefile.egl @@ -24,15 +24,26 @@ x11_ST = $(TOP)/src/gallium/state_trackers/egl/libeglx11.a \ x11_LIBS = $(common_LIBS) -lX11 -lXext -lXfixes +kms_ST = 
$(TOP)/src/gallium/state_trackers/egl/libeglkms.a +kms_LIBS = $(common_LIBS) + +fbdev_ST = \ + $(TOP)/src/gallium/state_trackers/egl/libeglfbdev.a \ + $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a +fbdev_LIBS = $(common_LIBS) + ifeq ($(MESA_LLVM),1) x11_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a x11_LIBS += $(LLVM_LIBS) +fbdev_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +fbdev_LIBS += $(LLVM_LIBS) LDFLAGS += $(LLVM_LDFLAGS) endif -kms_ST = $(TOP)/src/gallium/state_trackers/egl/libeglkms.a -kms_LIBS = $(common_LIBS) - ### Include directories INCLUDES = \ -I$(TOP)/include \ @@ -51,13 +62,19 @@ INCLUDES = \ ##### TARGETS ##### -EGL_DISPLAY_DRIVERS = $(foreach dpy, $(EGL_DISPLAYS), egl_$(dpy)_$(EGL_DRIVER_NAME).so) +ifeq ($(EGL_DRIVER_NAME),swrast) +EGL_PLATFORMS := $(filter-out kms, $(EGL_PLATFORMS)) +else +EGL_PLATFORMS := $(filter-out fbdev, $(EGL_PLATFORMS)) +endif -EGL_DISPLAY_LIBS = $(foreach drv, $(EGL_DISPLAY_DRIVERS), $(TOP)/$(LIB_DIR)/egl/$(drv)) +EGL_PLATFORM_DRIVERS = $(foreach plat, $(EGL_PLATFORMS), egl_$(plat)_$(EGL_DRIVER_NAME).so) -default: $(EGL_DISPLAY_LIBS) +EGL_PLATFORM_LIBS = $(foreach drv, $(EGL_PLATFORM_DRIVERS), $(TOP)/$(LIB_DIR)/egl/$(drv)) -$(EGL_DISPLAY_LIBS): $(TOP)/$(LIB_DIR)/egl/%.so: %.so +default: $(EGL_PLATFORM_LIBS) + +$(EGL_PLATFORM_LIBS): $(TOP)/$(LIB_DIR)/egl/%.so: %.so @$(INSTALL) -d $(TOP)/$(LIB_DIR)/egl $(INSTALL) $< $(TOP)/$(LIB_DIR)/egl @@ -75,13 +92,16 @@ egl_x11_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(x11_ST) $(EGL_DRIVER_PIPE egl_kms_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(kms_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile $(call mklib-egl,kms) +egl_fbdev_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(fbdev_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call 
mklib-egl,fbdev) + clean: -rm -f $(EGL_DRIVER_OBJECTS) - -rm -f $(EGL_DISPLAY_DRIVERS) + -rm -f $(EGL_PLATFORM_DRIVERS) -install: $(EGL_DISPLAY_LIBS) +install: $(EGL_PLATFORM_LIBS) $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) - for lib in $(EGL_DISPLAY_LIBS); do \ + for lib in $(EGL_PLATFORM_LIBS); do \ $(MINSTALL) -m 755 "$$lib" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ done diff --git a/src/gallium/targets/dri-radeong/Makefile b/src/gallium/targets/dri-radeong/Makefile index 8ef24c08215..8ba1972ffa4 100644 --- a/src/gallium/targets/dri-radeong/Makefile +++ b/src/gallium/targets/dri-radeong/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/egl-radeon/Makefile b/src/gallium/targets/egl-radeon/Makefile index 8fcca268265..64c20afc2be 100644 --- a/src/gallium/targets/egl-radeon/Makefile +++ b/src/gallium/targets/egl-radeon/Makefile @@ -7,6 +7,7 @@ EGL_DRIVER_LIBS = -ldrm_radeon EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index a4951c4bba0..6cbc61e7aec 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -13,10 +13,11 @@ DRIVER_LINKS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a \ + 
$(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) \ - $(shell pkg-config --libs libdrm libdrm_intel) + $(shell pkg-config --libs libdrm libdrm_radeon) include ../Makefile.xorg diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index 3389efb7ca8..dea087357d6 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -42,7 +42,8 @@ static const int HEIGHT = 250; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_resource *rttex = NULL; -static struct pipe_resource *constbuf = NULL; +static struct pipe_resource *constbuf1 = NULL; +static struct pipe_resource *constbuf2 = NULL; static struct pipe_surface *surf = NULL; static struct pipe_sampler_view *sv = NULL; static void *sampler = NULL; @@ -73,7 +74,7 @@ static struct vertex vertices[] = { -1, 0, 0, 1 } }, }; -static float constants[] = +static float constants1[] = { 0.4, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, @@ -81,7 +82,25 @@ static float constants[] = 3, 0, 0, 0, 0, .5, 0, 0, - 0, 0, 1, 0, + 1, 0, 0, 1, + 0, 0, 0, 1, + + 1, 0, 0, 0.5, + 0, 1, 0, 0.5, + 0, 0, 1, 0, + 0, 0, 0, 1, +}; + + +static float constants2[] = +{ 1, 0, 0, 1, + 0, 1, 0, 1, + 0, 0, 1, 1, + 0, 0, 0, 0, + + 1, 1, 0, 1, + 1, .5, 0, 1, + 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0.5, @@ -97,34 +116,58 @@ static void init_fs_constbuf( void ) templat.target = PIPE_BUFFER; templat.format = PIPE_FORMAT_R8_UNORM; - templat.width0 = sizeof(constants); + templat.width0 = sizeof(constants1); templat.height0 = 1; templat.depth0 = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; - constbuf = screen->resource_create(screen, - &templat); - if (constbuf == NULL) + constbuf1 = screen->resource_create(screen, + &templat); + if (constbuf1 == NULL) exit(4); + 
constbuf2 = screen->resource_create(screen, + &templat); + if (constbuf2 == NULL) + exit(4); - u_box_2d(0,0,sizeof(constants),1, &box); - ctx->transfer_inline_write(ctx, - constbuf, - u_subresource(0,0), - PIPE_TRANSFER_WRITE, - &box, - constants, - sizeof constants, - sizeof constants); + { + u_box_2d(0,0,sizeof(constants1),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf1, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants1, + sizeof constants1, + sizeof constants1); - ctx->set_constant_buffer(ctx, - PIPE_SHADER_FRAGMENT, 0, - constbuf); + ctx->set_constant_buffer(ctx, + PIPE_SHADER_FRAGMENT, 0, + constbuf1); + } + { + u_box_2d(0,0,sizeof(constants2),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf2, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants2, + sizeof constants2, + sizeof constants2); + + + ctx->set_constant_buffer(ctx, + PIPE_SHADER_FRAGMENT, 1, + constbuf2); + } } diff --git a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt new file mode 100644 index 00000000000..058acfbcb5a --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt @@ -0,0 +1,24 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT +DCL CONST[1][0..6] + +MOV OUT[0], IN[0][0] +MOV OUT[1], CONST[1][0] +EMIT + +MOV OUT[0], IN[1][0] +MOV OUT[1], CONST[1][1] +EMIT + +MOV OUT[0], IN[2][0] +MOV OUT[1], CONST[1][4] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/mov.txt b/src/gallium/tests/graw/geometry-shader/mov.txt index c37051de8a7..97150a5da45 100644 --- a/src/gallium/tests/graw/geometry-shader/mov.txt +++ b/src/gallium/tests/graw/geometry-shader/mov.txt @@ -11,7 +11,7 @@ MOV OUT[1], IN[0][1] EMIT MOV OUT[0], IN[1][0] -MOV OUT[1], IN[0][1] +MOV OUT[1], IN[1][1] EMIT MOV OUT[0], 
IN[2][0] diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index e8c82baaaff..3087d446fca 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -44,7 +44,8 @@ static const int HEIGHT = 250; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_resource *rttex = NULL; -static struct pipe_resource *constbuf = NULL; +static struct pipe_resource *constbuf1 = NULL; +static struct pipe_resource *constbuf2 = NULL; static struct pipe_surface *surf = NULL; static struct pipe_sampler_view *sv = NULL; static void *sampler = NULL; @@ -55,6 +56,7 @@ struct vertex { float position[4]; float color[4]; float texcoord[4]; + float generic[4]; }; /* Vertex data matches progs/fp/fp-tri.c, but flipped in Y dimension @@ -64,37 +66,51 @@ static struct vertex vertices[] = { { { 0.9, 0.9, 0.0, 1.0 }, { 0, 0, 1, 1 }, - { 1, 1, 0, 1 } }, + { 1, 1, 0, 1 }, + { 1, 0, 1, 0 } + }, { { 0.9, -0.9, 0.0, 1.0 }, { 1, 0, 0, 1 }, - { 1, -1, 0, 1 } }, + { 1, -1, 0, 1 }, + { 0, 1, 0, 1 } + }, { {-0.9, 0.0, 0.0, 1.0 }, { 0, 1, 0, 1 }, - { -1, 0, 0, 1 } }, + { -1, 0, 0, 1 }, + { 0, 0, 1, 1 } + }, }; static struct vertex vertices_strip[] = { { { 0.9, 0.9, 0.0, 1.0 }, { 0, 0, 1, 1 }, - { 1, 1, 0, 1 } }, + { 1, 1, 0, 1 }, + { 1, 0, 0, 1 } + }, { { 0.9, -0.9, 0.0, 1.0 }, { 1, 0, 0, 1 }, - { 1, -1, 0, 1 } }, + { 1, -1, 0, 1 }, + { 0, 1, 0, 1 } + }, - { {-0.9, -0.9, 0.0, 1.0 }, + { {-0.9, 0.9, 0.0, 1.0 }, { 0, 1, 0, 1 }, - { -1, -1, 0, 1 } }, + { -1, 1, 0, 1 }, + { 0, 0, 1, 1 } + }, - { {-0.9, 0.9, 0.0, 1.0 }, + { {-0.9, -0.9, 0.0, 1.0 }, { 1, 1, 0, 1 }, - { -1, 1, 0, 1 } }, + { -1, -1, 0, 1 }, + { 1, 1, 0, 1 } + }, }; -static float constants[] = +static float constants1[] = { 0.4, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, @@ -111,6 +127,25 @@ static float constants[] = 0, 0, 0, 1, }; + +static float constants2[] = +{ 1, 0, 0, 1, + 0, 1, 0, 1, + 0, 0, 1, 1, + 0, 0, 0, 1, + + 1, 1, 0, 1, + 1, .5, 0, 1, + 0, 1, 
1, 1, + 1, 0, 1, 1, + + 1, 0, 0, 0.5, + 0, 1, 0, 0.5, + 0, 0, 1, 0, + 0, 0, 0, 1, +}; + + static void init_fs_constbuf( void ) { struct pipe_resource templat; @@ -118,34 +153,54 @@ static void init_fs_constbuf( void ) templat.target = PIPE_BUFFER; templat.format = PIPE_FORMAT_R8_UNORM; - templat.width0 = sizeof(constants); + templat.width0 = sizeof(constants1); templat.height0 = 1; templat.depth0 = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; - constbuf = screen->resource_create(screen, - &templat); - if (constbuf == NULL) + constbuf1 = screen->resource_create(screen, &templat); + if (constbuf1 == NULL) + exit(4); + constbuf2 = screen->resource_create(screen, &templat); + if (constbuf2 == NULL) exit(4); + { + u_box_2d(0,0,sizeof(constants1),1, &box); - u_box_2d(0,0,sizeof(constants),1, &box); + ctx->transfer_inline_write(ctx, + constbuf1, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants1, + sizeof constants1, + sizeof constants1); - ctx->transfer_inline_write(ctx, - constbuf, - u_subresource(0,0), - PIPE_TRANSFER_WRITE, - &box, - constants, - sizeof constants, - sizeof constants); + ctx->set_constant_buffer(ctx, + PIPE_SHADER_GEOMETRY, 0, + constbuf1); + } + { + u_box_2d(0,0,sizeof(constants2),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf2, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants2, + sizeof constants2, + sizeof constants2); - ctx->set_constant_buffer(ctx, - PIPE_SHADER_FRAGMENT, 0, - constbuf); + + ctx->set_constant_buffer(ctx, + PIPE_SHADER_GEOMETRY, 1, + constbuf2); + } } @@ -174,7 +229,7 @@ static void set_viewport( float x, float y, static void set_vertices( void ) { - struct pipe_vertex_element ve[3]; + struct pipe_vertex_element ve[4]; struct pipe_vertex_buffer vbuf; void *handle; @@ -186,11 +241,12 @@ static void set_vertices( void ) ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; ve[2].src_offset = Offset(struct vertex, texcoord); ve[2].src_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[3].src_offset = Offset(struct vertex, generic); + ve[3].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - handle = ctx->create_vertex_elements_state(ctx, 3, ve); + handle = ctx->create_vertex_elements_state(ctx, 4, ve); ctx->bind_vertex_elements_state(ctx, handle); - vbuf.stride = sizeof( struct vertex ); vbuf.buffer_offset = 0; if (draw_strip) { @@ -218,12 +274,15 @@ static void set_vertex_shader( void ) "DCL IN[0]\n" "DCL IN[1]\n" "DCL IN[2]\n" + "DCL IN[3]\n" "DCL OUT[0], POSITION\n" "DCL OUT[1], COLOR[0]\n" "DCL OUT[2], GENERIC[0]\n" + "DCL OUT[3], GENERIC[1]\n" " MOV OUT[0], IN[0]\n" " MOV OUT[1], IN[1]\n" " MOV OUT[2], IN[2]\n" + " MOV OUT[3], IN[3]\n" " END\n"; handle = graw_parse_vertex_shader(ctx, text); diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh index f70a5146f4e..bbc3a10f9b5 100644 --- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh @@ -2,8 +2,8 @@ FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR -DCL CONST[1][1..2] +DCL CONST[1][6] -MAD OUT[0], IN[0], CONST[1][2], CONST[1][1] +MOV OUT[0], CONST[1][6] END diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh new file mode 100644 index 00000000000..6b034915769 --- /dev/null +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh @@ -0,0 +1,10 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR +DCL IMMX[0..1] {{ 0.5, 0.4, 0.6, 1.0 }, + { 0.5, 0.4, 0.6, 0.0 }} + +MAD OUT[0], IN[0], IMMX[0], IMMX[1] + +END diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh new file mode 100644 index 00000000000..81bcad2d65c --- /dev/null +++ 
b/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh @@ -0,0 +1,14 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR + +DCL TEMPX[0][0..1] + +IMM FLT32 { -0.5, -0.4, -0.6, 0.0 } + +ADD TEMPX[0][0], IN[0], IMM[0] +ADD TEMPX[0][1], IN[0], IMM[0] +ABS OUT[0], TEMPX[0][1] + +END diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index b772ff0dd9f..803049d58c9 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -48,19 +48,22 @@ boolean r600_buffer_get_handle(struct radeon *rw, struct winsys_handle *whandle) { struct drm_gem_flink flink; - struct r600_buffer* rbuffer; - int r; + struct r600_buffer* rbuffer = (struct r600_buffer*)buf; - rbuffer = (struct r600_buffer*)buf; - if (!rbuffer->flink) { - flink.handle = rbuffer->bo->handle; - r = ioctl(rw->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (r) { - return FALSE; + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!rbuffer->flink) { + flink.handle = rbuffer->bo->handle; + + if (ioctl(rw->fd, DRM_IOCTL_GEM_FLINK, &flink)) { + return FALSE; + } + + rbuffer->flink = flink.name; } - rbuffer->flink = flink.name; + whandle->handle = rbuffer->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = rbuffer->bo->handle; } - whandle->handle = rbuffer->flink; return TRUE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 59f1b10230e..a9ae09cb60e 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -34,6 +34,8 @@ #include "radeon_buffer.h" #include "r300_winsys.h" + +#include "galahad/glhd_drm.h" #include "trace/tr_drm.h" #include "util/u_memory.h" @@ -188,5 +190,5 @@ static struct drm_api radeon_drm_api_hooks = { struct drm_api* drm_api_create() { - return trace_drm_create(&radeon_drm_api_hooks); + return galahad_drm_create(trace_drm_create(&radeon_drm_api_hooks)); } diff --git 
a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index ee1b9ede257..a4b6cff33d1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -267,16 +267,14 @@ static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, struct winsys_handle *whandle) { - int retval, fd; struct drm_gem_flink flink; struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!buf->flinked) { - fd = buf->mgr->rws->fd; flink.handle = buf->bo->handle; - retval = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink); - if (retval) { + if (ioctl(buf->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { return FALSE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 70ae01a694b..d2d317dc209 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -136,29 +136,21 @@ static boolean radeon_r300_winsys_is_buffer_referenced(struct r300_winsys_screen } static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, - struct pipe_screen *screen, - struct winsys_handle *whandle, - unsigned *stride) + unsigned handle) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); struct pb_buffer *_buf; - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); - *stride = whandle->stride; + _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, handle); return radeon_libdrm_winsys_buffer(_buf); } static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, struct r300_winsys_buffer *buffer, - unsigned stride, - struct winsys_handle *whandle) + struct winsys_handle *whandle) { struct pb_buffer *_buf = radeon_pb_buffer(buffer); - boolean ret; - ret = radeon_drm_bufmgr_get_handle(_buf, 
whandle); - if (ret) - whandle->stride = stride; - return ret; + return radeon_drm_bufmgr_get_handle(_buf, whandle); } static void radeon_set_flush_cb(struct r300_winsys_screen *rws, @@ -192,25 +184,23 @@ static boolean radeon_validate(struct r300_winsys_screen *rws) return TRUE; } -static void radeon_get_cs_info(struct r300_winsys_screen *rws, - struct r300_cs_info *info) +static unsigned radeon_get_cs_free_dwords(struct r300_winsys_screen *rws) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); struct radeon_cs *cs = ws->cs; - info->capacity = cs->ndw; - info->used = cs->cdw; - info->free = cs->ndw - cs->cdw; + return cs->ndw - cs->cdw; } -static void radeon_begin_cs(struct r300_winsys_screen *rws, - int size, - const char* file, - const char* function, - int line) +static uint32_t *radeon_get_cs_pointer(struct r300_winsys_screen *rws, + unsigned count) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_begin(ws->cs, size, file, function, line); + struct radeon_cs *cs = ws->cs; + uint32_t *ptr = cs->packets + cs->cdw; + + cs->cdw += count; + return ptr; } static void radeon_write_cs_dword(struct r300_winsys_screen *rws, @@ -243,15 +233,6 @@ static void radeon_reset_bos(struct r300_winsys_screen *rws) radeon_cs_space_reset_bos(ws->cs); } -static void radeon_end_cs(struct r300_winsys_screen *rws, - const char* file, - const char* function, - int line) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_end(ws->cs, file, function, line); -} - static void radeon_flush_cs(struct r300_winsys_screen *rws) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); @@ -345,12 +326,11 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) ws->base.add_buffer = radeon_add_buffer; ws->base.validate = radeon_validate; ws->base.destroy = radeon_winsys_destroy; - ws->base.get_cs_info = radeon_get_cs_info; - ws->base.begin_cs = radeon_begin_cs; + ws->base.get_cs_free_dwords = radeon_get_cs_free_dwords; + 
ws->base.get_cs_pointer = radeon_get_cs_pointer; ws->base.write_cs_dword = radeon_write_cs_dword; ws->base.write_cs_table = radeon_write_cs_table; ws->base.write_cs_reloc = radeon_write_cs_reloc; - ws->base.end_cs = radeon_end_cs; ws->base.flush_cs = radeon_flush_cs; ws->base.reset_bos = radeon_reset_bos; ws->base.set_flush_cb = radeon_set_flush_cb; diff --git a/src/gallium/winsys/sw/fbdev/Makefile b/src/gallium/winsys/sw/fbdev/Makefile new file mode 100644 index 00000000000..8832aab1934 --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/Makefile @@ -0,0 +1,13 @@ +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = fbdev + +LIBRARY_INCLUDES = + +LIBRARY_DEFINES = + +C_SOURCES = \ + fbdev_sw_winsys.c + +include ../../../Makefile.template diff --git a/src/gallium/winsys/sw/fbdev/SConscript b/src/gallium/winsys/sw/fbdev/SConscript new file mode 100644 index 00000000000..3b5b4ff1c07 --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/SConscript @@ -0,0 +1,23 @@ +####################################################################### +# SConscript for fbdev winsys + + +Import('*') + +if env['platform'] == 'linux': + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/gallium/include', + '#/src/gallium/auxiliary', + '#/src/gallium/drivers', + ]) + + ws_fbdev = env.ConvenienceLibrary( + target = 'ws_fbdev', + source = [ + 'fbdev_sw_winsys.c', + ] + ) + Export('ws_fbdev') diff --git a/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c new file mode 100644 index 00000000000..f4f4cd7969b --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c @@ -0,0 +1,224 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <linux/fb.h> + +#include "pipe/p_compiler.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "state_tracker/sw_winsys.h" + +#include "fbdev_sw_winsys.h" + +struct fbdev_sw_displaytarget +{ + enum pipe_format format; + unsigned width; + unsigned height; + unsigned stride; + + void *data; + void *mapped; +}; + +struct fbdev_sw_winsys +{ + struct sw_winsys base; + + int fd; + enum pipe_format format; + + struct fb_fix_screeninfo finfo; + void *fbmem; + unsigned rows; + unsigned stride; +}; + +static INLINE struct fbdev_sw_displaytarget * +fbdev_sw_displaytarget(struct sw_displaytarget *dt) +{ + return (struct fbdev_sw_displaytarget *) dt; +} + +static INLINE struct fbdev_sw_winsys * +fbdev_sw_winsys(struct sw_winsys *ws) +{ + return (struct fbdev_sw_winsys *) ws; +} + +static void +fbdev_displaytarget_display(struct sw_winsys *ws, + struct sw_displaytarget *dt, + void *context_private) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + unsigned rows, len, i; + + rows = MIN2(fbdt->height, fbdev->rows); + len = util_format_get_stride(fbdt->format, fbdt->width); + len = MIN2(len, fbdev->stride); + + for (i = 0; i < rows; i++) { + void *dst = fbdev->fbmem + fbdev->stride * i; + void *src = fbdt->data + fbdt->stride * i; + + memcpy(dst, src, len); + } +} + +static void +fbdev_displaytarget_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + fbdt->mapped = NULL; +} + +static void * +fbdev_displaytarget_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + fbdt->mapped = fbdt->data; + return fbdt->mapped; +} + +static void +fbdev_displaytarget_destroy(struct sw_winsys *ws, 
+ struct sw_displaytarget *dt) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + + if (fbdt->data) + align_free(fbdt->data); + + FREE(fbdt); +} + +static struct sw_displaytarget * +fbdev_displaytarget_create(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + struct fbdev_sw_displaytarget *fbdt; + unsigned nblocksy, size, format_stride; + + if (fbdev->format != format) + return NULL; + + fbdt = CALLOC_STRUCT(fbdev_sw_displaytarget); + if (!fbdt) + return NULL; + + fbdt->format = format; + fbdt->width = width; + fbdt->height = height; + + format_stride = util_format_get_stride(format, width); + fbdt->stride = align(format_stride, alignment); + + nblocksy = util_format_get_nblocksy(format, height); + size = fbdt->stride * nblocksy; + + fbdt->data = align_malloc(size, alignment); + if (!fbdt->data) { + FREE(fbdt); + return NULL; + } + + *stride = fbdt->stride; + + return (struct sw_displaytarget *) fbdt; +} + +static boolean +fbdev_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + return (fbdev->format == format); +} + +static void +fbdev_destroy(struct sw_winsys *ws) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + + munmap(fbdev->fbmem, fbdev->finfo.smem_len); + FREE(fbdev); +} + +struct sw_winsys * +fbdev_create_sw_winsys(int fd, enum pipe_format format) +{ + struct fbdev_sw_winsys *fbdev; + + fbdev = CALLOC_STRUCT(fbdev_sw_winsys); + if (!fbdev) + return NULL; + + fbdev->fd = fd; + fbdev->format = format; + if (ioctl(fbdev->fd, FBIOGET_FSCREENINFO, &fbdev->finfo)) { + FREE(fbdev); + return NULL; + } + + fbdev->fbmem = mmap(0, fbdev->finfo.smem_len, + PROT_WRITE, MAP_SHARED, fbdev->fd, 0); + if (fbdev->fbmem == MAP_FAILED) { + FREE(fbdev); + return NULL; + } + + 
fbdev->rows = fbdev->finfo.smem_len / fbdev->finfo.line_length; + fbdev->stride = fbdev->finfo.line_length; + + fbdev->base.destroy = fbdev_destroy; + fbdev->base.is_displaytarget_format_supported = + fbdev_is_displaytarget_format_supported; + + fbdev->base.displaytarget_create = fbdev_displaytarget_create; + fbdev->base.displaytarget_destroy = fbdev_displaytarget_destroy; + fbdev->base.displaytarget_map = fbdev_displaytarget_map; + fbdev->base.displaytarget_unmap = fbdev_displaytarget_unmap; + + fbdev->base.displaytarget_display = fbdev_displaytarget_display; + + return &fbdev->base; +} diff --git a/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h new file mode 100644 index 00000000000..d958ab9db3e --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h @@ -0,0 +1,38 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef FBDEV_SW_WINSYS +#define FBDEV_SW_WINSYS + +struct sw_winsys; +enum pipe_format; + +struct sw_winsys * +fbdev_create_sw_winsys(int fd, enum pipe_format format); + +#endif /* FBDEV_SW_WINSYS */ diff --git a/src/mapi/mapi/u_execmem.c b/src/mapi/mapi/u_execmem.c index 00df8300de3..e5072e06e9a 100644 --- a/src/mapi/mapi/u_execmem.c +++ b/src/mapi/mapi/u_execmem.c @@ -105,6 +105,8 @@ init_map(void) #else +#include <stdlib.h> + static int init_map(void) { diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 629ec0ffec5..c548e104203 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2400,6 +2400,9 @@ _mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, break; } + /* Set MaxLevel large enough to hold the new level when we allocate it */ + _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel); + /* Create empty dest image */ if (target == GL_TEXTURE_1D) { _mesa_TexImage1D(target, dstLevel, srcImage->InternalFormat, diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index a0039e800d2..831981558d8 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -61,6 +61,7 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ + brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_dump.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index c9e42a1529b..cfce5d31405 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -36,7 +36,8 @@ #include "brw_util.h" #include "main/macros.h" -static void prepare_cc_vp( 
struct brw_context *brw ) +void +brw_update_cc_vp(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; @@ -54,40 +55,9 @@ static void prepare_cc_vp( struct brw_context *brw ) } drm_intel_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), - NULL, 0); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv)); } -const struct brw_tracked_state brw_cc_vp = { - .dirty = { - .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .prepare = prepare_cc_vp -}; - -struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; -}; - /** * Modify blend function to force destination alpha to 1.0 * @@ -110,136 +80,83 @@ fix_xRGB_alpha(GLenum function) return function; } -static void -cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void prepare_cc_unit(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - - memset(key, 0, sizeof(*key)); - - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = 
ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - - /* If the renderbuffer is XRGB, we have to frob the blend function to - * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA - * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) { - key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); - key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); - key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); - key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); - } - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + brw_add_validated_bo(brw, brw->cc.vp_bo); } /** * Creates the state cache entry for the given CC unit key. 
*/ -static drm_intel_bo * -cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void upload_cc_unit(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_unit_state cc; - drm_intel_bo *bo; + void *map; memset(&cc, 0, sizeof(cc)); /* _NEW_STENCIL */ - if (key->stencil) { + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + cc.cc0.stencil_enable = 1; cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); + intel_translate_compare_func(ctx->Stencil.Function[0]); cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[0]); cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]); cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]); + cc.cc1.stencil_ref = ctx->Stencil.Ref[0]; + cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0]; + cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0]; - if (key->stencil_two_side) { + if (ctx->Stencil._TestTwoSide) { cc.cc0.bf_stencil_enable = 1; cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); + intel_translate_compare_func(ctx->Stencil.Function[back]); cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[back]); cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]); cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - 
cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]); + cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back]; + cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; + cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; } /* Not really sure about this: */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) + if (ctx->Stencil.WriteMask[0] || + (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back])) cc.cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { + if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) { cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; + cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp); + } else if (ctx->Color.BlendEnabled) { + GLenum eqRGB = ctx->Color.BlendEquationRGB; + GLenum eqA = ctx->Color.BlendEquationA; + GLenum srcRGB = ctx->Color.BlendSrcRGB; + GLenum dstRGB = ctx->Color.BlendDstRGB; + GLenum srcA = ctx->Color.BlendSrcA; + GLenum dstA = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. 
+ */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) { + srcRGB = fix_xRGB_alpha(srcRGB); + srcA = fix_xRGB_alpha(srcA); + dstRGB = fix_xRGB_alpha(dstRGB); + dstA = fix_xRGB_alpha(dstA); + } if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -263,25 +180,27 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) eqA != eqRGB); } - if (key->alpha_enabled) { + if (ctx->Color.AlphaEnabled) { cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_func = + intel_translate_compare_func(ctx->Color.AlphaFunc); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef); } - if (key->dither) { + if (ctx->Color.DitherFlag) { cc.cc5.dither_enable = 1; cc.cc6.y_dither_offset = 0; cc.cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ - if (key->depth_test) { + if (ctx->Depth.Test) { cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; + cc.cc2.depth_test_function = + intel_translate_compare_func(ctx->Depth.Func); + cc.cc2.depth_write_enable = ctx->Depth.Mask; } /* CACHE_NEW_CC_VP */ @@ -290,43 +209,25 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - &brw->cc.vp_bo, 1, - &cc, sizeof(cc)); + map = brw_state_batch(brw, sizeof(cc), 64, + &brw->cc.state_bo, &brw->cc.state_offset); + memcpy(map, &cc, sizeof(cc)); + brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; /* Emit CC viewport relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_cc_unit_state, cc4), + drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset + + offsetof(struct brw_cc_unit_state, + cc4)), brw->cc.vp_bo, 0, 
I915_GEM_DOMAIN_INSTRUCTION, 0); - - return bo; -} - -static void prepare_cc_unit( struct brw_context *brw ) -{ - struct brw_cc_unit_key key; - - cc_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->cc.state_bo); - brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, - &key, sizeof(key), - &brw->cc.vp_bo, 1, - NULL); - - if (brw->cc.state_bo == NULL) - brw->cc.state_bo = cc_unit_create_from_key(brw, &key); } const struct brw_tracked_state brw_cc_unit = { .dirty = { .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_CC_VP }, .prepare = prepare_cc_unit, + .emit = upload_cc_unit, }; - - - diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index e688431b126..6d064b822e5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -34,8 +34,6 @@ #include "main/api_noop.h" #include "main/macros.h" #include "main/simple_list.h" -#include "program/shader_api.h" - #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" @@ -54,6 +52,9 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); + + functions->Enable = brw_enable; + functions->DepthRange = brw_depth_range; } GLboolean brwCreateContext( int api, @@ -187,6 +188,11 @@ GLboolean brwCreateContext( int api, brw_draw_init( brw ); + /* Now that most driver functions are hooked up, initialize some of the + * immediate state. 
+ */ + brw_update_cc_vp(brw); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d97634c1c60..cc4e6638e8b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 +#define BRW_NEW_BINDING_TABLE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -143,6 +144,8 @@ struct brw_context; #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 #define BRW_NEW_INDEX_BUFFER 0x100000 +#define BRW_NEW_VS_CONSTBUF 0x200000 +#define BRW_NEW_WM_CONSTBUF 0x200000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -160,7 +163,6 @@ struct brw_state_flags { struct brw_vertex_program { struct gl_vertex_program program; GLuint id; - drm_intel_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; }; @@ -172,7 +174,6 @@ struct brw_fragment_program { GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ GLboolean use_const_buffer; - drm_intel_bo *const_buffer; /** Program constant buffer/surface */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -301,8 +302,6 @@ enum brw_cache_id { BRW_CLIP_VP, BRW_CLIP_UNIT, BRW_CLIP_PROG, - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, BRW_MAX_CACHE }; @@ -376,8 +375,6 @@ struct brw_tracked_state { #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) #define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) struct brw_cached_batch_item { struct header *header; @@ -460,12 +457,11 @@ struct brw_context * consisting of the vertex buffers, pipelined state pointers, * the CURBE, the depth buffer, and a query BO. 
*/ - drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; int validated_bo_count; } state; - struct brw_cache cache; /** non-surface items */ - struct brw_cache surface_cache; /* surface items */ + struct brw_cache cache; struct brw_cached_batch_item *cached_batch_items; struct { @@ -594,10 +590,13 @@ struct brw_context drm_intel_bo *prog_bo; drm_intel_bo *state_bo; + drm_intel_bo *const_bo; /** Binding table of pointers to surf_bo entries */ drm_intel_bo *bind_bo; + uint32_t bind_bo_offset; drm_intel_bo *surf_bo[BRW_VS_MAX_SURF]; + uint32_t surf_offset[BRW_VS_MAX_SURF]; GLuint nr_surfaces; } vs; @@ -649,10 +648,13 @@ struct brw_context /** Binding table of pointers to surf_bo entries */ drm_intel_bo *bind_bo; + uint32_t bind_bo_offset; drm_intel_bo *surf_bo[BRW_WM_MAX_SURF]; + uint32_t surf_offset[BRW_WM_MAX_SURF]; drm_intel_bo *prog_bo; drm_intel_bo *state_bo; + drm_intel_bo *const_bo; } wm; @@ -667,6 +669,7 @@ struct brw_context drm_intel_bo *color_calc_state_bo; drm_intel_bo *state_bo; + uint32_t state_offset; } cc; struct { @@ -727,6 +730,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); +/* brw_cc.c */ +void brw_update_cc_vp(struct brw_context *brw); + /* brw_curbe.c */ void brw_upload_cs_urb_state(struct brw_context *brw); @@ -734,6 +740,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); +/* brw_state.c */ +void brw_enable(GLcontext * ctx, GLenum cap, GLboolean state); +void brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval); + /*====================================================================== * Inline conversion functions. 
These are better-typed than the * macros used previously: diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 3d52f6f6047..8196d8ca625 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -182,8 +182,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -200,8 +198,6 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); - /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; @@ -244,14 +240,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - if (vp->use_const_buffer) { /* Load the subset of push constants that will get used when * we also have a pull constant buffer. 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 2d3556b8054..39bf5b63fc2 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -998,7 +998,7 @@ # define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) # define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) # define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) -# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) # define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) # define GEN6_WM_USES_SOURCE_W (1 << 8) # define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 3e305c89686..16331cc3ac0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -151,9 +151,6 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.start_instance_location = 0; prim_packet.base_vert_location = prim->basevertex; - /* Can't wrap here, since we rely on the validated state. */ - intel->no_batch_wrap = GL_TRUE; - /* If we're set to always flush, do it before and after the primitive emit. 
* We want to catch both missed flushes that hurt instruction/state cache * and missed flushes of the render cache as it heads to other parts of @@ -169,8 +166,6 @@ static void brw_emit_prim(struct brw_context *brw, if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); } - - intel->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, @@ -394,11 +389,14 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, } } + intel->no_batch_wrap = GL_TRUE; brw_upload_state(brw); } brw_emit_prim(brw, &prim[i], hw_prim); + intel->no_batch_wrap = GL_FALSE; + retval = GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 175899b0268..34dfe10cb93 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -286,6 +286,7 @@ static void brw_set_ff_sync_message(struct brw_context *brw, GLuint response_length, GLboolean end_of_thread) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ @@ -298,8 +299,12 @@ static void brw_set_ff_sync_message(struct brw_context *brw, insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_gen5.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; + } } static void brw_set_urb_message( struct brw_context *brw, @@ -966,10 +971,25 @@ void brw_math_16( struct brw_compile *p, struct brw_reg src, GLuint precision ) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 
2 : 1; GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + if (intel->gen >= 6) { + insn = next_insn(p, BRW_OPCODE_MATH); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + return; + } + /* First instruction: */ brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 35908ee7b69..572175f463e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -96,18 +96,12 @@ const struct brw_tracked_state brw_drawing_rect = { .emit = upload_drawing_rect }; -static void prepare_binding_table_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->vs.bind_bo); - brw_add_validated_bo(brw, brw->wm.bind_bo); -} - /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. * * The binding table pointers are relative to the surface state base address, - * which is 0. + * which points at the batchbuffer containing the streamed batch state. 
*/ static void upload_binding_table_pointers(struct brw_context *brw) { @@ -115,24 +109,20 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ - else - OUT_BATCH(0); + OUT_BATCH(brw->vs.bind_bo_offset); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_BATCH(brw->wm.bind_bo_offset); ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -141,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { * state pointers. * * The binding table pointers are relative to the surface state base address, - * which is 0. + * which points at the batchbuffer containing the streamed batch state. 
*/ static void upload_gen6_binding_table_pointers(struct brw_context *brw) { @@ -153,22 +143,18 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_GS | GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); - if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ - else - OUT_BATCH(0); + OUT_BATCH(brw->vs.bind_bo_offset); /* vs */ OUT_BATCH(0); /* gs */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */ ADVANCE_BATCH(); } const struct brw_tracked_state gen6_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_gen6_binding_table_pointers, }; @@ -199,7 +185,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->cc.state_offset); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; @@ -213,7 +200,6 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->clip.state_bo); brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); - brw_add_validated_bo(brw, brw->cc.state_bo); } static void upload_psp_urb_cbs(struct brw_context *brw ) @@ -590,23 +576,23 @@ const struct brw_tracked_state brw_invarient_state = { /** * Define the base addresses which some state is referenced from. * - * This allows us to avoid having to emit relocations in many places for - * cached state, and instead emit pointers inside of large, mostly-static - * state pools. 
This comes at the expense of memory, and more expensive cache - * misses. + * This allows us to avoid having to emit relocations for the objects, + * and is actually required for binding table pointers on gen6. + * + * Surface state base address covers binding table pointers and + * surface state objects, but not the surfaces that the surface state + * objects point to. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - /* Output the structure (brw_state_base_address) directly to the - * batchbuffer, so we can emit relocations inline. - */ if (intel->gen >= 6) { BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Dynamic state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ @@ -619,7 +605,8 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ @@ -630,7 +617,8 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ + OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object 
upper bound */ @@ -641,7 +629,7 @@ static void upload_state_base_address( struct brw_context *brw ) const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_BATCH, .cache = 0, }, .emit = upload_state_base_address diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index cc9ac6d5749..aeed24d4e14 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -31,10 +31,10 @@ #include "main/imports.h" #include "main/enums.h" +#include "main/shaderobj.h" #include "program/prog_parameter.h" #include "program/program.h" #include "program/programopt.h" -#include "program/shader_api.h" #include "tnl/tnl.h" #include "brw_context.h" @@ -95,20 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fp = brw_fragment_program(fp); - - drm_intel_bo_unreference(brw_fp->const_buffer); - } - - if (prog->Target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program *vp = (struct gl_vertex_program *) prog; - struct brw_vertex_program *brw_vp = brw_vertex_program(vp); - - drm_intel_bo_unreference(brw_vp->const_buffer); - } - _mesa_delete_program( ctx, prog ); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index a95acb4cf82..e290ca92f60 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -105,8 +105,7 @@ static void upload_sf_vp(struct brw_context *brw) } drm_intel_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), - NULL, 0); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv)); } const struct brw_tracked_state brw_sf_vp = { 
diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c new file mode 100644 index 00000000000..1e77e427d38 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state.c @@ -0,0 +1,49 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "brw_context.h" + +void +brw_enable(GLcontext *ctx, GLenum cap, GLboolean state) +{ + struct brw_context *brw = brw_context(ctx); + + switch (cap) { + case GL_DEPTH_CLAMP: + brw_update_cc_vp(brw); + break; + } +} + +void +brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval) +{ + struct brw_context *brw = brw_context(ctx); + + if (ctx->Transform.DepthClamp) + brw_update_cc_vp(brw); +} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 85949215e82..40eece276b7 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -48,10 +48,11 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo) const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_vs_constants; +const struct brw_tracked_state brw_wm_constants; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; @@ -80,6 +81,7 @@ const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_binding_table; const struct brw_tracked_state brw_wm_unit; const struct brw_tracked_state brw_psp_urb_cbs; @@ -93,7 +95,6 @@ const struct brw_tracked_state brw_index_buffer; const struct brw_tracked_state gen6_binding_table_pointers; const struct brw_tracked_state gen6_blend_state; const struct brw_tracked_state gen6_cc_state_pointers; -const struct brw_tracked_state gen6_cc_vp; const struct brw_tracked_state gen6_clip_state; 
const struct brw_tracked_state gen6_clip_vp; const struct brw_tracked_state gen6_color_calc_state; @@ -108,20 +109,6 @@ const struct brw_tracked_state gen6_viewport_state; const struct brw_tracked_state gen6_vs_state; const struct brw_tracked_state gen6_wm_state; -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - drm_intel_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - /*********************************************************************** * brw_state.c */ @@ -137,9 +124,7 @@ void brw_clear_validated_bos(struct brw_context *brw); drm_intel_bo *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - GLuint size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs); + GLuint size); drm_intel_bo *brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, @@ -173,7 +158,6 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); -void brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo); /*********************************************************************** * brw_state_batch.c @@ -186,10 +170,17 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache( struct brw_context *brw ); +void *brw_state_batch(struct brw_context *brw, + int size, + int alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset); /* brw_wm_surface_state.c */ -drm_intel_bo * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ); +void brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + int width, + drm_intel_bo **out_bo, + uint32_t *out_offset); #endif diff --git 
a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 39019412fda..be3989eb7db 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,3 +97,52 @@ void brw_destroy_batch_cache( struct brw_context *brw ) { brw_clear_batch_cache(brw); } + +/** + * Allocates a block of space in the batchbuffer for indirect state. + * + * We don't want to allocate separate BOs for every bit of indirect + * state in the driver. It means overallocating by a significant + * margin (4096 bytes, even if the object is just a 20-byte surface + * state), and more buffers to walk and count for aperture size checking. + * + * However, due to the restrictions imposed by the aperture size + * checking performance hacks, we can't have the batch point at a + * separate indirect state buffer, because once the batch points at + * it, no more relocations can be added to it. So, we sneak these + * buffers in at the top of the batchbuffer. + */ +void * +brw_state_batch(struct brw_context *brw, + int size, + int alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset) +{ + struct intel_batchbuffer *batch = brw->intel.batch; + uint32_t offset; + + assert(size < batch->buf->size); + offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); + + /* If allocating from the top would wrap below the batchbuffer, or + * if the batch's used space (plus the reserved pad) collides with our + * space, then flush and try again. 
+ */ + if (batch->state_batch_offset < size || + offset < batch->ptr - batch->map + batch->reserved_space) { + intel_batchbuffer_flush(batch); + offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); + } + + batch->state_batch_offset = offset; + + if (*out_bo != batch->buf) { + drm_intel_bo_unreference(*out_bo); + drm_intel_bo_reference(batch->buf); + *out_bo = batch->buf; + } + + *out_offset = offset; + return batch->map + offset; +} diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index ea81ad13417..b31d84953a1 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -310,9 +310,7 @@ drm_intel_bo * brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - GLuint data_size, - drm_intel_bo **reloc_bufs, - GLuint nr_reloc_bufs) + GLuint data_size) { drm_intel_bo *bo; struct brw_cache_item *item, lookup; @@ -321,8 +319,8 @@ brw_cache_data(struct brw_cache *cache, lookup.cache_id = cache_id; lookup.key = data; lookup.key_size = data_size; - lookup.reloc_bufs = reloc_bufs; - lookup.nr_reloc_bufs = nr_reloc_bufs; + lookup.reloc_bufs = NULL; + lookup.nr_reloc_bufs = 0; hash = hash_key(&lookup); lookup.hash = hash; @@ -335,7 +333,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, - reloc_bufs, nr_reloc_bufs, + NULL, 0, data, data_size); return bo; @@ -396,29 +394,10 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); } - -static void -brw_init_surface_cache(struct brw_context *brw) -{ - struct brw_cache *cache = &brw->surface_cache; - - cache->brw = brw; - - cache->size = 7; - cache->n_items = 0; - cache->items = (struct brw_cache_item **) - calloc(1, cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); - brw_init_cache_id(cache, "SS_SURF_BIND", 
BRW_SS_SURF_BIND); -} - - void brw_init_caches(struct brw_context *brw) { brw_init_non_surface_cache(brw); - brw_init_surface_cache(brw); } @@ -452,56 +431,17 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } -/* Clear all entries from the cache that point to the given bo. - * - * This lets us release memory for reuse earlier for known-dead buffers, - * at the cost of walking the entire hash table. - */ -void -brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo) -{ - struct brw_cache_item **prev; - GLuint i; - - if (INTEL_DEBUG & DEBUG_STATE) - printf("%s\n", __FUNCTION__); - - for (i = 0; i < cache->size; i++) { - for (prev = &cache->items[i]; *prev;) { - struct brw_cache_item *c = *prev; - - if (drm_intel_bo_references(c->bo, bo)) { - int j; - - *prev = c->next; - - for (j = 0; j < c->nr_reloc_bufs; j++) - drm_intel_bo_unreference(c->reloc_bufs[j]); - drm_intel_bo_unreference(c->bo); - free((void *)c->key); - free(c); - cache->n_items--; - } else { - prev = &c->next; - } - } - } -} - void brw_state_cache_check_size(struct brw_context *brw) { if (INTEL_DEBUG & DEBUG_STATE) printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); - /* un-tuned guess. We've got around 20 state objects for a total of around - * 32k, so 1000 of them is around 1.5MB. + /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of + * state cache. 
*/ if (brw->cache.n_items > 1000) brw_clear_cache(brw, &brw->cache); - - if (brw->surface_cache.n_items > 1000) - brw_clear_cache(brw, &brw->surface_cache); } @@ -528,5 +468,4 @@ void brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, &brw->cache); - brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index cb66806ebf3..d410861bdf6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -111,8 +111,8 @@ static void dump_wm_surface_state(struct brw_context *brw) continue; } drm_intel_bo_map(surf_bo, GL_FALSE); - surfoff = surf_bo->offset; - surf = (struct brw_surface_state *)(surf_bo->virtual); + surfoff = surf_bo->offset + brw->wm.surf_offset[i]; + surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]); sprintf(name, "WM SS%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 49629ba2289..f92a19c2aa0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -61,12 +61,15 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, + &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ + &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ + &brw_wm_binding_table, &brw_wm_samplers, &brw_wm_unit, @@ -113,7 +116,6 @@ const struct brw_tracked_state *gen6_atoms[] = &gen6_clip_vp, &gen6_sf_vp, - &gen6_cc_vp, /* Command packets: */ &brw_invarient_state, @@ -126,9 +128,13 @@ const struct brw_tracked_state *gen6_atoms[] = &gen6_depth_stencil_state, /* must do before 
cc unit */ &gen6_cc_state_pointers, + &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ + &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ + &brw_wm_binding_table, &brw_wm_samplers, &gen6_sampler_state, @@ -266,6 +272,8 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), + DEFINE_BIT(BRW_NEW_WM_SURFACES), + DEFINE_BIT(BRW_NEW_BINDING_TABLE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), @@ -292,8 +300,6 @@ static struct dirty_bit_map cache_bits[] = { DEFINE_BIT(CACHE_NEW_CLIP_VP), DEFINE_BIT(CACHE_NEW_CLIP_UNIT), DEFINE_BIT(CACHE_NEW_CLIP_PROG), - DEFINE_BIT(CACHE_NEW_SURFACE), - DEFINE_BIT(CACHE_NEW_SURF_BIND), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index f17fe485306..2a7fa5b6997 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -278,7 +278,7 @@ struct brw_aa_line_parameters struct header header; struct { - GLuint aa_coverage_scope:8; + GLuint aa_coverage_slope:8; GLuint pad0:8; GLuint aa_coverage_bias:8; GLuint pad1:8; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 568c2e3b030..0250a68d292 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -42,42 +42,59 @@ * Otherwise, constants go through the CURBEs using the brw_constant_buffer * state atom. 
*/ -static drm_intel_bo * -brw_vs_update_constant_buffer(struct brw_context *brw) +static void +prepare_vs_constants(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - drm_intel_bo *const_buffer; int i; - /* BRW_NEW_VERTEX_PROGRAM */ - if (!vp->use_const_buffer) - return NULL; - - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", - size, 64); - - /* _NEW_PROGRAM_CONSTANTS */ + if (vp->program.IsNVProgram) + _mesa_load_tracked_matrices(ctx); /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - drm_intel_gem_bo_map_gtt(const_buffer); + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) { + if (brw->vs.const_bo) { + drm_intel_bo_unreference(brw->vs.const_bo); + brw->vs.const_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; + } + return; + } + + /* _NEW_PROGRAM_CONSTANTS */ + drm_intel_bo_unreference(brw->vs.const_bo); + brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + drm_intel_gem_bo_map_gtt(brw->vs.const_bo); for (i = 0; i < params->NumParameters; i++) { - memcpy(const_buffer->virtual + i * 4 * sizeof(float), + memcpy(brw->vs.const_bo->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - drm_intel_gem_bo_unmap_gtt(const_buffer); - - return const_buffer; + drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo); + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } +const struct brw_tracked_state brw_vs_constants = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_constants, +}; + /** * Update the 
surface state for a VS constant buffer. * @@ -88,101 +105,41 @@ brw_update_vs_constant_surface( GLcontext *ctx, GLuint surf) { struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; assert(surf == 0); - /* If we're in this state update atom, we need to update VS constants, so - * free the old buffer and create a new one for the new contents. - */ - drm_intel_bo_unreference(vp->const_buffer); - vp->const_buffer = brw_vs_update_constant_buffer(brw); - /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == NULL) { + if (brw->vs.const_bo == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; } - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = vp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; - - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ - - drm_intel_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } + brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters, + &brw->vs.surf_bo[surf], + &brw->vs.surf_offset[surf]); } -/** - * Constructs the binding table for the VS surface state. 
- */ -static drm_intel_bo * -brw_vs_get_binding_table(struct brw_context *brw) +static void +prepare_vs_surfaces(struct brw_context *brw) { - drm_intel_bo *bind_bo; - - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); - uint32_t data[BRW_VS_MAX_SURF]; - int i; - - for (i = 0; i < BRW_VS_MAX_SURF; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - /* The presumed offsets were set in the data values for - * brw_upload_cache. - */ - drm_intel_bo_emit_reloc(bind_bo, i * 4, - brw->vs.surf_bo[i], 0, - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - } + int nr_surfaces = 0; + + if (brw->vs.const_bo) { + brw_add_validated_bo(brw, brw->vs.const_bo); + nr_surfaces = 1; } - return bind_bo; + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } } /** @@ -192,43 +149,50 @@ brw_vs_get_binding_table(struct brw_context *brw) * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and * CACHE_NEW_SURF_BIND for the binding table upload. 
*/ -static void prepare_vs_surfaces(struct brw_context *brw ) +static void upload_vs_surfaces(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; + uint32_t *bind; int i; - int nr_surfaces = 0; - - brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - nr_surfaces = i + 1; + /* BRW_NEW_NR_VS_SURFACES */ + if (brw->vs.nr_surfaces == 0) { + if (brw->vs.bind_bo) { + drm_intel_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; } + return; } - if (brw->vs.nr_surfaces != nr_surfaces) { - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; - brw->vs.nr_surfaces = nr_surfaces; - } + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); - /* Note that we don't end up updating the bind_bo if we don't have a - * surface to be pointing at. This should be relatively harmless, as it - * just slightly increases our working set size. + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. 
(once we have vs samplers) */ - if (brw->vs.nr_surfaces != 0) { - drm_intel_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + 32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + /* BRW_NEW_VS_CONSTBUF */ + if (brw->vs.surf_bo[i]) { + bind[i] = brw->vs.surf_offset[i]; + } else { + bind[i] = 0; + } } + + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; } const struct brw_tracked_state brw_vs_surfaces = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_VERTEX_PROGRAM), + .mesa = 0, + .brw = (BRW_NEW_VS_CONSTBUF | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_BATCH), .cache = 0 }, .prepare = prepare_vs_surfaces, + .emit = upload_vs_surfaces, }; - - - diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index a02e958c5e6..14227a51332 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -83,6 +83,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); dri_bo_release(&brw->vs.bind_bo); + dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); @@ -99,6 +100,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); + dri_bo_release(&brw->wm.const_bo); dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 9fbabdc2852..1fc802cfa65 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -74,7 +74,7 @@ static drm_intel_bo *upload_default_color( struct 
brw_context *brw, COPY_4V(sdc.color, color); return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc), NULL, 0); + &sdc, sizeof(sdc)); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c7b61240e75..17b016b569b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -196,36 +196,40 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) } } -static drm_intel_bo * -brw_create_texture_surface( struct brw_context *brw, - struct brw_surface_key *key ) +static void +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct brw_surface_state surf; - drm_intel_bo *bo; + void *map; memset(&surf, 0, sizeof(surf)); surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(key->target); - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); + surf.ss0.surface_type = translate_tex_target(tObj->Target); + surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat, + firstImage->InternalFormat, + tObj->DepthMode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - surf.ss1.base_addr = key->bo->offset; /* reloc */ + surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ - surf.ss2.mip_count = key->last_level - key->first_level; - surf.ss2.width = key->width - 1; - surf.ss2.height = key->height - 1; - brw_set_surface_tiling(&surf, key->tiling); - surf.ss3.pitch = (key->pitch * 
key->cpp) - 1; - surf.ss3.depth = key->depth - 1; + surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; + surf.ss2.width = firstImage->Width - 1; + surf.ss2.height = firstImage->Height - 1; + brw_set_surface_tiling(&surf, intelObj->mt->region->tiling); + surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1; + surf.ss3.depth = firstImage->Depth - 1; surf.ss4.min_lod = 0; - if (key->target == GL_TEXTURE_CUBE_MAP) { + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { surf.ss0.cube_pos_x = 1; surf.ss0.cube_pos_y = 1; surf.ss0.cube_pos_z = 1; @@ -234,71 +238,33 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, 1, - &surf, sizeof(surf)); + map = brw_state_batch(brw, sizeof(surf), 32, + &brw->wm.surf_bo[surf_index], + &brw->wm.surf_offset[surf_index]); + memcpy(map, &surf, sizeof(surf)); /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), - key->bo, 0, + drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index], + brw->wm.surf_offset[surf_index] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer, 0, I915_GEM_DOMAIN_SAMPLER, 0); - - return bo; -} - -static void -brw_update_texture_surface( GLcontext *ctx, GLuint unit ) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_surface_key key; - const GLuint surf = SURF_INDEX_TEXTURE(unit); - - memset(&key, 0, sizeof(key)); - - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->region->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - - key.target = tObj->Target; - 
key.depthmode = tObj->DepthMode; - key.first_level = intelObj->firstLevel; - key.last_level = intelObj->lastLevel; - key.width = firstImage->Width; - key.height = firstImage->Height; - key.cpp = intelObj->mt->cpp; - key.tiling = intelObj->mt->region->tiling; - - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); - } } - - /** * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -drm_intel_bo * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ) +void +brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + int width, + drm_intel_bo **out_bo, + uint32_t *out_offset) { - const GLint w = key->width - 1; + const GLint w = width - 1; struct brw_surface_state surf; - drm_intel_bo *bo; + void *map; memset(&surf, 0, sizeof(surf)); @@ -306,29 +272,26 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - assert(key->bo); - surf.ss1.base_addr = key->bo->offset; /* reloc */ + assert(bo); + surf.ss1.base_addr = bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ - brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, 1, - &surf, sizeof(surf)); + surf.ss3.pitch = (width * 16) - 1; /* ignored?? 
*/ + brw_set_surface_tiling(&surf, I915_TILING_NONE); /* tiling now allowed */ + + map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset); + memcpy(map, &surf, sizeof(surf)); /* Emit relocation to surface contents. Section 5.1.1 of the gen4 * bspec ("Data Cache") says that the data cache does not exist as * a separate cache and is just the sampler cache. */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), - key->bo, 0, + drm_intel_bo_emit_reloc(*out_bo, (*out_offset + + offsetof(struct brw_surface_state, ss1)), + bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); - - return bo; } /* Creates a new WM constant buffer reflecting the current fragment program's @@ -337,89 +300,45 @@ brw_create_constant_surface( struct brw_context *brw, * Otherwise, constants go through the CURBEs using the brw_constant_buffer * state atom. */ -static drm_intel_bo * -brw_wm_update_constant_buffer(struct brw_context *brw) +static void +prepare_wm_constants(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; const struct gl_program_parameter_list *params = fp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - drm_intel_bo *const_buffer; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (!fp->use_const_buffer) - return NULL; - - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", - size, 64); - /* _NEW_PROGRAM_CONSTANTS */ - drm_intel_bo_subdata(const_buffer, 0, size, params->ParameterValues); - - return const_buffer; -} + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); -/** - * Update the surface state for a WM constant buffer. - * The constant buffer will be (re)allocated here if needed. 
- */ -static void -brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = - fp->program.Base.Parameters; - - /* If we're in this state update atom, we need to update WM constants, so - * free the old buffer and create a new one for the new contents. - */ - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); - - /* If there's no constant buffer, then no surface BO is needed to point at - * it. - */ - if (fp->const_buffer == NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) { + if (brw->wm.const_bo) { + drm_intel_bo_unreference(brw->wm.const_bo); + brw->wm.const_bo = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF; + } return; } - memset(&key, 0, sizeof(key)); + drm_intel_bo_unreference(brw->wm.const_bo); + brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = fp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; - - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ - - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + /* _NEW_PROGRAM_CONSTANTS */ + 
drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues); } +const struct brw_tracked_state brw_wm_constants = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constants, +}; + /** * Updates surface / buffer for fragment shader constant buffer, if * one is required. @@ -428,20 +347,18 @@ brw_update_wm_constant_surface( GLcontext *ctx, * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for * inclusion in the binding table. */ -static void prepare_wm_constant_surface(struct brw_context *brw ) +static void upload_wm_constant_surface(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ - if (fp->const_buffer == 0) { + if (brw->wm.const_bo == 0) { if (brw->wm.surf_bo[surf] != NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; @@ -450,16 +367,20 @@ static void prepare_wm_constant_surface(struct brw_context *brw ) return; } - brw_update_wm_constant_surface(ctx, surf); + brw_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters, + &brw->wm.surf_bo[surf], + &brw->wm.surf_offset[surf]); + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } const struct brw_tracked_state brw_wm_constant_surface = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .mesa = 0, + .brw = (BRW_NEW_WM_CONSTBUF | + BRW_NEW_BATCH), .cache = 0 }, - .prepare = prepare_wm_constant_surface, + .emit = upload_wm_constant_surface, }; @@ -488,6 +409,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t draw_x; uint32_t draw_y; } key; + struct brw_surface_state surf; + void *map; memset(&key, 0, sizeof(key)); @@ -554,137 +477,123 @@ brw_update_renderbuffer_surface(struct brw_context *brw, (ctx->Color.BlendEnabled & (1 << unit))); } - drm_intel_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); - - if (brw->wm.surf_bo[unit] == NULL) { - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); + memset(&surf, 0, sizeof(surf)); - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + } else { + uint32_t tile_base, tile_x, tile_y; + uint32_t pitch = key.pitch * key.cpp; + + if (key.tiling == I915_TILING_X) { + tile_x = 
key.draw_x % (512 / key.cpp); + tile_y = key.draw_y % 8; + tile_base = ((key.draw_y / 8) * (8 * pitch)); + tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; } else { - uint32_t tile_base, tile_x, tile_y; - uint32_t pitch = key.pitch * key.cpp; - - if (key.tiling == I915_TILING_X) { - tile_x = key.draw_x % (512 / key.cpp); - tile_y = key.draw_y % 8; - tile_base = ((key.draw_y / 8) * (8 * pitch)); - tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; - } else { - /* Y */ - tile_x = key.draw_x % (128 / key.cpp); - tile_y = key.draw_y % 32; - tile_base = ((key.draw_y / 32) * (32 * pitch)); - tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; - } - assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surf.ss1.base_addr = tile_base; - surf.ss5.x_offset = tile_x / 4; - surf.ss5.y_offset = tile_y / 2; - } - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ - - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; - - if (intel->gen < 6) { - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + /* Y */ + tile_x = key.draw_x % (128 / key.cpp); + tile_y = key.draw_y % 32; + tile_base = ((key.draw_y / 32) * (32 * pitch)); + tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; } + assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surf.ss1.base_addr = tile_base; + surf.ss5.x_offset = tile_x / 4; + surf.ss5.y_offset = tile_y / 2; + } + if (region_bo != NULL) + surf.ss1.base_addr += region_bo->offset; /* reloc */ - /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - &surf, sizeof(surf)); - if (region_bo != NULL) { - /* We might sample from it, and we might render to it, so flag - * them both. We might be able to figure out from other state - * a more restrictive relocation to emit. - */ - drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], - offsetof(struct brw_surface_state, ss1), - region_bo, - surf.ss1.base_addr - region_bo->offset, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - } + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.pitch = (key.pitch * key.cpp) - 1; + + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; } -} + map = brw_state_batch(brw, sizeof(surf), 32, + &brw->wm.surf_bo[unit], + &brw->wm.surf_offset[unit]); + memcpy(map, &surf, sizeof(surf)); + + if (region_bo != NULL) { + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + brw->wm.surf_offset[unit] + + offsetof(struct brw_surface_state, ss1), + region_bo, + surf.ss1.base_addr - region_bo->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + } +} -/** - * Constructs the binding table for the WM surface state, which maps unit - * numbers to surface state objects. 
- */ -static drm_intel_bo * -brw_wm_get_binding_table(struct brw_context *brw) +static void +prepare_wm_surfaces(struct brw_context *brw) { - drm_intel_bo *bind_bo; - - assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t data[BRW_WM_MAX_SURF]; - int i; - - for (i = 0; i < brw->wm.nr_surfaces; i++) - if (brw->wm.surf_bo[i]) - data[i] = brw->wm.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_WM_MAX_SURF; i++) { - if (brw->wm.surf_bo[i] != NULL) { - drm_intel_bo_emit_reloc(bind_bo, i * sizeof(GLuint), - brw->wm.surf_bo[i], 0, - I915_GEM_DOMAIN_INSTRUCTION, 0); - } + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? 
irb->region : NULL; + + brw_add_validated_bo(brw, region->buffer); + nr_surfaces = SURF_INDEX_DRAW(i) + 1; + } + } + + if (brw->wm.const_bo) { + brw_add_validated_bo(brw, brw->wm.const_bo); + nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + } + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled) { + brw_add_validated_bo(brw, intelObj->mt->region->buffer); + nr_surfaces = SURF_INDEX_TEXTURE(i) + 1; } } - return bind_bo; + /* Have to update this in our prepare, since the unit's prepare + * relies on it. + */ + if (brw->wm.nr_surfaces != nr_surfaces) { + brw->wm.nr_surfaces = nr_surfaces; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; + } } -static void prepare_wm_surfaces(struct brw_context *brw ) +/** + * Constructs the set of surface state objects pointed to by the + * binding table. 
+ */ +static void +upload_wm_surfaces(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; GLuint i; - int old_nr_surfaces; /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ @@ -698,32 +607,21 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw_update_renderbuffer_surface(brw, NULL, 0); } - old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS; - - if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) - brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; - /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; const GLuint surf = SURF_INDEX_TEXTURE(i); - /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ + /* _NEW_TEXTURE */ if (texUnit->_ReallyEnabled) { brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; } else { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; } } - drm_intel_bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); - - if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } const struct brw_tracked_state brw_wm_surfaces = { @@ -731,12 +629,48 @@ const struct brw_tracked_state brw_wm_surfaces = { .mesa = (_NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS), - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_WM_SURFACES), + .brw = (BRW_NEW_BATCH), .cache = 0 }, .prepare = prepare_wm_surfaces, + .emit = upload_wm_surfaces, }; +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static void +brw_wm_upload_binding_table(struct brw_context *brw) +{ + uint32_t *bind; + int i; + + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. 
+ */ + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + 32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset); + + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + /* BRW_NEW_WM_SURFACES */ + bind[i] = brw->wm.surf_offset[i]; + if (brw->wm.surf_bo[i]) { + bind[i] = brw->wm.surf_offset[i]; + } else { + bind[i] = 0; + } + } + brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; +} +const struct brw_tracked_state brw_wm_binding_table = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_WM_SURFACES), + .cache = 0 + }, + .emit = brw_wm_upload_binding_table, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 2e21e5f7335..34a9dc234c2 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -63,8 +63,7 @@ prepare_scissor_state(struct brw_context *brw) drm_intel_bo_unreference(brw->sf.state_bo); brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, - &scissor, sizeof(scissor), - NULL, 0); + &scissor, sizeof(scissor)); } const struct brw_tracked_state gen6_scissor_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 8d96b44f1dc..51940efb443 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -87,7 +87,7 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: - dw3 |= GEN6_SF_CULL_BOTH; + dw3 |= GEN6_SF_CULL_FRONT; break; case GL_BACK: dw3 |= GEN6_SF_CULL_BACK; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 0c2aa4206c6..301c68e7f9e 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -29,7 +29,6 @@ #include "brw_state.h" #include "brw_defines.h" #include "intel_batchbuffer.h" -#include "main/macros.h" /* The clip VP 
defines the guardband region where expensive clipping is skipped * and fragments are allowed to be generated and clipped out cheaply by the SF. @@ -51,8 +50,7 @@ prepare_clip_vp(struct brw_context *brw) drm_intel_bo_unreference(brw->clip.vp_bo); brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, - &vp, sizeof(vp), - NULL, 0); + &vp, sizeof(vp)); } const struct brw_tracked_state gen6_clip_vp = { @@ -95,8 +93,7 @@ prepare_sf_vp(struct brw_context *brw) drm_intel_bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, - &sfv, sizeof(sfv), - NULL, 0); + &sfv, sizeof(sfv)); } const struct brw_tracked_state gen6_sf_vp = { @@ -108,36 +105,6 @@ const struct brw_tracked_state gen6_sf_vp = { .prepare = prepare_sf_vp, }; -static void -prepare_cc_vp(struct brw_context *brw) -{ - GLcontext *ctx = &brw->intel.ctx; - struct brw_cc_viewport ccv; - - /* _NEW_TRANSOFORM */ - if (ctx->Transform.DepthClamp) { - /* _NEW_VIEWPORT */ - ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); - ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); - } else { - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; - } - - drm_intel_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), - NULL, 0); -} - -const struct brw_tracked_state gen6_cc_vp = { - .dirty = { - .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, - .brw = 0, - .cache = 0, - }, - .prepare = prepare_cc_vp, -}; - static void prepare_viewport_state_pointers(struct brw_context *brw) { brw_add_validated_bo(brw, brw->sf.state_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 325f6b43d30..863c85449d9 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -98,7 +98,8 @@ upload_wm_state(struct brw_context *brw) /* CACHE_NEW_SAMPLER */ dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; - dw4 |= (1 << 
GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + dw4 |= (brw->wm.prog_data->first_curbe_grf << + GEN6_WM_DISPATCH_START_GRF_SHIFT_0); dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; dw5 |= GEN6_WM_DISPATCH_ENABLE; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 1116bccd8e7..698445c5268 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -49,6 +49,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->ptr = batch->map; batch->reserved_space = BATCH_RESERVED; batch->dirty_state = ~0; + batch->state_batch_offset = batch->size; } struct intel_batchbuffer * @@ -84,6 +85,12 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) int x_off = 0, y_off = 0; drm_intel_bo_subdata(batch->buf, 0, used, batch->buffer); + if (batch->state_batch_offset != batch->size) { + drm_intel_bo_subdata(batch->buf, + batch->state_batch_offset, + batch->size - batch->state_batch_offset, + batch->buffer + batch->state_batch_offset); + } batch->ptr = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index f4ac1825cd1..ae53f455117 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -23,6 +23,7 @@ struct intel_batchbuffer GLubyte *ptr; GLuint size; + uint32_t state_batch_offset; #ifdef DEBUG /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ @@ -92,7 +93,8 @@ static INLINE uint32_t float_as_int(float f) static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { - return (batch->size - batch->reserved_space) - (batch->ptr - batch->map); + return (batch->state_batch_offset - batch->reserved_space) - + (batch->ptr - batch->map); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index dec47974f2a..5f2035d79c9 100644 --- 
a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -827,8 +827,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) assert(intel); /* should never be null */ if (intel) { - GLboolean release_texture_heaps; - INTEL_FIREVERTICES(intel); _mesa_meta_free(&intel->ctx); @@ -837,7 +835,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) intel->vtbl.destroy(intel); - release_texture_heaps = (intel->ctx.Shared->RefCount == 1); _swsetup_DestroyContext(&intel->ctx); _tnl_DestroyContext(&intel->ctx); _vbo_DestroyContext(&intel->ctx); @@ -855,18 +852,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); intel->first_post_swapbuffers_batch = NULL; - if (release_texture_heaps) { - /* Nothing is currently done here to free texture heaps; - * but we're not using the texture heap utilities, so I - * rather think we shouldn't. I've taken a look, and can't - * find any private texture data hanging around anywhere, but - * I'm not yet certain there isn't any at all... 
- */ - /* if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "do something to free texture heaps\n"); - */ - } - driDestroyOptionCache(&intel->optionCache); /* free the Mesa context */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 14ff4a96950..c7ac2de01e6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -261,6 +261,8 @@ extern char *__progname; #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define ROUND_DOWN_TO(value, alignment) (ALIGN(value - alignment - 1, \ + alignment)) #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) static INLINE uint32_t diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 8f61f1f5b24..4a83886fc16 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -42,9 +42,6 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" -#ifndef I915 -#include "brw_state.h" -#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -296,12 +293,6 @@ intel_renderbuffer_set_region(struct intel_context *intel, old = rb->region; rb->region = NULL; intel_region_reference(&rb->region, region); -#ifndef I915 - if (old) { - brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, - old->buffer); - } -#endif intel_region_release(&old); } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 71ef7a8e39b..39ac0205fa1 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -29,9 +29,6 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_tex_layout.h" -#ifndef I915 -#include "brw_state.h" -#endif #include "main/enums.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -203,19 +200,6 @@ intel_miptree_release(struct 
intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); -#ifndef I915 - /* Free up cached binding tables holding a reference on our buffer, to - * avoid excessive memory consumption. - * - * This isn't as aggressive as we could be, as we'd like to do - * it from any time we free the last ref on a region. But intel_region.c - * is context-agnostic. Perhaps our constant state cache should be, as - * well. - */ - brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, - (*mt)->region->buffer); -#endif - intel_region_release(&((*mt)->region)); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c30552c5a79..fb840c1020d 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -257,6 +257,8 @@ intelSpanRenderStart(GLcontext * ctx) for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_finalize_mipmap_tree(intel, i); intel_tex_map_images(intel, intel_texture_object(texObj)); } } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 34d22b45591..ff3801dc676 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ radeon_emulate_branches.c \ + radeon_emulate_loops.c \ radeon_program.c \ radeon_program_print.c \ radeon_opcodes.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 663926e3216..50d9cdb7f2d 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -23,6 +23,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_regalloc.c', 'radeon_optimize.c', 'radeon_emulate_branches.c', + 'radeon_emulate_loops.c', 
'radeon_dataflow.c', 'radeon_dataflow_deadcode.c', 'radeon_dataflow_swizzles.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 7f3b88ed759..bbdfa0d56f9 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -26,6 +26,7 @@ #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" #include "radeon_program_alu.h" #include "radeon_program_tex.h" #include "r300_fragprog.h" @@ -103,6 +104,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ + + if (c->Base.is_r500) { + rc_emulate_loops(&c->Base, R500_PFS_MAX_INST); + } else { + rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); + } + debug_program_log(c, "after emulate loops"); + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 507b2e532fe..e984797e2d3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -30,6 +30,7 @@ #include "radeon_program_alu.h" #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" /* * Take an already-setup and valid source then swizzle it appropriately to @@ -348,7 +349,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi if (!valid_dst(compiler->code, &vpi->DstReg)) continue; - if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS || + (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) { rc_error(&compiler->Base, "Vertex program has too many 
instructions\n"); return; } @@ -404,7 +406,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c { struct rc_instruction *inst; unsigned int num_orig_temps = 0; - char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; unsigned int i, j; @@ -463,11 +465,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { - for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + for(j = 0; j < R300_VS_MAX_TEMPS; ++j) { if (!hwtemps[j]) break; } - if (j >= VSF_MAX_FRAGMENT_TEMPS) { + if (j >= R300_VS_MAX_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { ta[orig].Allocated = 1; @@ -600,6 +602,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. 
*/ + if (compiler->Base.is_r500){ + rc_emulate_loops(&compiler->Base, R500_VS_MAX_ALU); + } else { + rc_emulate_loops(&compiler->Base, R300_VS_MAX_ALU); + } + debug_program_log(compiler, "after emulate loops"); + rc_emulate_branches(&compiler->Base); debug_program_log(compiler, "after emulate branches"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 1979e7e4e49..d03689763bc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -235,8 +235,11 @@ struct rX00_fragment_program_code { }; -#define VSF_MAX_FRAGMENT_LENGTH (255*4) -#define VSF_MAX_FRAGMENT_TEMPS (14) +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -244,8 +247,8 @@ struct rX00_fragment_program_code { struct r300_vertex_program_code { int length; union { - uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; } body; int pos_end; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 00000000000..4c5d29f4217 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,474 @@ +/* + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct const_value { + + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_count_instructions(struct loop_info * loop) +{ + unsigned int count = 0; + struct rc_instruction * inst = loop->BeginLoop->Next; + while(inst != loop->EndLoop){ + count++; + inst = inst->Next; + } + return count; +} + +static unsigned int loop_calc_iterations(struct loop_info * loop, + unsigned int loop_count, unsigned int max_instructions) +{ + unsigned int icount = loop_count_instructions(loop); + return max_instructions / (loop_count * icount); +} + +static void loop_unroll(struct emulate_loop_state * s, + struct loop_info *loop, unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct 
rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(s->C); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. 
*/ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } + +} + +static int transform_const_loop(struct emulate_loop_state * s, + struct loop_info * loop, + struct rc_instruction * cond) +{ + int end_loops = 1; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){ + limit = &cond->U.I.SrcReg[0]; + counter = &cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){ + limit = &cond->U.I.SrcReg[1]; + counter = &cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = s->C; + for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + 
inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = s->C; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(s->C, limit, 0); + iterations = (int) ((limit_value - counter_value.Value) / + count_inst.Amount); + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + loop_unroll(s, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. 
Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. + * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info *loop; + struct rc_instruction * ptr; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + memset(loop, 0, sizeof(struct loop_info)); + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); + return NULL; + } + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + /* Nested loop */ + ptr = transform_loop(s, ptr); + if(!ptr){ + return NULL; + } + break; + case RC_OPCODE_BRK: + loop->Brk = ptr; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ + rc_error(s->C, + "%s: expected ENDIF\n",__FUNCTION__); + return NULL; + } + loop->EndIf = ptr->Next; + if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ + rc_error(s->C, + "%s: expected IF\n", __FUNCTION__); + return NULL; + } + loop->If = ptr->Prev; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + rc_error(s->C, "%s expected conditional\n", + __FUNCTION__); + return NULL; + } + loop->Cond = loop->If->Prev; + ptr = loop->EndIf; + break; + case RC_OPCODE_ENDLOOP: + 
loop->EndLoop = ptr; + break; + } + } + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return NULL; + } + + /* Check if the number of loops is known at compile time. */ + if(transform_const_loop(s, loop, ptr)){ + return loop->BeginLoop->Next; + } + + /* Prepare the loop to be unrolled */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return loop->EndLoop; +} + +static void rc_transform_loops(struct emulate_loop_state * s) +{ + struct rc_instruction * ptr = s->C->Program.Instructions.Next; + while(ptr != &s->C->Program.Instructions) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + ptr = transform_loop(s, ptr); + if(!ptr){ + return; + } + } + ptr = ptr->Next; + } +} + +static void rc_unroll_loops(struct emulate_loop_state *s, + unsigned int max_instructions) +{ + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. 
+ */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + if(!s->Loops[i].EndLoop){ + continue; + } + unsigned int iterations = loop_calc_iterations(&s->Loops[i], + s->LoopCount, max_instructions); + loop_unroll(s, &s->Loops[i], iterations); + } +} + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions) +{ + struct emulate_loop_state s; + + memset(&s, 0, sizeof(struct emulate_loop_state)); + s.C = c; + + /* We may need to move these two operations to r3xx_(vert|frag)prog.c + * and run the optimization passes between them in order to increase + * the number of unrolls we can do for each loop. + */ + rc_transform_loops(&s); + + rc_unroll_loops(&s, max_instructions); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 00000000000..ddcf1c0fabe --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,12 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index d593b3e81ae..1dc16855dc1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0 }, { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { .Opcode = RC_OPCODE_REPL_ALPHA, .Name = "REPL_ALPHA", .HasDstReg = 1 diff --git 
a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 87a2e23084c..91c82ac0890 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -180,6 +180,12 @@ typedef enum { /** branch instruction: has no effect */ RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6992ca59dbf..e4b302bbad9 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -376,13 +376,12 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.MaxDrawBuffers = 1; ctx->Const.MaxColorAttachments = 1; - /* currently bogus data */ if (r300->options.hw_tcl_enabled) { - ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxNativeInstructions = 255; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 255; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; ctx->Const.VertexProgram.MaxNativeTemps = 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeParameters = 256; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 61133e686f1..88d6b06df56 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -6159,7 +6159,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, } if(uNumValidSrc > 0) { - prelude_cf_ptr = 
pAsm->cf_current_alu_clause_ptr; + prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; pAsm->alu_x_opcode = SQ_CF_INST_ALU; } @@ -6279,7 +6279,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, next_ins(pAsm); - pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; pAsm->alu_x_opcode = SQ_CF_INST_ALU; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 5a90f729e68..aab1a7947ab 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -563,11 +563,15 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* see if we need any point_sprite replacements, also increase num_interp * as there's no vp output for them */ - for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++) + if (ctx->Point.PointSprite) { - if(ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) { - ui++; - point_sprite = GL_TRUE; + for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++) + { + if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) + { + ui++; + point_sprite = GL_TRUE; + } } } @@ -670,8 +674,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { + GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i]; unBit = 1 << (VERT_RESULT_TEX0 + i); - if((OutputsWritten & unBit) || (ctx->Point.CoordReplace[i] == GL_TRUE)) + if ((OutputsWritten & unBit) || coord_replace) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -679,7 +684,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SEMANTIC_shift, SEMANTIC_mask); CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); /* ARB_point_sprite 
*/ - if(ctx->Point.CoordReplace[i] == GL_TRUE) + if (coord_replace) { SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index bcac125baf4..d2b190e42e0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -593,12 +593,7 @@ static int image_matches_texture_obj(struct gl_texture_object *texObj, if (!baseImage) return 0; - /* Check image level against object BaseLevel, but not MaxLevel. MaxLevel is not - * the highest level that can be assigned to the miptree. - */ - const unsigned maxLevel = texObj->BaseLevel + baseImage->MaxLog2; - if (level < texObj->BaseLevel || level > maxLevel - || level > RADEON_MIPTREE_MAX_TEXTURE_LEVELS) + if (level < texObj->BaseLevel || level > texObj->MaxLevel) return 0; const unsigned levelDiff = level - texObj->BaseLevel; @@ -620,7 +615,9 @@ static void teximage_assign_miptree(radeonContextPtr rmesa, radeonTexObj *t = radeon_tex_obj(texObj); radeon_texture_image* image = get_radeon_texture_image(texImage); - /* check image for dimension and level compatibility with texture */ + /* Since miptree holds only images for levels <BaseLevel..MaxLevel> + * don't allocate the miptree if the teximage won't fit. 
+ */ if (!image_matches_texture_obj(texObj, texImage, level)) return; diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index ea49a896590..c6b4a040851 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -20,17 +20,11 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main -# Standalone osmesa needs to be linked with core Mesa APIs -ifeq ($(DRIVER_DIRS), osmesa) CORE_MESA = \ $(TOP)/src/mesa/libmesa.a \ $(TOP)/src/mapi/glapi/libglapi.a \ $(TOP)/src/glsl/cl/libglslcl.a \ $(TOP)/src/glsl/pp/libglslpp.a -else -CORE_MESA = -endif - .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/mesa/main/arbprogram.h b/src/mesa/main/arbprogram.h index df16513e398..787ffd62f4b 100644 --- a/src/mesa/main/arbprogram.h +++ b/src/mesa/main/arbprogram.h @@ -27,6 +27,10 @@ #define ARBPROGRAM_H +#include "compiler.h" +#include "glheader.h" + + extern void GLAPIENTRY _mesa_BindProgram(GLenum target, GLuint id); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1a2e9b1da6f..48b9904642a 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -147,6 +147,8 @@ invalidate_framebuffer(struct gl_framebuffer *fb) /** * Given a GL_*_ATTACHMENTn token, return a pointer to the corresponding * gl_renderbuffer_attachment object. + * This function is only used for user-created FB objects, not the + * default / window-system FB object. * If \p attachment is GL_DEPTH_STENCIL_ATTACHMENT, return a pointer to * the depth buffer attachment point. */ @@ -156,6 +158,8 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb, { GLuint i; + assert(fb->Name > 0); + switch (attachment) { case GL_COLOR_ATTACHMENT0_EXT: case GL_COLOR_ATTACHMENT1_EXT: @@ -195,6 +199,45 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb, /** + * As above, but only used for getting attachments of the default / + * window-system framebuffer (not user-created framebuffer objects). 
+ */ +static struct gl_renderbuffer_attachment * +_mesa_get_fb0_attachment(GLcontext *ctx, struct gl_framebuffer *fb, + GLenum attachment) +{ + assert(fb->Name == 0); + + switch (attachment) { + case GL_FRONT_LEFT: + return &fb->Attachment[BUFFER_FRONT_LEFT]; + case GL_FRONT_RIGHT: + return &fb->Attachment[BUFFER_FRONT_RIGHT]; + case GL_BACK_LEFT: + return &fb->Attachment[BUFFER_BACK_LEFT]; + case GL_BACK_RIGHT: + return &fb->Attachment[BUFFER_BACK_RIGHT]; + case GL_AUX0: + if (fb->Visual.numAuxBuffers == 1) { + return &fb->Attachment[BUFFER_AUX0]; + } + return NULL; + case GL_DEPTH_BUFFER: + /* fall-through / new in GL 3.0 */ + case GL_DEPTH_ATTACHMENT_EXT: + return &fb->Attachment[BUFFER_DEPTH]; + case GL_STENCIL_BUFFER: + /* fall-through / new in GL 3.0 */ + case GL_STENCIL_ATTACHMENT_EXT: + return &fb->Attachment[BUFFER_STENCIL]; + default: + return NULL; + } +} + + + +/** * Remove any texture or renderbuffer attached to the given attachment * point. Update reference counts, etc. */ @@ -1878,12 +1921,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } if (buffer->Name == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetFramebufferAttachmentParameterivEXT"); - return; + /* the default / window-system FBO */ + att = _mesa_get_fb0_attachment(ctx, buffer, attachment); + } + else { + /* user-created framebuffer FBO */ + att = _mesa_get_attachment(ctx, buffer, attachment); } - att = _mesa_get_attachment(ctx, buffer, attachment); if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetFramebufferAttachmentParameterivEXT(attachment)"); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 01f84180af7..56558cfcc1e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -879,6 +879,7 @@ _mesa_source_buffer_exists(GLcontext *ctx, GLenum format) return GL_FALSE; } ASSERT(_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_RED_BITS) > 0 || + 
_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_ALPHA_BITS) > 0 || _mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_INDEX_BITS) > 0); break; case GL_DEPTH: diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 2101b9bc18d..8f7ebeed976 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1242,8 +1242,6 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, assert(xoffset % util_format_get_blockwidth(pformat) == 0); assert(yoffset % util_format_get_blockheight(pformat) == 0); - assert(width % util_format_get_blockwidth(pformat) == 0); - assert(height % util_format_get_blockheight(pformat) == 0); for (y = 0; y < height; y += util_format_get_blockheight(pformat)) { /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */ diff --git a/src/mesa/swrast_setup/ss_triangle.c b/src/mesa/swrast_setup/ss_triangle.c index bad0d819460..f22bc52f0a8 100644 --- a/src/mesa/swrast_setup/ss_triangle.c +++ b/src/mesa/swrast_setup/ss_triangle.c @@ -159,7 +159,7 @@ static void _swsetup_render_tri(GLcontext *ctx, } #define SS_COLOR(a,b) UNCLAMPED_FLOAT_TO_RGBA_CHAN(a,b) -#define SS_SPEC(a,b) UNCLAMPED_FLOAT_TO_RGB_CHAN(a,b) +#define SS_SPEC(a,b) COPY_4V(a,b) #define SS_IND(a,b) (a = b) #define IND (0) |