diff options
author | David Nusinow <[email protected]> | 2006-09-24 21:21:15 +0000 |
---|---|---|
committer | David Nusinow <[email protected]> | 2006-09-24 21:21:15 +0000 |
commit | 387acaac6925e42f47031f26360e33a3f30e7312 (patch) | |
tree | a4218beb5aa67b58a74299d899dab9f2f44f971e /src/mesa/drivers | |
parent | 952b775dbe0fb434310808058d96931b832050da (diff) |
* New upstream version
Diffstat (limited to 'src/mesa/drivers')
240 files changed, 42832 insertions, 3143 deletions
diff --git a/src/mesa/drivers/beos/Makefile b/src/mesa/drivers/beos/Makefile index 41aa3a65c16..1897d6aa3e8 100644 --- a/src/mesa/drivers/beos/Makefile +++ b/src/mesa/drivers/beos/Makefile @@ -167,7 +167,7 @@ OBJECTS := $(DRIVER_OBJECTS:.cpp=.o) $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ -default: depend $(LIB_DIR) $(LIB_DIR)/$(GL_LIB_NAME) +default: depend $(TOP)/$(LIB_DIR) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) $(MESA_MODULES): cd $(TOP)/src/mesa; $(MAKE) mesa.a ; @@ -175,11 +175,11 @@ $(MESA_MODULES): $(GLU_MODULES): cd $(GLU_DIR); $(MAKE) $(subst $(GLU_DIR)/,,$(GLU_MODULES)) ; -$(LIB_DIR): - mkdir $(LIB_DIR) +$(TOP)/$(LIB_DIR): + mkdir $(TOP)/$(LIB_DIR) -$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES) - @$(TOP)/bin/mklib -o $(GL_LIB) -install $(LIB_DIR) -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES) + @$(TOP)/bin/mklib -o $(GL_LIB) -install $(TOP)/$(LIB_DIR) -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES) # $(GLU_OBJECTS): diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 9838e0b5ec0..3ccbe54817a 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -43,6 +43,7 @@ #include "fbobject.h" #include "texrender.h" #endif +#include "arrayobj.h" #include "driverfuncs.h" #include "tnl/tnl.h" @@ -220,6 +221,11 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->BeginQuery = NULL; driver->EndQuery = NULL; + /* APPLE_vertex_array_object */ + driver->NewArrayObject = _mesa_new_array_object; + driver->DeleteArrayObject = _mesa_delete_array_object; + driver->BindArrayObject = NULL; + /* T&L stuff */ driver->NeedValidate = GL_FALSE; driver->ValidateTnlModule = NULL; diff --git a/src/mesa/drivers/directfb/Makefile b/src/mesa/drivers/directfb/Makefile index 
56f7eac7cae..c515785b2a1 100644 --- a/src/mesa/drivers/directfb/Makefile +++ b/src/mesa/drivers/directfb/Makefile @@ -34,7 +34,7 @@ default: directfbgl_mesa # Mesa DirectFBGL module directfbgl_mesa: $(DIRECTFBGL_MESA_OBJECTS) $(CC) -shared $(CFLAGS) $(DIRECTFBGL_MESA_OBJECTS) -o $(DIRECTFBGL_MESA) \ - -Wl,-soname -Wl,$(DIRECTFBGL_MESA) -L$(TOP)/lib -lGL -lm + -Wl,-soname -Wl,$(DIRECTFBGL_MESA) -L$(TOP)/$(LIB_DIR) -lGL -lm install: diff --git a/src/mesa/drivers/directfb/idirectfbgl_mesa.c b/src/mesa/drivers/directfb/idirectfbgl_mesa.c index 1c481af3f2a..d11241b2b21 100644 --- a/src/mesa/drivers/directfb/idirectfbgl_mesa.c +++ b/src/mesa/drivers/directfb/idirectfbgl_mesa.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2005 Claudio Ciccani <[email protected]> + * Copyright (C) 2004-2006 Claudio Ciccani <[email protected]> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,12 +23,21 @@ #include <stdlib.h> #include <unistd.h> -#include <directfb.h> +#include <pthread.h> #include <direct/messages.h> #include <direct/interface.h> #include <direct/mem.h> +#include <directfb.h> +#include <directfb_version.h> + +#define VERSION_CODE( M, m, r ) (((M) * 1000) + ((m) * 100) + ((r))) +#define DIRECTFB_VERSION_CODE VERSION_CODE( DIRECTFB_MAJOR_VERSION, \ + DIRECTFB_MINOR_VERSION, \ + DIRECTFB_MICRO_VERSION ) + + #ifdef CLAMP # undef CLAMP #endif @@ -70,7 +79,7 @@ DIRECT_INTERFACE_IMPLEMENTATION( IDirectFBGL, Mesa ) typedef struct { int ref; /* reference counter */ - bool locked; + DFBBoolean locked; IDirectFBSurface *surface; DFBSurfacePixelFormat format; @@ -89,32 +98,68 @@ typedef struct { struct gl_renderbuffer render; } IDirectFBGL_data; +/******************************************************************************/ -static bool dfb_mesa_setup_visual ( GLvisual *visual, - DFBSurfacePixelFormat format ); -static bool dfb_mesa_create_context ( GLcontext *context, - GLframebuffer 
*framebuffer, - GLvisual *visual, - DFBSurfacePixelFormat format, - IDirectFBGL_data *data ); -static void dfb_mesa_destroy_context( GLcontext *context, - GLframebuffer *framebuffer ); +static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER; +static unsigned int global_ref = 0; + +static inline int directfbgl_init( void ) +{ + pthread_mutexattr_t attr; + int ret; + + if (global_ref++) + return 0; + + pthread_mutexattr_init( &attr ); + pthread_mutexattr_settype( &attr, PTHREAD_MUTEX_ERRORCHECK ); + ret = pthread_mutex_init( &global_lock, &attr ); + pthread_mutexattr_destroy( &attr ); + + return ret; +} + +static inline void directfbgl_finish( void ) +{ + if (--global_ref == 0) + pthread_mutex_destroy( &global_lock ); +} + +#define directfbgl_lock() pthread_mutex_lock( &global_lock ) +#define directfbgl_unlock() pthread_mutex_unlock( &global_lock ) + +/******************************************************************************/ + +static bool directfbgl_init_visual ( GLvisual *visual, + DFBSurfacePixelFormat format ); +static bool directfbgl_create_context ( GLcontext *context, + GLframebuffer *framebuffer, + GLvisual *visual, + DFBSurfacePixelFormat format, + IDirectFBGL_data *data ); +static void directfbgl_destroy_context( GLcontext *context, + GLframebuffer *framebuffer ); + +/******************************************************************************/ static void -IDirectFBGL_Destruct( IDirectFBGL *thiz ) +IDirectFBGL_Mesa_Destruct( IDirectFBGL *thiz ) { IDirectFBGL_data *data = (IDirectFBGL_data*) thiz->priv; - dfb_mesa_destroy_context( &data->context, &data->framebuffer ); + directfbgl_destroy_context( &data->context, &data->framebuffer ); - data->surface->Release( data->surface ); + if (data->surface) + data->surface->Release( data->surface ); DIRECT_DEALLOCATE_INTERFACE( thiz ); + + directfbgl_finish(); } static DFBResult -IDirectFBGL_AddRef( IDirectFBGL *thiz ) +IDirectFBGL_Mesa_AddRef( IDirectFBGL *thiz ) { DIRECT_INTERFACE_GET_DATA( 
IDirectFBGL ); @@ -124,19 +169,18 @@ IDirectFBGL_AddRef( IDirectFBGL *thiz ) } static DFBResult -IDirectFBGL_Release( IDirectFBGL *thiz ) +IDirectFBGL_Mesa_Release( IDirectFBGL *thiz ) { DIRECT_INTERFACE_GET_DATA( IDirectFBGL ) - if (--data->ref == 0) { - IDirectFBGL_Destruct( thiz ); - } + if (--data->ref == 0) + IDirectFBGL_Mesa_Destruct( thiz ); return DFB_OK; } static DFBResult -IDirectFBGL_Lock( IDirectFBGL *thiz ) +IDirectFBGL_Mesa_Lock( IDirectFBGL *thiz ) { IDirectFBSurface *surface; int width = 0; @@ -148,11 +192,14 @@ IDirectFBGL_Lock( IDirectFBGL *thiz ) if (data->locked) return DFB_LOCKED; + if (directfbgl_lock()) + return DFB_LOCKED; + surface = data->surface; surface->GetSize( surface, &width, &height ); err = surface->Lock( surface, DSLF_READ | DSLF_WRITE, - (void**) &data->video.start, &data->video.pitch ); + (void*)&data->video.start, &data->video.pitch ); if (err != DFB_OK) { D_ERROR( "DirectFBGL/Mesa: couldn't lock surface.\n" ); return err; @@ -160,46 +207,54 @@ IDirectFBGL_Lock( IDirectFBGL *thiz ) data->video.end = data->video.start + (height-1) * data->video.pitch; data->render.Data = data->video.start; + + _mesa_make_current( &data->context, + &data->framebuffer, &data->framebuffer ); if (data->width != width || data->height != height) { data->width = width; data->height = height; _mesa_ResizeBuffersMESA(); } - - data->locked = true; + + data->locked = DFB_TRUE; return DFB_OK; } static DFBResult -IDirectFBGL_Unlock( IDirectFBGL *thiz ) +IDirectFBGL_Mesa_Unlock( IDirectFBGL *thiz ) { DIRECT_INTERFACE_GET_DATA( IDirectFBGL ); if (!data->locked) return DFB_OK; + _mesa_make_current( NULL, NULL, NULL ); + data->surface->Unlock( data->surface ); - data->video.start = NULL; - data->video.end = NULL; - - data->locked = false; + directfbgl_unlock(); + + data->locked = DFB_FALSE; + return DFB_OK; } static DFBResult -IDirectFBGL_GetAttributes( IDirectFBGL *thiz, - DFBGLAttributes *attributes ) +IDirectFBGL_Mesa_GetAttributes( IDirectFBGL *thiz, + 
DFBGLAttributes *attributes ) { - GLvisual *visual; + DFBSurfaceCapabilities caps; + GLvisual *visual; DIRECT_INTERFACE_GET_DATA( IDirectFBGL ); if (!attributes) return DFB_INVARG; + data->surface->GetCapabilities( data->surface, &caps ); + visual = &data->visual; attributes->buffer_size = visual->rgbBits ? : visual->indexBits; @@ -214,7 +269,7 @@ IDirectFBGL_GetAttributes( IDirectFBGL *thiz, attributes->accum_green_size = visual->accumGreenBits; attributes->accum_blue_size = visual->accumBlueBits; attributes->accum_alpha_size = visual->accumAlphaBits; - attributes->double_buffer = (visual->doubleBufferMode != 0); + attributes->double_buffer = (caps & DSCAPS_FLIPPING) ? 1 : 0; attributes->stereo = (visual->stereoMode != 0); return DFB_OK; @@ -230,12 +285,15 @@ Probe( void *data ) } static DFBResult -Construct( IDirectFBGL *thiz, - IDirectFBSurface *surface ) -{ +Construct( IDirectFBGL *thiz, IDirectFBSurface *surface ) +{ + /* Initialize global resources. */ + if (directfbgl_init()) + return DFB_INIT; + /* Allocate interface data. */ DIRECT_ALLOCATE_INTERFACE_DATA( thiz, IDirectFBGL ); - + /* Initialize interface data. */ data->ref = 1; data->surface = surface; @@ -245,48 +303,41 @@ Construct( IDirectFBGL *thiz, surface->GetSize( surface, &data->width, &data->height ); /* Configure visual. */ - if (!dfb_mesa_setup_visual( &data->visual, data->format )) { + if (!directfbgl_init_visual( &data->visual, data->format )) { D_ERROR( "DirectFBGL/Mesa: failed to initialize visual.\n" ); - surface->Release( surface ); + IDirectFBGL_Mesa_Destruct( thiz ); return DFB_UNSUPPORTED; } /* Create context. 
*/ - if (!dfb_mesa_create_context( &data->context, &data->framebuffer, - &data->visual, data->format, data )) { + if (!directfbgl_create_context( &data->context, &data->framebuffer, + &data->visual, data->format, data )) { D_ERROR( "DirectFBGL/Mesa: failed to create context.\n" ); - surface->Release( surface ); + IDirectFBGL_Mesa_Destruct( thiz ); return DFB_UNSUPPORTED; } /* Assign interface pointers. */ - thiz->AddRef = IDirectFBGL_AddRef; - thiz->Release = IDirectFBGL_Release; - thiz->Lock = IDirectFBGL_Lock; - thiz->Unlock = IDirectFBGL_Unlock; - thiz->GetAttributes = IDirectFBGL_GetAttributes; + thiz->AddRef = IDirectFBGL_Mesa_AddRef; + thiz->Release = IDirectFBGL_Mesa_Release; + thiz->Lock = IDirectFBGL_Mesa_Lock; + thiz->Unlock = IDirectFBGL_Mesa_Unlock; + thiz->GetAttributes = IDirectFBGL_Mesa_GetAttributes; return DFB_OK; } -/* internal functions */ +/***************************** Driver functions ******************************/ static const GLubyte* -get_string( GLcontext *ctx, GLenum pname ) +dfbGetString( GLcontext *ctx, GLenum pname ) { - switch (pname) { - case GL_VENDOR: - return "Claudio Ciccani"; - case GL_VERSION: - return "1.0"; - default: - return NULL; - } + return NULL; } static void -update_state( GLcontext *ctx, GLuint new_state ) +dfbUpdateState( GLcontext *ctx, GLuint new_state ) { _swrast_InvalidateState( ctx, new_state ); _swsetup_InvalidateState( ctx, new_state ); @@ -295,7 +346,7 @@ update_state( GLcontext *ctx, GLuint new_state ) } static void -get_buffer_size( GLframebuffer *buffer, GLuint *width, GLuint *height ) +dfbGetBufferSize( GLframebuffer *buffer, GLuint *width, GLuint *height ) { GLcontext *ctx = _mesa_get_current_context(); IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; @@ -305,42 +356,96 @@ get_buffer_size( GLframebuffer *buffer, GLuint *width, GLuint *height ) } static void -set_viewport( GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h ) +dfbSetViewport( GLcontext *ctx, GLint x, GLint y, GLsizei w, 
GLsizei h ) { _mesa_ResizeBuffersMESA(); } -/* required but not used */ static void -set_buffer( GLcontext *ctx, GLframebuffer *buffer, GLuint bufferBit ) +dfbClear( GLcontext *ctx, GLbitfield mask, GLboolean all, + GLint x, GLint y, GLint width, GLint height ) { - return; -} + IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; + + if (mask & BUFFER_BIT_FRONT_LEFT && + ctx->Color.ColorMask[0] && + ctx->Color.ColorMask[1] && + ctx->Color.ColorMask[2] && + ctx->Color.ColorMask[3]) + { + DFBRegion clip; + __u8 a, r, g, b; + + UNCLAMPED_FLOAT_TO_UBYTE( a, ctx->Color.ClearColor[ACOMP] ); + UNCLAMPED_FLOAT_TO_UBYTE( r, ctx->Color.ClearColor[RCOMP] ); + UNCLAMPED_FLOAT_TO_UBYTE( g, ctx->Color.ClearColor[GCOMP] ); + UNCLAMPED_FLOAT_TO_UBYTE( b, ctx->Color.ClearColor[BCOMP] ); + + data->surface->Unlock( data->surface ); + +#if DIRECTFB_VERSION_CODE >= VERSION_CODE(0,9,25) + data->surface->GetClip( data->surface, &clip ); +#else + (void)clip; +#endif + + if (all) { + data->surface->SetClip( data->surface, NULL ); + } + else { + DFBRegion reg = { x1:x, y1:y, x2:x+width-1, y2:y+height-1 }; + data->surface->SetClip( data->surface, &reg ); + } + + data->surface->Clear( data->surface, r, g, b, a ); + +#if DIRECTFB_VERSION_CODE >= VERSION_CODE(0,9,25) + data->surface->SetClip( data->surface, &clip ); +#endif + + data->surface->Lock( data->surface, DSLF_READ | DSLF_WRITE, + (void*)&data->video.start, &data->video.pitch ); + + mask &= ~BUFFER_BIT_FRONT_LEFT; + } + + if (mask) + _swrast_Clear( ctx, mask, all, x, y, width, height ); +} + + + +/************************ RenderBuffer functions *****************************/ static void -delete_renderbuffer( struct gl_renderbuffer *render ) +dfbDeleteRenderbuffer( struct gl_renderbuffer *render ) { return; } static GLboolean -renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, - GLenum internalFormat, GLuint width, GLuint height ) +dfbRenderbufferStorage( GLcontext *ctx, struct gl_renderbuffer *render, + GLenum 
internalFormat, GLuint width, GLuint height ) { return GL_TRUE; } +/***************************** Span functions ********************************/ + /* RGB332 */ #define NAME(PREFIX) PREFIX##_RGB332 #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLubyte *P = data->video.end - (Y) * data->video.pitch + (X); #define INC_PIXEL_PTR(P) P += 1 #define STORE_PIXEL(P, X, Y, S) \ - *P = ( (((S[RCOMP]) & 0xe0)) | (((S[GCOMP]) & 0xe0) >> 3) | ((S[BCOMP]) >> 6) ) + *P = ( (((S[RCOMP]) & 0xe0) ) | \ + (((S[GCOMP]) & 0xe0) >> 3) | \ + (((S[BCOMP]) ) >> 6) ) #define FETCH_PIXEL(D, P) \ D[RCOMP] = ((*P & 0xe0) ); \ D[GCOMP] = ((*P & 0x1c) << 3); \ @@ -349,16 +454,79 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, #include "swrast/s_spantemp.h" +/* ARGB4444 */ +#define NAME(PREFIX) PREFIX##_ARGB4444 +#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte +#define SPAN_VARS \ + IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; +#define INIT_PIXEL_PTR(P, X, Y) \ + GLushort *P = (GLushort *) (data->video.end - (Y) * data->video.pitch + (X) * 2); +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL_RGB(P, X, Y, S) \ + *P = ( 0xf000 | \ + (((S[RCOMP]) & 0xf0) << 4) | \ + (((S[GCOMP]) & 0xf0) ) | \ + (((S[BCOMP]) & 0xf0) >> 4) ) +#define STORE_PIXEL(P, X, Y, S) \ + *P = ( (((S[ACOMP]) & 0xf0) << 8) | \ + (((S[RCOMP]) & 0xf0) << 4) | \ + (((S[GCOMP]) & 0xf0) ) | \ + (((S[BCOMP]) & 0xf0) >> 4) ) +#define FETCH_PIXEL(D, P) \ + D[RCOMP] = ((*P & 0x0f00) >> 4); \ + D[GCOMP] = ((*P & 0x00f0) ); \ + D[BCOMP] = ((*P & 0x000f) << 4); \ + D[ACOMP] = ((*P & 0xf000) >> 8) + +#include "swrast/s_spantemp.h" + +/* ARGB2554 */ +#define NAME(PREFIX) PREFIX##_ARGB2554 +#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte +#define SPAN_VARS \ + IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; +#define INIT_PIXEL_PTR(P, X, Y) \ + GLushort *P = (GLushort 
*) (data->video.end - (Y) * data->video.pitch + (X) * 2); +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL_RGB(P, X, Y, S) \ + *P = ( 0xc000 | \ + (((S[RCOMP]) & 0xf8) << 6) | \ + (((S[GCOMP]) & 0xf8) << 1) | \ + (((S[BCOMP]) & 0xf0) >> 4) ) +#define STORE_PIXEL(P, X, Y, S) \ + *P = ( (((S[ACOMP]) & 0xc0) << 8) | \ + (((S[RCOMP]) & 0xf8) << 6) | \ + (((S[GCOMP]) & 0xf8) << 1) | \ + (((S[BCOMP]) & 0xf0) >> 4) ) +#define FETCH_PIXEL(D, P) \ + D[RCOMP] = ((*P & 0x3e00) >> 9); \ + D[GCOMP] = ((*P & 0x01f0) >> 4); \ + D[BCOMP] = ((*P & 0x000f) << 4); \ + D[ACOMP] = ((*P & 0xc000) >> 14) + +#include "swrast/s_spantemp.h" + /* ARGB1555 */ #define NAME(PREFIX) PREFIX##_ARGB1555 #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLushort *P = (GLushort *) (data->video.end - (Y) * data->video.pitch + (X) * 2); #define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL_RGB(P, X, Y, S) \ + *P = ( 0x8000 | \ + (((S[RCOMP]) & 0xf8) << 7) | \ + (((S[GCOMP]) & 0xf8) << 2) | \ + (((S[BCOMP]) ) >> 3) ) #define STORE_PIXEL(P, X, Y, S) \ - *P = ( (((S[RCOMP]) & 0xf8) << 7) | (((S[GCOMP]) & 0xf8) << 2) | ((S[BCOMP]) >> 3) ) + *P = ( (((S[ACOMP]) & 0x80) << 16) | \ + (((S[RCOMP]) & 0xf8) << 7) | \ + (((S[GCOMP]) & 0xf8) << 2) | \ + (((S[BCOMP]) ) >> 3) ) #define FETCH_PIXEL(D, P) \ D[RCOMP] = ((*P & 0x7c00) >> 7); \ D[GCOMP] = ((*P & 0x03e0) >> 2); \ @@ -370,13 +538,16 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, /* RGB16 */ #define NAME(PREFIX) PREFIX##_RGB16 #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLushort *P = (GLushort *) (data->video.end - (Y) * data->video.pitch + (X) * 2); #define INC_PIXEL_PTR(P) P += 1 #define STORE_PIXEL(P, X, Y, S) \ - *P = ( (((S[RCOMP]) & 0xf8) << 8) | (((S[GCOMP]) & 0xfc) << 3) | ((S[BCOMP]) >> 3) 
) + *P = ( (((S[RCOMP]) & 0xf8) << 8) | \ + (((S[GCOMP]) & 0xfc) << 3) | \ + (((S[BCOMP]) ) >> 3) ) #define FETCH_PIXEL(D, P) \ D[RCOMP] = ((*P & 0xf800) >> 8); \ D[GCOMP] = ((*P & 0x07e0) >> 3); \ @@ -388,6 +559,7 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, /* RGB24 */ #define NAME(PREFIX) PREFIX##_RGB24 #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -403,13 +575,16 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, /* RGB32 */ #define NAME(PREFIX) PREFIX##_RGB32 #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLuint *P = (GLuint*) (data->video.end - (Y) * data->video.pitch + (X) * 4); #define INC_PIXEL_PTR(P) P += 1 #define STORE_PIXEL(P, X, Y, S) \ - *P = ( ((S[RCOMP]) << 16) | ((S[GCOMP]) << 8) | (S[BCOMP]) ) + *P = ( ((S[RCOMP]) << 16) | \ + ((S[GCOMP]) << 8) | \ + ((S[BCOMP]) ) ) #define FETCH_PIXEL(D, P) \ D[RCOMP] = ((*P & 0x00ff0000) >> 16); \ D[GCOMP] = ((*P & 0x0000ff00) >> 8); \ @@ -421,15 +596,22 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, /* ARGB */ #define NAME(PREFIX) PREFIX##_ARGB #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLuint *P = (GLuint*) (data->video.end - (Y) * data->video.pitch + (X) * 4); #define INC_PIXEL_PTR(P) P += 1 #define STORE_PIXEL_RGB(P, X, Y, S) \ - *P = ( 0xff000000 | ((S[RCOMP]) << 16) | ((S[GCOMP]) << 8) | (S[BCOMP]) ) + *P = ( 0xff000000 | \ + ((S[RCOMP]) << 16) | \ + ((S[GCOMP]) << 8) | \ + ((S[BCOMP]) ) ) #define STORE_PIXEL(P, X, Y, S) \ - *P = ( ((S[ACOMP]) << 24) | ((S[RCOMP]) << 16) | ((S[GCOMP]) << 8) | (S[BCOMP]) ) + *P = ( ((S[ACOMP]) << 24) | \ + ((S[RCOMP]) << 16) | \ + ((S[GCOMP]) << 8) | \ + ((S[BCOMP]) 
) ) #define FETCH_PIXEL(D, P) \ D[RCOMP] = ((*P & 0x00ff0000) >> 16); \ D[GCOMP] = ((*P & 0x0000ff00) >> 8); \ @@ -441,27 +623,35 @@ renderbuffer_storage( GLcontext *ctx, struct gl_renderbuffer *render, /* AiRGB */ #define NAME(PREFIX) PREFIX##_AiRGB #define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ IDirectFBGL_data *data = (IDirectFBGL_data*) ctx->DriverCtx; #define INIT_PIXEL_PTR(P, X, Y) \ GLuint *P = (GLuint*) (data->video.end - (Y) * data->video.pitch + (X) * 4); #define INC_PIXEL_PTR(P) P += 1 #define STORE_PIXEL_RGB(P, X, Y, S) \ - *P = ( ((S[RCOMP]) << 16) | ((S[GCOMP]) << 8) | (S[BCOMP]) ) + *P = ( ((S[RCOMP]) << 16) | \ + ((S[GCOMP]) << 8) | \ + ((S[BCOMP]) ) ) #define STORE_PIXEL(P, X, Y, S) \ - *P = ( ((0xff - (S[ACOMP])) << 24) | ((S[RCOMP]) << 16) | ((S[GCOMP]) << 8) | (S[BCOMP]) ) + *P = ( (((S[ACOMP]) ^ 0xff) << 24) | \ + (((S[RCOMP]) ) << 16) | \ + (((S[GCOMP]) ) << 8) | \ + (((S[BCOMP]) ) ) ) #define FETCH_PIXEL(D, P) \ - D[RCOMP] = ((*P & 0x00ff0000) >> 16); \ - D[GCOMP] = ((*P & 0x0000ff00) >> 8); \ - D[BCOMP] = ((*P & 0x000000ff) ); \ - D[ACOMP] = (0xff - ((*P & 0xff000000) >> 24)) + D[RCOMP] = ((*P & 0x00ff0000) >> 16); \ + D[GCOMP] = ((*P & 0x0000ff00) >> 8); \ + D[BCOMP] = ((*P & 0x000000ff) ); \ + D[ACOMP] = (((*P & 0xff000000) >> 24) ^ 0xff) #include "swrast/s_spantemp.h" +/*****************************************************************************/ + static bool -dfb_mesa_setup_visual( GLvisual *visual, - DFBSurfacePixelFormat format ) +directfbgl_init_visual( GLvisual *visual, + DFBSurfacePixelFormat format ) { GLboolean rgbFlag = GL_TRUE; GLboolean dbFlag = GL_FALSE; @@ -486,6 +676,18 @@ dfb_mesa_setup_visual( GLvisual *visual, greenBits = 3; blueBits = 2; break; + case DSPF_ARGB4444: + redBits = 4; + greenBits = 4; + blueBits = 4; + alphaBits = 4; + break; + case DSPF_ARGB2554: + redBits = 5; + greenBits = 5; + blueBits = 4; + alphaBits = 2; + break; case DSPF_ARGB1555: redBits = 5; greenBits = 5; @@ -531,22 
+733,22 @@ dfb_mesa_setup_visual( GLvisual *visual, } static bool -dfb_mesa_create_context( GLcontext *context, - GLframebuffer *framebuffer, - GLvisual *visual, - DFBSurfacePixelFormat format, - IDirectFBGL_data *data ) +directfbgl_create_context( GLcontext *context, + GLframebuffer *framebuffer, + GLvisual *visual, + DFBSurfacePixelFormat format, + IDirectFBGL_data *data ) { - struct dd_function_table functions; - struct swrast_device_driver *swdd; + struct dd_function_table functions; _mesa_initialize_framebuffer( framebuffer, visual ); _mesa_init_driver_functions( &functions ); - functions.GetString = get_string; - functions.UpdateState = update_state; - functions.GetBufferSize = get_buffer_size; - functions.Viewport = set_viewport; + functions.GetString = dfbGetString; + functions.UpdateState = dfbUpdateState; + functions.GetBufferSize = dfbGetBufferSize; + functions.Viewport = dfbSetViewport; + functions.Clear = dfbClear; if (!_mesa_initialize_context( context, visual, NULL, &functions, (void*) data )) { @@ -560,17 +762,14 @@ dfb_mesa_create_context( GLcontext *context, _tnl_CreateContext( context ); _swsetup_CreateContext( context ); _swsetup_Wakeup( context ); - - swdd = _swrast_GetDeviceDriverReference( context ); - swdd->SetBuffer = set_buffer; _mesa_init_renderbuffer( &data->render, 0 ); data->render.InternalFormat = GL_RGBA; data->render._BaseFormat = GL_RGBA; data->render.DataType = GL_UNSIGNED_BYTE; data->render.Data = data->video.start; - data->render.Delete = delete_renderbuffer; - data->render.AllocStorage = renderbuffer_storage; + data->render.Delete = dfbDeleteRenderbuffer; + data->render.AllocStorage = dfbRenderbufferStorage; switch (format) { case DSPF_RGB332: @@ -581,6 +780,22 @@ dfb_mesa_create_context( GLcontext *context, data->render.PutValues = put_values_RGB332; data->render.PutMonoValues = put_mono_values_RGB332; break; + case DSPF_ARGB4444: + data->render.GetRow = get_row_ARGB4444; + data->render.GetValues = get_values_ARGB4444; + 
data->render.PutRow = put_row_ARGB4444; + data->render.PutMonoRow = put_mono_row_ARGB4444; + data->render.PutValues = put_values_ARGB4444; + data->render.PutMonoValues = put_mono_values_ARGB4444; + break; + case DSPF_ARGB2554: + data->render.GetRow = get_row_ARGB2554; + data->render.GetValues = get_values_ARGB2554; + data->render.PutRow = put_row_ARGB2554; + data->render.PutMonoRow = put_mono_row_ARGB2554; + data->render.PutValues = put_values_ARGB2554; + data->render.PutMonoValues = put_mono_values_ARGB2554; + break; case DSPF_ARGB1555: data->render.GetRow = get_row_ARGB1555; data->render.GetValues = get_values_ARGB1555; @@ -647,17 +862,14 @@ dfb_mesa_create_context( GLcontext *context, TNL_CONTEXT( context )->Driver.RunPipeline = _tnl_run_pipeline; _mesa_enable_sw_extensions( context ); - - _mesa_make_current( context, framebuffer, framebuffer ); return true; } static void -dfb_mesa_destroy_context( GLcontext *context, - GLframebuffer *framebuffer ) +directfbgl_destroy_context( GLcontext *context, + GLframebuffer *framebuffer ) { - _mesa_make_current( NULL, NULL, NULL ); _mesa_free_framebuffer_data( framebuffer ); _mesa_notifyDestroy( context ); _mesa_free_context_data( context ); diff --git a/src/mesa/drivers/dri/Makefile b/src/mesa/drivers/dri/Makefile index a49de0f0faa..1db878bab70 100644 --- a/src/mesa/drivers/dri/Makefile +++ b/src/mesa/drivers/dri/Makefile @@ -6,11 +6,11 @@ include $(TOP)/configs/current -default: $(LIB_DIR) subdirs +default: $(TOP)/$(LIB_DIR) subdirs -$(LIB_DIR): - -mkdir $(LIB_DIR) +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) subdirs: diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 4a7a6552ac0..58912730fdd 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -62,21 +62,21 @@ SHARED_INCLUDES = \ ##### TARGETS ##### -default: depend symlinks $(LIBNAME) $(LIB_DIR)/$(LIBNAME) +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) 
-#$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile +#$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile # @echo BUILDING FOR: $(WINDOW_SYSTEM) -# $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(LIB_DIR) \ +# $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \ # $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) $(OBJECTS) -$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) -$(LIB_DIR)/$(LIBNAME): $(LIBNAME) - install $(LIBNAME) $(LIB_DIR) +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) @@ -99,6 +99,7 @@ clean: -rm -f depend depend.bak install: $(LIBNAME) - install $(LIBNAME) /usr/X11R6/lib/modules/dri/$(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) include depend diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d68837b545a..b5c20a066a5 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -845,6 +845,10 @@ static void driDestroyScreen(__DRInativeDisplay *dpy, int scrn, void *screenPriv if ( psp->modes != NULL ) { (*dri_interface->destroyContextModes)( psp->modes ); } + + assert(psp->drawHash); + drmHashDestroy(psp->drawHash); + _mesa_free(psp); } } diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index c21607c6beb..0f762837a4f 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -132,10 +132,11 @@ static const char VertexAttrib4ubvNV_names[] = ""; #endif -#if defined(need_GL_SGI_color_table) +#if defined(need_GL_SGI_color_table) || 
defined(need_GL_EXT_paletted_texture) static const char GetColorTableParameterfvSGI_names[] = "iip\0" /* Parameter signature */ "glGetColorTableParameterfvSGI\0" + "glGetColorTableParameterfvEXT\0" ""; #endif @@ -177,16 +178,10 @@ static const char ReplacementCodeuiColor3fVertex3fvSUN_names[] = ""; #endif -#if defined(need_GL_EXT_paletted_texture) -static const char GetColorTableParameterivEXT_names[] = - "iip\0" /* Parameter signature */ - "glGetColorTableParameterivEXT\0" - ""; -#endif - -#if defined(need_GL_EXT_blend_equation_separate) || defined(need_GL_ATI_blend_equation_separate) +#if defined(need_GL_VERSION_2_0) || defined(need_GL_EXT_blend_equation_separate) || defined(need_GL_ATI_blend_equation_separate) static const char BlendEquationSeparateEXT_names[] = "ii\0" /* Parameter signature */ + "glBlendEquationSeparate\0" "glBlendEquationSeparateEXT\0" "glBlendEquationSeparateATI\0" ""; @@ -258,6 +253,13 @@ static const char VertexAttrib2fARB_names[] = ""; #endif +#if defined(need_GL_MESA_shader_debug) +static const char GetDebugLogLengthMESA_names[] = + "iii\0" /* Parameter signature */ + "glGetDebugLogLengthMESA\0" + ""; +#endif + #if defined(need_GL_EXT_histogram) static const char GetHistogramParameterivEXT_names[] = "iip\0" /* Parameter signature */ @@ -397,6 +399,13 @@ static const char UniformMatrix4fvARB_names[] = ""; #endif +#if defined(need_GL_APPLE_vertex_array_object) +static const char DeleteVertexArraysAPPLE_names[] = + "ip\0" /* Parameter signature */ + "glDeleteVertexArraysAPPLE\0" + ""; +#endif + #if defined(need_GL_SGIX_instruments) static const char ReadInstrumentsSGIX_names[] = "i\0" /* Parameter signature */ @@ -475,6 +484,13 @@ static const char GlobalAlphaFactorubSUN_names[] = ""; #endif +#if defined(need_GL_MESA_shader_debug) +static const char ClearDebugLogMESA_names[] = + "iii\0" /* Parameter signature */ + "glClearDebugLogMESA\0" + ""; +#endif + #if defined(need_GL_EXT_histogram) static const char ResetHistogram_names[] = "i\0" /* 
Parameter signature */ @@ -628,6 +644,13 @@ static const char FinishAsyncSGIX_names[] = ""; #endif +#if defined(need_GL_MESA_shader_debug) +static const char GetDebugLogMESA_names[] = + "iiiipp\0" /* Parameter signature */ + "glGetDebugLogMESA\0" + ""; +#endif + #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) static const char FogCoorddEXT_names[] = "d\0" /* Parameter signature */ @@ -1024,11 +1047,10 @@ static const char ReplacementCodeuiTexCoord2fVertex3fSUN_names[] = ""; #endif -#if defined(need_GL_ARB_draw_buffers) || defined(need_GL_ATI_draw_buffers) -static const char DrawBuffersARB_names[] = - "ip\0" /* Parameter signature */ - "glDrawBuffersARB\0" - "glDrawBuffersATI\0" +#if defined(need_GL_ARB_shader_objects) +static const char Uniform1fARB_names[] = + "if\0" /* Parameter signature */ + "glUniform1fARB\0" ""; #endif @@ -1125,10 +1147,10 @@ static const char FragmentMaterialfSGIX_names[] = ""; #endif -#if defined(need_GL_EXT_paletted_texture) -static const char GetColorTableEXT_names[] = - "iiip\0" /* Parameter signature */ - "glGetColorTableEXT\0" +#if defined(need_GL_SUN_vertex) +static const char TexCoord2fNormal3fVertex3fSUN_names[] = + "ffffffff\0" /* Parameter signature */ + "glTexCoord2fNormal3fVertex3fSUN\0" ""; #endif @@ -2344,6 +2366,13 @@ static const char MultiTexCoord4ivARB_names[] = ""; #endif +#if defined(need_GL_EXT_gpu_program_parameters) +static const char ProgramLocalParameters4fvEXT_names[] = + "iiip\0" /* Parameter signature */ + "glProgramLocalParameters4fvEXT\0" + ""; +#endif + #if defined(need_GL_NV_evaluators) static const char GetMapAttribParameterfvNV_names[] = "iiip\0" /* Parameter signature */ @@ -2472,6 +2501,20 @@ static const char ReplacementCodeuivSUN_names[] = ""; #endif +#if defined(need_GL_APPLE_vertex_array_object) +static const char GenVertexArraysAPPLE_names[] = + "ip\0" /* Parameter signature */ + "glGenVertexArraysAPPLE\0" + ""; +#endif + +#if defined(need_GL_EXT_gpu_program_parameters) +static 
const char ProgramEnvParameters4fvEXT_names[] = + "iiip\0" /* Parameter signature */ + "glProgramEnvParameters4fvEXT\0" + ""; +#endif + #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) static const char WindowPos2iMESA_names[] = "ii\0" /* Parameter signature */ @@ -2497,13 +2540,6 @@ static const char CompressedTexSubImage1DARB_names[] = ""; #endif -#if defined(need_GL_SUN_vertex) -static const char TexCoord2fNormal3fVertex3fSUN_names[] = - "ffffffff\0" /* Parameter signature */ - "glTexCoord2fNormal3fVertex3fSUN\0" - ""; -#endif - #if defined(need_GL_NV_vertex_program) static const char GetVertexAttribivNV_names[] = "iip\0" /* Parameter signature */ @@ -2962,10 +2998,11 @@ static const char LightEnviSGIX_names[] = ""; #endif -#if defined(need_GL_SGI_color_table) +#if defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) static const char GetColorTableParameterivSGI_names[] = "iip\0" /* Parameter signature */ "glGetColorTableParameterivSGI\0" + "glGetColorTableParameterivEXT\0" ""; #endif @@ -3532,10 +3569,11 @@ static const char MultiTexCoord4dARB_names[] = ""; #endif -#if defined(need_GL_SGI_color_table) +#if defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) static const char GetColorTableSGI_names[] = "iiip\0" /* Parameter signature */ "glGetColorTableSGI\0" + "glGetColorTableEXT\0" ""; #endif @@ -3775,6 +3813,13 @@ static const char TexCoord4fVertex4fSUN_names[] = ""; #endif +#if defined(need_GL_APPLE_vertex_array_object) +static const char BindVertexArrayAPPLE_names[] = + "i\0" /* Parameter signature */ + "glBindVertexArrayAPPLE\0" + ""; +#endif + #if defined(need_GL_ARB_vertex_program) static const char GetProgramLocalParameterdvARB_names[] = "iip\0" /* Parameter signature */ @@ -3806,6 +3851,13 @@ static const char BlendFuncSeparateEXT_names[] = ""; #endif +#if defined(need_GL_APPLE_vertex_array_object) +static const char IsVertexArrayAPPLE_names[] = + "i\0" /* 
Parameter signature */ + "glIsVertexArrayAPPLE\0" + ""; +#endif + #if defined(need_GL_NV_vertex_program) static const char ProgramParameters4dvNV_names[] = "iiip\0" /* Parameter signature */ @@ -3920,13 +3972,6 @@ static const char ActiveStencilFaceEXT_names[] = ""; #endif -#if defined(need_GL_EXT_paletted_texture) -static const char GetColorTableParameterfvEXT_names[] = - "iip\0" /* Parameter signature */ - "glGetColorTableParameterfvEXT\0" - ""; -#endif - #if defined(need_GL_ARB_shader_objects) static const char GetShaderSourceARB_names[] = "iipp\0" /* Parameter signature */ @@ -3999,10 +4044,12 @@ static const char GetMapControlPointsNV_names[] = ""; #endif -#if defined(need_GL_ARB_shader_objects) -static const char Uniform1fARB_names[] = - "if\0" /* Parameter signature */ - "glUniform1fARB\0" +#if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_draw_buffers) || defined(need_GL_ATI_draw_buffers) +static const char DrawBuffersARB_names[] = + "ip\0" /* Parameter signature */ + "glDrawBuffers\0" + "glDrawBuffersARB\0" + "glDrawBuffersATI\0" ""; #endif @@ -4257,6 +4304,13 @@ static const char VertexAttrib4svARB_names[] = ""; #endif +#if defined(need_GL_MESA_shader_debug) +static const char CreateDebugObjectMESA_names[] = + "\0" /* Parameter signature */ + "glCreateDebugObjectMESA\0" + ""; +#endif + #if defined(need_GL_ARB_shader_objects) static const char Uniform3fARB_names[] = "ifff\0" /* Parameter signature */ @@ -4601,14 +4655,24 @@ static const char WindowPos2fMESA_names[] = #if defined(need_GL_3DFX_tbuffer) static const struct dri_extension_function GL_3DFX_tbuffer_functions[] = { - { TbufferMask3DFX_names, TbufferMask3DFX_remap_index, 553 }, + { TbufferMask3DFX_names, TbufferMask3DFX_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_APPLE_vertex_array_object) +static const struct dri_extension_function GL_APPLE_vertex_array_object_functions[] = { + { DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 }, + { 
GenVertexArraysAPPLE_names, GenVertexArraysAPPLE_remap_index, -1 }, + { BindVertexArrayAPPLE_names, BindVertexArrayAPPLE_remap_index, -1 }, + { IsVertexArrayAPPLE_names, IsVertexArrayAPPLE_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_draw_buffers) static const struct dri_extension_function GL_ARB_draw_buffers_functions[] = { - { DrawBuffersARB_names, DrawBuffersARB_remap_index, 413 }, + { DrawBuffersARB_names, DrawBuffersARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4626,97 +4690,97 @@ static const struct dri_extension_function GL_ARB_matrix_palette_functions[] = { #if defined(need_GL_ARB_multisample) static const struct dri_extension_function GL_ARB_multisample_functions[] = { - { SampleCoverageARB_names, SampleCoverageARB_remap_index, 412 }, + { SampleCoverageARB_names, SampleCoverageARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_occlusion_query) static const struct dri_extension_function GL_ARB_occlusion_query_functions[] = { - { BeginQueryARB_names, BeginQueryARB_remap_index, 703 }, - { GetQueryivARB_names, GetQueryivARB_remap_index, 705 }, - { GetQueryObjectivARB_names, GetQueryObjectivARB_remap_index, 706 }, - { EndQueryARB_names, EndQueryARB_remap_index, 704 }, - { GetQueryObjectuivARB_names, GetQueryObjectuivARB_remap_index, 707 }, - { DeleteQueriesARB_names, DeleteQueriesARB_remap_index, 701 }, - { IsQueryARB_names, IsQueryARB_remap_index, 702 }, - { GenQueriesARB_names, GenQueriesARB_remap_index, 700 }, + { BeginQueryARB_names, BeginQueryARB_remap_index, -1 }, + { GetQueryivARB_names, GetQueryivARB_remap_index, -1 }, + { GetQueryObjectivARB_names, GetQueryObjectivARB_remap_index, -1 }, + { EndQueryARB_names, EndQueryARB_remap_index, -1 }, + { GetQueryObjectuivARB_names, GetQueryObjectuivARB_remap_index, -1 }, + { DeleteQueriesARB_names, DeleteQueriesARB_remap_index, -1 }, + { IsQueryARB_names, IsQueryARB_remap_index, -1 }, + { GenQueriesARB_names, GenQueriesARB_remap_index, -1 }, { NULL, 0, 0 } }; 
#endif #if defined(need_GL_ARB_point_parameters) static const struct dri_extension_function GL_ARB_point_parameters_functions[] = { - { PointParameterfEXT_names, PointParameterfEXT_remap_index, 458 }, - { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, 459 }, + { PointParameterfEXT_names, PointParameterfEXT_remap_index, -1 }, + { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_shader_objects) static const struct dri_extension_function GL_ARB_shader_objects_functions[] = { - { UniformMatrix3fvARB_names, UniformMatrix3fvARB_remap_index, 739 }, - { Uniform2fARB_names, Uniform2fARB_remap_index, 723 }, - { Uniform2ivARB_names, Uniform2ivARB_remap_index, 735 }, - { UniformMatrix4fvARB_names, UniformMatrix4fvARB_remap_index, 740 }, - { CreateProgramObjectARB_names, CreateProgramObjectARB_remap_index, 717 }, - { Uniform3iARB_names, Uniform3iARB_remap_index, 728 }, - { CreateShaderObjectARB_names, CreateShaderObjectARB_remap_index, 714 }, - { AttachObjectARB_names, AttachObjectARB_remap_index, 718 }, - { UniformMatrix2fvARB_names, UniformMatrix2fvARB_remap_index, 738 }, - { GetAttachedObjectsARB_names, GetAttachedObjectsARB_remap_index, 744 }, - { Uniform3fvARB_names, Uniform3fvARB_remap_index, 732 }, - { GetHandleARB_names, GetHandleARB_remap_index, 712 }, - { GetActiveUniformARB_names, GetActiveUniformARB_remap_index, 746 }, - { GetUniformivARB_names, GetUniformivARB_remap_index, 748 }, - { Uniform2fvARB_names, Uniform2fvARB_remap_index, 731 }, - { DeleteObjectARB_names, DeleteObjectARB_remap_index, 711 }, - { UseProgramObjectARB_names, UseProgramObjectARB_remap_index, 720 }, - { Uniform3ivARB_names, Uniform3ivARB_remap_index, 736 }, - { CompileShaderARB_names, CompileShaderARB_remap_index, 716 }, - { Uniform4fARB_names, Uniform4fARB_remap_index, 725 }, - { LinkProgramARB_names, LinkProgramARB_remap_index, 719 }, - { ShaderSourceARB_names, ShaderSourceARB_remap_index, 715 }, - { 
Uniform4ivARB_names, Uniform4ivARB_remap_index, 737 }, - { Uniform1ivARB_names, Uniform1ivARB_remap_index, 734 }, - { ValidateProgramARB_names, ValidateProgramARB_remap_index, 721 }, - { Uniform1iARB_names, Uniform1iARB_remap_index, 726 }, - { Uniform4fvARB_names, Uniform4fvARB_remap_index, 733 }, - { GetUniformfvARB_names, GetUniformfvARB_remap_index, 747 }, - { DetachObjectARB_names, DetachObjectARB_remap_index, 713 }, - { Uniform4iARB_names, Uniform4iARB_remap_index, 729 }, - { Uniform2iARB_names, Uniform2iARB_remap_index, 727 }, - { GetObjectParameterivARB_names, GetObjectParameterivARB_remap_index, 742 }, - { GetUniformLocationARB_names, GetUniformLocationARB_remap_index, 745 }, - { GetShaderSourceARB_names, GetShaderSourceARB_remap_index, 749 }, - { Uniform1fARB_names, Uniform1fARB_remap_index, 722 }, - { Uniform1fvARB_names, Uniform1fvARB_remap_index, 730 }, - { Uniform3fARB_names, Uniform3fARB_remap_index, 724 }, - { GetObjectParameterfvARB_names, GetObjectParameterfvARB_remap_index, 741 }, - { GetInfoLogARB_names, GetInfoLogARB_remap_index, 743 }, + { UniformMatrix3fvARB_names, UniformMatrix3fvARB_remap_index, -1 }, + { Uniform2fARB_names, Uniform2fARB_remap_index, -1 }, + { Uniform2ivARB_names, Uniform2ivARB_remap_index, -1 }, + { UniformMatrix4fvARB_names, UniformMatrix4fvARB_remap_index, -1 }, + { CreateProgramObjectARB_names, CreateProgramObjectARB_remap_index, -1 }, + { Uniform3iARB_names, Uniform3iARB_remap_index, -1 }, + { CreateShaderObjectARB_names, CreateShaderObjectARB_remap_index, -1 }, + { Uniform1fARB_names, Uniform1fARB_remap_index, -1 }, + { AttachObjectARB_names, AttachObjectARB_remap_index, -1 }, + { UniformMatrix2fvARB_names, UniformMatrix2fvARB_remap_index, -1 }, + { GetAttachedObjectsARB_names, GetAttachedObjectsARB_remap_index, -1 }, + { Uniform3fvARB_names, Uniform3fvARB_remap_index, -1 }, + { GetHandleARB_names, GetHandleARB_remap_index, -1 }, + { GetActiveUniformARB_names, GetActiveUniformARB_remap_index, -1 }, + { 
GetUniformivARB_names, GetUniformivARB_remap_index, -1 }, + { Uniform2fvARB_names, Uniform2fvARB_remap_index, -1 }, + { DeleteObjectARB_names, DeleteObjectARB_remap_index, -1 }, + { UseProgramObjectARB_names, UseProgramObjectARB_remap_index, -1 }, + { Uniform3ivARB_names, Uniform3ivARB_remap_index, -1 }, + { CompileShaderARB_names, CompileShaderARB_remap_index, -1 }, + { Uniform4fARB_names, Uniform4fARB_remap_index, -1 }, + { LinkProgramARB_names, LinkProgramARB_remap_index, -1 }, + { ShaderSourceARB_names, ShaderSourceARB_remap_index, -1 }, + { Uniform4ivARB_names, Uniform4ivARB_remap_index, -1 }, + { Uniform1ivARB_names, Uniform1ivARB_remap_index, -1 }, + { ValidateProgramARB_names, ValidateProgramARB_remap_index, -1 }, + { Uniform1iARB_names, Uniform1iARB_remap_index, -1 }, + { Uniform4fvARB_names, Uniform4fvARB_remap_index, -1 }, + { GetUniformfvARB_names, GetUniformfvARB_remap_index, -1 }, + { DetachObjectARB_names, DetachObjectARB_remap_index, -1 }, + { Uniform4iARB_names, Uniform4iARB_remap_index, -1 }, + { Uniform2iARB_names, Uniform2iARB_remap_index, -1 }, + { GetObjectParameterivARB_names, GetObjectParameterivARB_remap_index, -1 }, + { GetUniformLocationARB_names, GetUniformLocationARB_remap_index, -1 }, + { GetShaderSourceARB_names, GetShaderSourceARB_remap_index, -1 }, + { Uniform1fvARB_names, Uniform1fvARB_remap_index, -1 }, + { Uniform3fARB_names, Uniform3fARB_remap_index, -1 }, + { GetObjectParameterfvARB_names, GetObjectParameterfvARB_remap_index, -1 }, + { GetInfoLogARB_names, GetInfoLogARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_texture_compression) static const struct dri_extension_function GL_ARB_texture_compression_functions[] = { - { CompressedTexSubImage2DARB_names, CompressedTexSubImage2DARB_remap_index, 558 }, - { CompressedTexImage3DARB_names, CompressedTexImage3DARB_remap_index, 554 }, - { CompressedTexImage1DARB_names, CompressedTexImage1DARB_remap_index, 556 }, - { CompressedTexSubImage1DARB_names, 
CompressedTexSubImage1DARB_remap_index, 559 }, - { CompressedTexSubImage3DARB_names, CompressedTexSubImage3DARB_remap_index, 557 }, - { CompressedTexImage2DARB_names, CompressedTexImage2DARB_remap_index, 555 }, - { GetCompressedTexImageARB_names, GetCompressedTexImageARB_remap_index, 560 }, + { CompressedTexSubImage2DARB_names, CompressedTexSubImage2DARB_remap_index, -1 }, + { CompressedTexImage3DARB_names, CompressedTexImage3DARB_remap_index, -1 }, + { CompressedTexImage1DARB_names, CompressedTexImage1DARB_remap_index, -1 }, + { CompressedTexSubImage1DARB_names, CompressedTexSubImage1DARB_remap_index, -1 }, + { CompressedTexSubImage3DARB_names, CompressedTexSubImage3DARB_remap_index, -1 }, + { CompressedTexImage2DARB_names, CompressedTexImage2DARB_remap_index, -1 }, + { GetCompressedTexImageARB_names, GetCompressedTexImageARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_transpose_matrix) static const struct dri_extension_function GL_ARB_transpose_matrix_functions[] = { - { MultTransposeMatrixdARB_names, MultTransposeMatrixdARB_remap_index, 411 }, - { LoadTransposeMatrixdARB_names, LoadTransposeMatrixdARB_remap_index, 409 }, - { MultTransposeMatrixfARB_names, MultTransposeMatrixfARB_remap_index, 410 }, - { LoadTransposeMatrixfARB_names, LoadTransposeMatrixfARB_remap_index, 408 }, + { MultTransposeMatrixdARB_names, MultTransposeMatrixdARB_remap_index, -1 }, + { LoadTransposeMatrixdARB_names, LoadTransposeMatrixdARB_remap_index, -1 }, + { MultTransposeMatrixfARB_names, MultTransposeMatrixfARB_remap_index, -1 }, + { LoadTransposeMatrixfARB_names, LoadTransposeMatrixfARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4739,150 +4803,150 @@ static const struct dri_extension_function GL_ARB_vertex_blend_functions[] = { #if defined(need_GL_ARB_vertex_buffer_object) static const struct dri_extension_function GL_ARB_vertex_buffer_object_functions[] = { - { GetBufferSubDataARB_names, GetBufferSubDataARB_remap_index, 695 }, - { BufferSubDataARB_names, 
BufferSubDataARB_remap_index, 690 }, - { BufferDataARB_names, BufferDataARB_remap_index, 689 }, - { GetBufferPointervARB_names, GetBufferPointervARB_remap_index, 694 }, - { GetBufferParameterivARB_names, GetBufferParameterivARB_remap_index, 693 }, - { MapBufferARB_names, MapBufferARB_remap_index, 697 }, - { IsBufferARB_names, IsBufferARB_remap_index, 696 }, - { DeleteBuffersARB_names, DeleteBuffersARB_remap_index, 691 }, - { UnmapBufferARB_names, UnmapBufferARB_remap_index, 698 }, - { BindBufferARB_names, BindBufferARB_remap_index, 688 }, - { GenBuffersARB_names, GenBuffersARB_remap_index, 692 }, + { GetBufferSubDataARB_names, GetBufferSubDataARB_remap_index, -1 }, + { BufferSubDataARB_names, BufferSubDataARB_remap_index, -1 }, + { BufferDataARB_names, BufferDataARB_remap_index, -1 }, + { GetBufferPointervARB_names, GetBufferPointervARB_remap_index, -1 }, + { GetBufferParameterivARB_names, GetBufferParameterivARB_remap_index, -1 }, + { MapBufferARB_names, MapBufferARB_remap_index, -1 }, + { IsBufferARB_names, IsBufferARB_remap_index, -1 }, + { DeleteBuffersARB_names, DeleteBuffersARB_remap_index, -1 }, + { UnmapBufferARB_names, UnmapBufferARB_remap_index, -1 }, + { BindBufferARB_names, BindBufferARB_remap_index, -1 }, + { GenBuffersARB_names, GenBuffersARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_vertex_program) static const struct dri_extension_function GL_ARB_vertex_program_functions[] = { - { ProgramEnvParameter4dvARB_names, ProgramEnvParameter4dvARB_remap_index, 669 }, - { VertexAttrib2fARB_names, VertexAttrib2fARB_remap_index, 611 }, - { VertexAttrib3fARB_names, VertexAttrib3fARB_remap_index, 617 }, - { VertexAttrib1svARB_names, VertexAttrib1svARB_remap_index, 608 }, - { VertexAttrib4NusvARB_names, VertexAttrib4NusvARB_remap_index, 662 }, - { DisableVertexAttribArrayARB_names, DisableVertexAttribArrayARB_remap_index, 666 }, - { ProgramLocalParameter4dARB_names, ProgramLocalParameter4dARB_remap_index, 672 }, - { 
VertexAttrib1fARB_names, VertexAttrib1fARB_remap_index, 605 }, - { VertexAttrib4NbvARB_names, VertexAttrib4NbvARB_remap_index, 659 }, - { VertexAttrib1sARB_names, VertexAttrib1sARB_remap_index, 607 }, - { GetProgramLocalParameterfvARB_names, GetProgramLocalParameterfvARB_remap_index, 679 }, - { VertexAttrib3dvARB_names, VertexAttrib3dvARB_remap_index, 616 }, - { ProgramEnvParameter4fvARB_names, ProgramEnvParameter4fvARB_remap_index, 671 }, - { GetVertexAttribivARB_names, GetVertexAttribivARB_remap_index, 590 }, - { VertexAttrib4ivARB_names, VertexAttrib4ivARB_remap_index, 655 }, - { VertexAttrib4bvARB_names, VertexAttrib4bvARB_remap_index, 654 }, - { VertexAttrib3dARB_names, VertexAttrib3dARB_remap_index, 615 }, - { VertexAttrib4fARB_names, VertexAttrib4fARB_remap_index, 623 }, - { VertexAttrib4fvARB_names, VertexAttrib4fvARB_remap_index, 624 }, - { ProgramLocalParameter4dvARB_names, ProgramLocalParameter4dvARB_remap_index, 673 }, - { VertexAttrib4usvARB_names, VertexAttrib4usvARB_remap_index, 657 }, - { VertexAttrib2dARB_names, VertexAttrib2dARB_remap_index, 609 }, - { VertexAttrib1dvARB_names, VertexAttrib1dvARB_remap_index, 604 }, - { GetVertexAttribfvARB_names, GetVertexAttribfvARB_remap_index, 589 }, - { VertexAttrib4ubvARB_names, VertexAttrib4ubvARB_remap_index, 656 }, - { ProgramEnvParameter4fARB_names, ProgramEnvParameter4fARB_remap_index, 670 }, - { VertexAttrib4sARB_names, VertexAttrib4sARB_remap_index, 625 }, - { VertexAttrib2dvARB_names, VertexAttrib2dvARB_remap_index, 610 }, - { VertexAttrib2fvARB_names, VertexAttrib2fvARB_remap_index, 612 }, - { VertexAttrib4NivARB_names, VertexAttrib4NivARB_remap_index, 661 }, - { GetProgramStringARB_names, GetProgramStringARB_remap_index, 681 }, - { VertexAttrib4NuivARB_names, VertexAttrib4NuivARB_remap_index, 663 }, - { IsProgramNV_names, IsProgramNV_remap_index, 592 }, - { ProgramEnvParameter4dARB_names, ProgramEnvParameter4dARB_remap_index, 668 }, - { VertexAttrib1dARB_names, VertexAttrib1dARB_remap_index, 603 }, 
- { VertexAttrib3svARB_names, VertexAttrib3svARB_remap_index, 620 }, - { GetVertexAttribdvARB_names, GetVertexAttribdvARB_remap_index, 588 }, - { VertexAttrib4dvARB_names, VertexAttrib4dvARB_remap_index, 622 }, - { VertexAttribPointerARB_names, VertexAttribPointerARB_remap_index, 664 }, - { VertexAttrib4NsvARB_names, VertexAttrib4NsvARB_remap_index, 660 }, - { VertexAttrib3fvARB_names, VertexAttrib3fvARB_remap_index, 618 }, - { VertexAttrib4NubARB_names, VertexAttrib4NubARB_remap_index, 627 }, - { GetProgramEnvParameterfvARB_names, GetProgramEnvParameterfvARB_remap_index, 677 }, - { ProgramLocalParameter4fvARB_names, ProgramLocalParameter4fvARB_remap_index, 675 }, - { DeleteProgramsNV_names, DeleteProgramsNV_remap_index, 580 }, - { GetVertexAttribPointervNV_names, GetVertexAttribPointervNV_remap_index, 591 }, - { VertexAttrib4dARB_names, VertexAttrib4dARB_remap_index, 621 }, - { GetProgramLocalParameterdvARB_names, GetProgramLocalParameterdvARB_remap_index, 678 }, - { GetProgramivARB_names, GetProgramivARB_remap_index, 680 }, - { VertexAttrib3sARB_names, VertexAttrib3sARB_remap_index, 619 }, - { ProgramStringARB_names, ProgramStringARB_remap_index, 667 }, - { ProgramLocalParameter4fARB_names, ProgramLocalParameter4fARB_remap_index, 674 }, - { EnableVertexAttribArrayARB_names, EnableVertexAttribArrayARB_remap_index, 665 }, - { VertexAttrib4uivARB_names, VertexAttrib4uivARB_remap_index, 658 }, - { BindProgramNV_names, BindProgramNV_remap_index, 579 }, - { VertexAttrib4svARB_names, VertexAttrib4svARB_remap_index, 626 }, - { VertexAttrib2svARB_names, VertexAttrib2svARB_remap_index, 614 }, - { VertexAttrib4NubvARB_names, VertexAttrib4NubvARB_remap_index, 628 }, - { GetProgramEnvParameterdvARB_names, GetProgramEnvParameterdvARB_remap_index, 676 }, - { VertexAttrib2sARB_names, VertexAttrib2sARB_remap_index, 613 }, - { VertexAttrib1fvARB_names, VertexAttrib1fvARB_remap_index, 606 }, - { GenProgramsNV_names, GenProgramsNV_remap_index, 582 }, + { 
ProgramEnvParameter4dvARB_names, ProgramEnvParameter4dvARB_remap_index, -1 }, + { VertexAttrib2fARB_names, VertexAttrib2fARB_remap_index, -1 }, + { VertexAttrib3fARB_names, VertexAttrib3fARB_remap_index, -1 }, + { VertexAttrib1svARB_names, VertexAttrib1svARB_remap_index, -1 }, + { VertexAttrib4NusvARB_names, VertexAttrib4NusvARB_remap_index, -1 }, + { DisableVertexAttribArrayARB_names, DisableVertexAttribArrayARB_remap_index, -1 }, + { ProgramLocalParameter4dARB_names, ProgramLocalParameter4dARB_remap_index, -1 }, + { VertexAttrib1fARB_names, VertexAttrib1fARB_remap_index, -1 }, + { VertexAttrib4NbvARB_names, VertexAttrib4NbvARB_remap_index, -1 }, + { VertexAttrib1sARB_names, VertexAttrib1sARB_remap_index, -1 }, + { GetProgramLocalParameterfvARB_names, GetProgramLocalParameterfvARB_remap_index, -1 }, + { VertexAttrib3dvARB_names, VertexAttrib3dvARB_remap_index, -1 }, + { ProgramEnvParameter4fvARB_names, ProgramEnvParameter4fvARB_remap_index, -1 }, + { GetVertexAttribivARB_names, GetVertexAttribivARB_remap_index, -1 }, + { VertexAttrib4ivARB_names, VertexAttrib4ivARB_remap_index, -1 }, + { VertexAttrib4bvARB_names, VertexAttrib4bvARB_remap_index, -1 }, + { VertexAttrib3dARB_names, VertexAttrib3dARB_remap_index, -1 }, + { VertexAttrib4fARB_names, VertexAttrib4fARB_remap_index, -1 }, + { VertexAttrib4fvARB_names, VertexAttrib4fvARB_remap_index, -1 }, + { ProgramLocalParameter4dvARB_names, ProgramLocalParameter4dvARB_remap_index, -1 }, + { VertexAttrib4usvARB_names, VertexAttrib4usvARB_remap_index, -1 }, + { VertexAttrib2dARB_names, VertexAttrib2dARB_remap_index, -1 }, + { VertexAttrib1dvARB_names, VertexAttrib1dvARB_remap_index, -1 }, + { GetVertexAttribfvARB_names, GetVertexAttribfvARB_remap_index, -1 }, + { VertexAttrib4ubvARB_names, VertexAttrib4ubvARB_remap_index, -1 }, + { ProgramEnvParameter4fARB_names, ProgramEnvParameter4fARB_remap_index, -1 }, + { VertexAttrib4sARB_names, VertexAttrib4sARB_remap_index, -1 }, + { VertexAttrib2dvARB_names, 
VertexAttrib2dvARB_remap_index, -1 }, + { VertexAttrib2fvARB_names, VertexAttrib2fvARB_remap_index, -1 }, + { VertexAttrib4NivARB_names, VertexAttrib4NivARB_remap_index, -1 }, + { GetProgramStringARB_names, GetProgramStringARB_remap_index, -1 }, + { VertexAttrib4NuivARB_names, VertexAttrib4NuivARB_remap_index, -1 }, + { IsProgramNV_names, IsProgramNV_remap_index, -1 }, + { ProgramEnvParameter4dARB_names, ProgramEnvParameter4dARB_remap_index, -1 }, + { VertexAttrib1dARB_names, VertexAttrib1dARB_remap_index, -1 }, + { VertexAttrib3svARB_names, VertexAttrib3svARB_remap_index, -1 }, + { GetVertexAttribdvARB_names, GetVertexAttribdvARB_remap_index, -1 }, + { VertexAttrib4dvARB_names, VertexAttrib4dvARB_remap_index, -1 }, + { VertexAttribPointerARB_names, VertexAttribPointerARB_remap_index, -1 }, + { VertexAttrib4NsvARB_names, VertexAttrib4NsvARB_remap_index, -1 }, + { VertexAttrib3fvARB_names, VertexAttrib3fvARB_remap_index, -1 }, + { VertexAttrib4NubARB_names, VertexAttrib4NubARB_remap_index, -1 }, + { GetProgramEnvParameterfvARB_names, GetProgramEnvParameterfvARB_remap_index, -1 }, + { ProgramLocalParameter4fvARB_names, ProgramLocalParameter4fvARB_remap_index, -1 }, + { DeleteProgramsNV_names, DeleteProgramsNV_remap_index, -1 }, + { GetVertexAttribPointervNV_names, GetVertexAttribPointervNV_remap_index, -1 }, + { VertexAttrib4dARB_names, VertexAttrib4dARB_remap_index, -1 }, + { GetProgramLocalParameterdvARB_names, GetProgramLocalParameterdvARB_remap_index, -1 }, + { GetProgramivARB_names, GetProgramivARB_remap_index, -1 }, + { VertexAttrib3sARB_names, VertexAttrib3sARB_remap_index, -1 }, + { ProgramStringARB_names, ProgramStringARB_remap_index, -1 }, + { ProgramLocalParameter4fARB_names, ProgramLocalParameter4fARB_remap_index, -1 }, + { EnableVertexAttribArrayARB_names, EnableVertexAttribArrayARB_remap_index, -1 }, + { VertexAttrib4uivARB_names, VertexAttrib4uivARB_remap_index, -1 }, + { BindProgramNV_names, BindProgramNV_remap_index, -1 }, + { 
VertexAttrib4svARB_names, VertexAttrib4svARB_remap_index, -1 }, + { VertexAttrib2svARB_names, VertexAttrib2svARB_remap_index, -1 }, + { VertexAttrib4NubvARB_names, VertexAttrib4NubvARB_remap_index, -1 }, + { GetProgramEnvParameterdvARB_names, GetProgramEnvParameterdvARB_remap_index, -1 }, + { VertexAttrib2sARB_names, VertexAttrib2sARB_remap_index, -1 }, + { VertexAttrib1fvARB_names, VertexAttrib1fvARB_remap_index, -1 }, + { GenProgramsNV_names, GenProgramsNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_vertex_shader) static const struct dri_extension_function GL_ARB_vertex_shader_functions[] = { - { GetActiveAttribARB_names, GetActiveAttribARB_remap_index, 751 }, - { GetAttribLocationARB_names, GetAttribLocationARB_remap_index, 752 }, - { BindAttribLocationARB_names, BindAttribLocationARB_remap_index, 750 }, + { GetActiveAttribARB_names, GetActiveAttribARB_remap_index, -1 }, + { GetAttribLocationARB_names, GetAttribLocationARB_remap_index, -1 }, + { BindAttribLocationARB_names, BindAttribLocationARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ARB_window_pos) static const struct dri_extension_function GL_ARB_window_pos_functions[] = { - { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, 523 }, - { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, 514 }, - { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, 520 }, - { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, 521 }, - { WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, 516 }, - { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, 513 }, - { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, 522 }, - { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, 524 }, - { WindowPos2iMESA_names, WindowPos2iMESA_remap_index, 517 }, - { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, 527 }, - { WindowPos2ivMESA_names, WindowPos2ivMESA_remap_index, 518 }, - { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, 519 }, - { WindowPos3iMESA_names, 
WindowPos3iMESA_remap_index, 525 }, - { WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, 526 }, - { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, 528 }, - { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, 515 }, + { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, -1 }, + { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, -1 }, + { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, -1 }, + { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, -1 }, + { WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, -1 }, + { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, -1 }, + { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, -1 }, + { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, -1 }, + { WindowPos2iMESA_names, WindowPos2iMESA_remap_index, -1 }, + { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, -1 }, + { WindowPos2ivMESA_names, WindowPos2ivMESA_remap_index, -1 }, + { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, -1 }, + { WindowPos3iMESA_names, WindowPos3iMESA_remap_index, -1 }, + { WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, -1 }, + { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, -1 }, + { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ATI_blend_equation_separate) static const struct dri_extension_function GL_ATI_blend_equation_separate_functions[] = { - { BlendEquationSeparateEXT_names, BlendEquationSeparateEXT_remap_index, 710 }, + { BlendEquationSeparateEXT_names, BlendEquationSeparateEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ATI_draw_buffers) static const struct dri_extension_function GL_ATI_draw_buffers_functions[] = { - { DrawBuffersARB_names, DrawBuffersARB_remap_index, 413 }, + { DrawBuffersARB_names, DrawBuffersARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_ATI_fragment_shader) static const struct dri_extension_function GL_ATI_fragment_shader_functions[] = { - { 
ColorFragmentOp3ATI_names, ColorFragmentOp3ATI_remap_index, 791 }, - { ColorFragmentOp2ATI_names, ColorFragmentOp2ATI_remap_index, 790 }, - { DeleteFragmentShaderATI_names, DeleteFragmentShaderATI_remap_index, 784 }, - { SetFragmentShaderConstantATI_names, SetFragmentShaderConstantATI_remap_index, 795 }, - { SampleMapATI_names, SampleMapATI_remap_index, 788 }, - { AlphaFragmentOp2ATI_names, AlphaFragmentOp2ATI_remap_index, 793 }, - { AlphaFragmentOp1ATI_names, AlphaFragmentOp1ATI_remap_index, 792 }, - { ColorFragmentOp1ATI_names, ColorFragmentOp1ATI_remap_index, 789 }, - { AlphaFragmentOp3ATI_names, AlphaFragmentOp3ATI_remap_index, 794 }, - { PassTexCoordATI_names, PassTexCoordATI_remap_index, 787 }, - { BeginFragmentShaderATI_names, BeginFragmentShaderATI_remap_index, 785 }, - { BindFragmentShaderATI_names, BindFragmentShaderATI_remap_index, 783 }, - { GenFragmentShadersATI_names, GenFragmentShadersATI_remap_index, 782 }, - { EndFragmentShaderATI_names, EndFragmentShaderATI_remap_index, 786 }, + { ColorFragmentOp3ATI_names, ColorFragmentOp3ATI_remap_index, -1 }, + { ColorFragmentOp2ATI_names, ColorFragmentOp2ATI_remap_index, -1 }, + { DeleteFragmentShaderATI_names, DeleteFragmentShaderATI_remap_index, -1 }, + { SetFragmentShaderConstantATI_names, SetFragmentShaderConstantATI_remap_index, -1 }, + { SampleMapATI_names, SampleMapATI_remap_index, -1 }, + { AlphaFragmentOp2ATI_names, AlphaFragmentOp2ATI_remap_index, -1 }, + { AlphaFragmentOp1ATI_names, AlphaFragmentOp1ATI_remap_index, -1 }, + { ColorFragmentOp1ATI_names, ColorFragmentOp1ATI_remap_index, -1 }, + { AlphaFragmentOp3ATI_names, AlphaFragmentOp3ATI_remap_index, -1 }, + { PassTexCoordATI_names, PassTexCoordATI_remap_index, -1 }, + { BeginFragmentShaderATI_names, BeginFragmentShaderATI_remap_index, -1 }, + { BindFragmentShaderATI_names, BindFragmentShaderATI_remap_index, -1 }, + { GenFragmentShadersATI_names, GenFragmentShadersATI_remap_index, -1 }, + { EndFragmentShaderATI_names, 
EndFragmentShaderATI_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4896,14 +4960,14 @@ static const struct dri_extension_function GL_EXT_blend_color_functions[] = { #if defined(need_GL_EXT_blend_equation_separate) static const struct dri_extension_function GL_EXT_blend_equation_separate_functions[] = { - { BlendEquationSeparateEXT_names, BlendEquationSeparateEXT_remap_index, 710 }, + { BlendEquationSeparateEXT_names, BlendEquationSeparateEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_blend_func_separate) static const struct dri_extension_function GL_EXT_blend_func_separate_functions[] = { - { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, 537 }, + { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4925,8 +4989,8 @@ static const struct dri_extension_function GL_EXT_color_subtable_functions[] = { #if defined(need_GL_EXT_compiled_vertex_array) static const struct dri_extension_function GL_EXT_compiled_vertex_array_functions[] = { - { UnlockArraysEXT_names, UnlockArraysEXT_remap_index, 541 }, - { LockArraysEXT_names, LockArraysEXT_remap_index, 540 }, + { UnlockArraysEXT_names, UnlockArraysEXT_remap_index, -1 }, + { LockArraysEXT_names, LockArraysEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4938,13 +5002,13 @@ static const struct dri_extension_function GL_EXT_convolution_functions[] = { { ConvolutionFilter2D_names, -1, 349 }, { ConvolutionParameteriv_names, -1, 353 }, { ConvolutionParameterfv_names, -1, 351 }, - { GetSeparableFilterEXT_names, GetSeparableFilterEXT_remap_index, 426 }, - { GetConvolutionFilterEXT_names, GetConvolutionFilterEXT_remap_index, 423 }, - { GetConvolutionParameterfvEXT_names, GetConvolutionParameterfvEXT_remap_index, 424 }, + { GetSeparableFilterEXT_names, GetSeparableFilterEXT_remap_index, -1 }, + { GetConvolutionFilterEXT_names, GetConvolutionFilterEXT_remap_index, -1 }, + { GetConvolutionParameterfvEXT_names, 
GetConvolutionParameterfvEXT_remap_index, -1 }, { SeparableFilter2D_names, -1, 360 }, { ConvolutionParameteri_names, -1, 352 }, { ConvolutionParameterf_names, -1, 350 }, - { GetConvolutionParameterivEXT_names, GetConvolutionParameterivEXT_remap_index, 425 }, + { GetConvolutionParameterivEXT_names, GetConvolutionParameterivEXT_remap_index, -1 }, { CopyConvolutionFilter2D_names, -1, 355 }, { NULL, 0, 0 } }; @@ -4991,15 +5055,15 @@ static const struct dri_extension_function GL_EXT_copy_texture_functions[] = { #if defined(need_GL_EXT_cull_vertex) static const struct dri_extension_function GL_EXT_cull_vertex_functions[] = { - { CullParameterdvEXT_names, CullParameterdvEXT_remap_index, 542 }, - { CullParameterfvEXT_names, CullParameterfvEXT_remap_index, 543 }, + { CullParameterdvEXT_names, CullParameterdvEXT_remap_index, -1 }, + { CullParameterfvEXT_names, CullParameterfvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_depth_bounds_test) static const struct dri_extension_function GL_EXT_depth_bounds_test_functions[] = { - { DepthBoundsEXT_names, DepthBoundsEXT_remap_index, 699 }, + { DepthBoundsEXT_names, DepthBoundsEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5013,41 +5077,49 @@ static const struct dri_extension_function GL_EXT_draw_range_elements_functions[ #if defined(need_GL_EXT_fog_coord) static const struct dri_extension_function GL_EXT_fog_coord_functions[] = { - { FogCoorddEXT_names, FogCoorddEXT_remap_index, 547 }, - { FogCoordfEXT_names, FogCoordfEXT_remap_index, 545 }, - { FogCoordPointerEXT_names, FogCoordPointerEXT_remap_index, 549 }, - { FogCoordfvEXT_names, FogCoordfvEXT_remap_index, 546 }, - { FogCoorddvEXT_names, FogCoorddvEXT_remap_index, 548 }, + { FogCoorddEXT_names, FogCoorddEXT_remap_index, -1 }, + { FogCoordfEXT_names, FogCoordfEXT_remap_index, -1 }, + { FogCoordPointerEXT_names, FogCoordPointerEXT_remap_index, -1 }, + { FogCoordfvEXT_names, FogCoordfvEXT_remap_index, -1 }, + { FogCoorddvEXT_names, 
FogCoorddvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_framebuffer_blit) static const struct dri_extension_function GL_EXT_framebuffer_blit_functions[] = { - { BlitFramebufferEXT_names, BlitFramebufferEXT_remap_index, 818 }, + { BlitFramebufferEXT_names, BlitFramebufferEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_framebuffer_object) static const struct dri_extension_function GL_EXT_framebuffer_object_functions[] = { - { GenerateMipmapEXT_names, GenerateMipmapEXT_remap_index, 812 }, - { IsRenderbufferEXT_names, IsRenderbufferEXT_remap_index, 796 }, - { RenderbufferStorageEXT_names, RenderbufferStorageEXT_remap_index, 800 }, - { CheckFramebufferStatusEXT_names, CheckFramebufferStatusEXT_remap_index, 806 }, - { DeleteRenderbuffersEXT_names, DeleteRenderbuffersEXT_remap_index, 798 }, - { FramebufferTexture3DEXT_names, FramebufferTexture3DEXT_remap_index, 809 }, - { FramebufferRenderbufferEXT_names, FramebufferRenderbufferEXT_remap_index, 810 }, - { FramebufferTexture1DEXT_names, FramebufferTexture1DEXT_remap_index, 807 }, - { BindFramebufferEXT_names, BindFramebufferEXT_remap_index, 803 }, - { GenRenderbuffersEXT_names, GenRenderbuffersEXT_remap_index, 799 }, - { IsFramebufferEXT_names, IsFramebufferEXT_remap_index, 802 }, - { FramebufferTexture2DEXT_names, FramebufferTexture2DEXT_remap_index, 808 }, - { GetFramebufferAttachmentParameterivEXT_names, GetFramebufferAttachmentParameterivEXT_remap_index, 811 }, - { DeleteFramebuffersEXT_names, DeleteFramebuffersEXT_remap_index, 804 }, - { GenFramebuffersEXT_names, GenFramebuffersEXT_remap_index, 805 }, - { BindRenderbufferEXT_names, BindRenderbufferEXT_remap_index, 797 }, - { GetRenderbufferParameterivEXT_names, GetRenderbufferParameterivEXT_remap_index, 801 }, + { GenerateMipmapEXT_names, GenerateMipmapEXT_remap_index, -1 }, + { IsRenderbufferEXT_names, IsRenderbufferEXT_remap_index, -1 }, + { RenderbufferStorageEXT_names, RenderbufferStorageEXT_remap_index, -1 }, + { 
CheckFramebufferStatusEXT_names, CheckFramebufferStatusEXT_remap_index, -1 }, + { DeleteRenderbuffersEXT_names, DeleteRenderbuffersEXT_remap_index, -1 }, + { FramebufferTexture3DEXT_names, FramebufferTexture3DEXT_remap_index, -1 }, + { FramebufferRenderbufferEXT_names, FramebufferRenderbufferEXT_remap_index, -1 }, + { FramebufferTexture1DEXT_names, FramebufferTexture1DEXT_remap_index, -1 }, + { BindFramebufferEXT_names, BindFramebufferEXT_remap_index, -1 }, + { GenRenderbuffersEXT_names, GenRenderbuffersEXT_remap_index, -1 }, + { IsFramebufferEXT_names, IsFramebufferEXT_remap_index, -1 }, + { FramebufferTexture2DEXT_names, FramebufferTexture2DEXT_remap_index, -1 }, + { GetFramebufferAttachmentParameterivEXT_names, GetFramebufferAttachmentParameterivEXT_remap_index, -1 }, + { DeleteFramebuffersEXT_names, DeleteFramebuffersEXT_remap_index, -1 }, + { GenFramebuffersEXT_names, GenFramebuffersEXT_remap_index, -1 }, + { BindRenderbufferEXT_names, BindRenderbufferEXT_remap_index, -1 }, + { GetRenderbufferParameterivEXT_names, GetRenderbufferParameterivEXT_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_EXT_gpu_program_parameters) +static const struct dri_extension_function GL_EXT_gpu_program_parameters_functions[] = { + { ProgramLocalParameters4fvEXT_names, ProgramLocalParameters4fvEXT_remap_index, -1 }, + { ProgramEnvParameters4fvEXT_names, ProgramEnvParameters4fvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5055,14 +5127,14 @@ static const struct dri_extension_function GL_EXT_framebuffer_object_functions[] #if defined(need_GL_EXT_histogram) static const struct dri_extension_function GL_EXT_histogram_functions[] = { { Histogram_names, -1, 367 }, - { GetHistogramParameterivEXT_names, GetHistogramParameterivEXT_remap_index, 419 }, + { GetHistogramParameterivEXT_names, GetHistogramParameterivEXT_remap_index, -1 }, { ResetHistogram_names, -1, 369 }, - { GetMinmaxEXT_names, GetMinmaxEXT_remap_index, 420 }, - { GetHistogramParameterfvEXT_names, 
GetHistogramParameterfvEXT_remap_index, 418 }, - { GetHistogramEXT_names, GetHistogramEXT_remap_index, 417 }, - { GetMinmaxParameterfvEXT_names, GetMinmaxParameterfvEXT_remap_index, 421 }, + { GetMinmaxEXT_names, GetMinmaxEXT_remap_index, -1 }, + { GetHistogramParameterfvEXT_names, GetHistogramParameterfvEXT_remap_index, -1 }, + { GetHistogramEXT_names, GetHistogramEXT_remap_index, -1 }, + { GetMinmaxParameterfvEXT_names, GetMinmaxParameterfvEXT_remap_index, -1 }, { ResetMinmax_names, -1, 370 }, - { GetMinmaxParameterivEXT_names, GetMinmaxParameterivEXT_remap_index, 422 }, + { GetMinmaxParameterivEXT_names, GetMinmaxParameterivEXT_remap_index, -1 }, { Minmax_names, -1, 368 }, { NULL, 0, 0 } }; @@ -5070,14 +5142,14 @@ static const struct dri_extension_function GL_EXT_histogram_functions[] = { #if defined(need_GL_EXT_index_func) static const struct dri_extension_function GL_EXT_index_func_functions[] = { - { IndexFuncEXT_names, IndexFuncEXT_remap_index, 539 }, + { IndexFuncEXT_names, IndexFuncEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_index_material) static const struct dri_extension_function GL_EXT_index_material_functions[] = { - { IndexMaterialEXT_names, IndexMaterialEXT_remap_index, 538 }, + { IndexMaterialEXT_names, IndexMaterialEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5093,26 +5165,26 @@ static const struct dri_extension_function GL_EXT_light_texture_functions[] = { #if defined(need_GL_EXT_multi_draw_arrays) static const struct dri_extension_function GL_EXT_multi_draw_arrays_functions[] = { - { MultiDrawElementsEXT_names, MultiDrawElementsEXT_remap_index, 645 }, - { MultiDrawArraysEXT_names, MultiDrawArraysEXT_remap_index, 644 }, + { MultiDrawElementsEXT_names, MultiDrawElementsEXT_remap_index, -1 }, + { MultiDrawArraysEXT_names, MultiDrawArraysEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_multisample) static const struct dri_extension_function GL_EXT_multisample_functions[] = { - { 
SampleMaskSGIS_names, SampleMaskSGIS_remap_index, 446 }, - { SamplePatternSGIS_names, SamplePatternSGIS_remap_index, 447 }, + { SampleMaskSGIS_names, SampleMaskSGIS_remap_index, -1 }, + { SamplePatternSGIS_names, SamplePatternSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_paletted_texture) static const struct dri_extension_function GL_EXT_paletted_texture_functions[] = { - { GetColorTableParameterivEXT_names, GetColorTableParameterivEXT_remap_index, 551 }, - { GetColorTableEXT_names, GetColorTableEXT_remap_index, 550 }, + { GetColorTableParameterfvSGI_names, GetColorTableParameterfvSGI_remap_index, -1 }, { ColorTable_names, -1, 339 }, - { GetColorTableParameterfvEXT_names, GetColorTableParameterfvEXT_remap_index, 552 }, + { GetColorTableParameterivSGI_names, GetColorTableParameterivSGI_remap_index, -1 }, + { GetColorTableSGI_names, GetColorTableSGI_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5129,45 +5201,45 @@ static const struct dri_extension_function GL_EXT_pixel_transform_functions[] = #if defined(need_GL_EXT_point_parameters) static const struct dri_extension_function GL_EXT_point_parameters_functions[] = { - { PointParameterfEXT_names, PointParameterfEXT_remap_index, 458 }, - { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, 459 }, + { PointParameterfEXT_names, PointParameterfEXT_remap_index, -1 }, + { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_polygon_offset) static const struct dri_extension_function GL_EXT_polygon_offset_functions[] = { - { PolygonOffsetEXT_names, PolygonOffsetEXT_remap_index, 414 }, + { PolygonOffsetEXT_names, PolygonOffsetEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_secondary_color) static const struct dri_extension_function GL_EXT_secondary_color_functions[] = { - { SecondaryColor3iEXT_names, SecondaryColor3iEXT_remap_index, 567 }, - { SecondaryColor3bEXT_names, SecondaryColor3bEXT_remap_index, 561 
}, - { SecondaryColor3bvEXT_names, SecondaryColor3bvEXT_remap_index, 562 }, - { SecondaryColor3sEXT_names, SecondaryColor3sEXT_remap_index, 569 }, - { SecondaryColor3dEXT_names, SecondaryColor3dEXT_remap_index, 563 }, - { SecondaryColorPointerEXT_names, SecondaryColorPointerEXT_remap_index, 577 }, - { SecondaryColor3uiEXT_names, SecondaryColor3uiEXT_remap_index, 573 }, - { SecondaryColor3usvEXT_names, SecondaryColor3usvEXT_remap_index, 576 }, - { SecondaryColor3ivEXT_names, SecondaryColor3ivEXT_remap_index, 568 }, - { SecondaryColor3fvEXT_names, SecondaryColor3fvEXT_remap_index, 566 }, - { SecondaryColor3ubvEXT_names, SecondaryColor3ubvEXT_remap_index, 572 }, - { SecondaryColor3uivEXT_names, SecondaryColor3uivEXT_remap_index, 574 }, - { SecondaryColor3dvEXT_names, SecondaryColor3dvEXT_remap_index, 564 }, - { SecondaryColor3usEXT_names, SecondaryColor3usEXT_remap_index, 575 }, - { SecondaryColor3ubEXT_names, SecondaryColor3ubEXT_remap_index, 571 }, - { SecondaryColor3fEXT_names, SecondaryColor3fEXT_remap_index, 565 }, - { SecondaryColor3svEXT_names, SecondaryColor3svEXT_remap_index, 570 }, + { SecondaryColor3iEXT_names, SecondaryColor3iEXT_remap_index, -1 }, + { SecondaryColor3bEXT_names, SecondaryColor3bEXT_remap_index, -1 }, + { SecondaryColor3bvEXT_names, SecondaryColor3bvEXT_remap_index, -1 }, + { SecondaryColor3sEXT_names, SecondaryColor3sEXT_remap_index, -1 }, + { SecondaryColor3dEXT_names, SecondaryColor3dEXT_remap_index, -1 }, + { SecondaryColorPointerEXT_names, SecondaryColorPointerEXT_remap_index, -1 }, + { SecondaryColor3uiEXT_names, SecondaryColor3uiEXT_remap_index, -1 }, + { SecondaryColor3usvEXT_names, SecondaryColor3usvEXT_remap_index, -1 }, + { SecondaryColor3ivEXT_names, SecondaryColor3ivEXT_remap_index, -1 }, + { SecondaryColor3fvEXT_names, SecondaryColor3fvEXT_remap_index, -1 }, + { SecondaryColor3ubvEXT_names, SecondaryColor3ubvEXT_remap_index, -1 }, + { SecondaryColor3uivEXT_names, SecondaryColor3uivEXT_remap_index, -1 }, + { 
SecondaryColor3dvEXT_names, SecondaryColor3dvEXT_remap_index, -1 }, + { SecondaryColor3usEXT_names, SecondaryColor3usEXT_remap_index, -1 }, + { SecondaryColor3ubEXT_names, SecondaryColor3ubEXT_remap_index, -1 }, + { SecondaryColor3fEXT_names, SecondaryColor3fEXT_remap_index, -1 }, + { SecondaryColor3svEXT_names, SecondaryColor3svEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_stencil_two_side) static const struct dri_extension_function GL_EXT_stencil_two_side_functions[] = { - { ActiveStencilFaceEXT_names, ActiveStencilFaceEXT_remap_index, 646 }, + { ActiveStencilFaceEXT_names, ActiveStencilFaceEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5192,10 +5264,10 @@ static const struct dri_extension_function GL_EXT_texture3D_functions[] = { static const struct dri_extension_function GL_EXT_texture_object_functions[] = { { PrioritizeTextures_names, -1, 331 }, { DeleteTextures_names, -1, 327 }, - { GenTexturesEXT_names, GenTexturesEXT_remap_index, 440 }, - { AreTexturesResidentEXT_names, AreTexturesResidentEXT_remap_index, 439 }, + { GenTexturesEXT_names, GenTexturesEXT_remap_index, -1 }, + { AreTexturesResidentEXT_names, AreTexturesResidentEXT_remap_index, -1 }, { BindTexture_names, -1, 307 }, - { IsTextureEXT_names, IsTextureEXT_remap_index, 441 }, + { IsTextureEXT_names, IsTextureEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5209,21 +5281,21 @@ static const struct dri_extension_function GL_EXT_texture_perturb_normal_functio #if defined(need_GL_EXT_timer_query) static const struct dri_extension_function GL_EXT_timer_query_functions[] = { - { GetQueryObjectui64vEXT_names, GetQueryObjectui64vEXT_remap_index, 817 }, - { GetQueryObjecti64vEXT_names, GetQueryObjecti64vEXT_remap_index, 816 }, + { GetQueryObjectui64vEXT_names, GetQueryObjectui64vEXT_remap_index, -1 }, + { GetQueryObjecti64vEXT_names, GetQueryObjecti64vEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_EXT_vertex_array) static const struct 
dri_extension_function GL_EXT_vertex_array_functions[] = { - { IndexPointerEXT_names, IndexPointerEXT_remap_index, 450 }, - { NormalPointerEXT_names, NormalPointerEXT_remap_index, 451 }, - { VertexPointerEXT_names, VertexPointerEXT_remap_index, 453 }, - { TexCoordPointerEXT_names, TexCoordPointerEXT_remap_index, 452 }, - { EdgeFlagPointerEXT_names, EdgeFlagPointerEXT_remap_index, 449 }, + { IndexPointerEXT_names, IndexPointerEXT_remap_index, -1 }, + { NormalPointerEXT_names, NormalPointerEXT_remap_index, -1 }, + { VertexPointerEXT_names, VertexPointerEXT_remap_index, -1 }, + { TexCoordPointerEXT_names, TexCoordPointerEXT_remap_index, -1 }, + { EdgeFlagPointerEXT_names, EdgeFlagPointerEXT_remap_index, -1 }, { ArrayElement_names, -1, 306 }, - { ColorPointerEXT_names, ColorPointerEXT_remap_index, 448 }, + { ColorPointerEXT_names, ColorPointerEXT_remap_index, -1 }, { GetPointerv_names, -1, 329 }, { DrawArrays_names, -1, 310 }, { NULL, 0, 0 } @@ -5232,9 +5304,9 @@ static const struct dri_extension_function GL_EXT_vertex_array_functions[] = { #if defined(need_GL_EXT_vertex_weighting) static const struct dri_extension_function GL_EXT_vertex_weighting_functions[] = { - { VertexWeightfvEXT_names, VertexWeightfvEXT_remap_index, 495 }, - { VertexWeightfEXT_names, VertexWeightfEXT_remap_index, 494 }, - { VertexWeightPointerEXT_names, VertexWeightPointerEXT_remap_index, 496 }, + { VertexWeightfvEXT_names, VertexWeightfvEXT_remap_index, -1 }, + { VertexWeightfEXT_names, VertexWeightfEXT_remap_index, -1 }, + { VertexWeightPointerEXT_names, VertexWeightPointerEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5253,8 +5325,8 @@ static const struct dri_extension_function GL_HP_image_transform_functions[] = { #if defined(need_GL_IBM_multimode_draw_arrays) static const struct dri_extension_function GL_IBM_multimode_draw_arrays_functions[] = { - { MultiModeDrawArraysIBM_names, MultiModeDrawArraysIBM_remap_index, 708 }, - { MultiModeDrawElementsIBM_names, 
MultiModeDrawElementsIBM_remap_index, 709 }, + { MultiModeDrawArraysIBM_names, MultiModeDrawArraysIBM_remap_index, -1 }, + { MultiModeDrawElementsIBM_names, MultiModeDrawElementsIBM_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5275,7 +5347,7 @@ static const struct dri_extension_function GL_IBM_vertex_array_lists_functions[] #if defined(need_GL_INGR_blend_func_separate) static const struct dri_extension_function GL_INGR_blend_func_separate_functions[] = { - { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, 537 }, + { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5292,37 +5364,47 @@ static const struct dri_extension_function GL_INTEL_parallel_arrays_functions[] #if defined(need_GL_MESA_resize_buffers) static const struct dri_extension_function GL_MESA_resize_buffers_functions[] = { - { ResizeBuffersMESA_names, ResizeBuffersMESA_remap_index, 512 }, + { ResizeBuffersMESA_names, ResizeBuffersMESA_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_MESA_shader_debug) +static const struct dri_extension_function GL_MESA_shader_debug_functions[] = { + { GetDebugLogLengthMESA_names, GetDebugLogLengthMESA_remap_index, -1 }, + { ClearDebugLogMESA_names, ClearDebugLogMESA_remap_index, -1 }, + { GetDebugLogMESA_names, GetDebugLogMESA_remap_index, -1 }, + { CreateDebugObjectMESA_names, CreateDebugObjectMESA_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_MESA_window_pos) static const struct dri_extension_function GL_MESA_window_pos_functions[] = { - { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, 523 }, - { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, 514 }, - { WindowPos4svMESA_names, WindowPos4svMESA_remap_index, 536 }, - { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, 520 }, - { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, 521 }, - { WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, 516 }, - { WindowPos4dMESA_names, 
WindowPos4dMESA_remap_index, 529 }, - { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, 513 }, - { WindowPos4ivMESA_names, WindowPos4ivMESA_remap_index, 534 }, - { WindowPos4fMESA_names, WindowPos4fMESA_remap_index, 531 }, - { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, 522 }, - { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, 524 }, - { WindowPos4dvMESA_names, WindowPos4dvMESA_remap_index, 530 }, - { WindowPos2iMESA_names, WindowPos2iMESA_remap_index, 517 }, - { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, 527 }, - { WindowPos4sMESA_names, WindowPos4sMESA_remap_index, 535 }, - { WindowPos2ivMESA_names, WindowPos2ivMESA_remap_index, 518 }, - { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, 519 }, - { WindowPos3iMESA_names, WindowPos3iMESA_remap_index, 525 }, - { WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, 526 }, - { WindowPos4iMESA_names, WindowPos4iMESA_remap_index, 533 }, - { WindowPos4fvMESA_names, WindowPos4fvMESA_remap_index, 532 }, - { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, 528 }, - { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, 515 }, + { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, -1 }, + { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, -1 }, + { WindowPos4svMESA_names, WindowPos4svMESA_remap_index, -1 }, + { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, -1 }, + { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, -1 }, + { WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, -1 }, + { WindowPos4dMESA_names, WindowPos4dMESA_remap_index, -1 }, + { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, -1 }, + { WindowPos4ivMESA_names, WindowPos4ivMESA_remap_index, -1 }, + { WindowPos4fMESA_names, WindowPos4fMESA_remap_index, -1 }, + { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, -1 }, + { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, -1 }, + { WindowPos4dvMESA_names, WindowPos4dvMESA_remap_index, -1 }, + { WindowPos2iMESA_names, 
WindowPos2iMESA_remap_index, -1 }, + { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, -1 }, + { WindowPos4sMESA_names, WindowPos4sMESA_remap_index, -1 }, + { WindowPos2ivMESA_names, WindowPos2ivMESA_remap_index, -1 }, + { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, -1 }, + { WindowPos3iMESA_names, WindowPos3iMESA_remap_index, -1 }, + { WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, -1 }, + { WindowPos4iMESA_names, WindowPos4iMESA_remap_index, -1 }, + { WindowPos4fvMESA_names, WindowPos4fvMESA_remap_index, -1 }, + { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, -1 }, + { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5344,52 +5426,52 @@ static const struct dri_extension_function GL_NV_evaluators_functions[] = { #if defined(need_GL_NV_fence) static const struct dri_extension_function GL_NV_fence_functions[] = { - { GenFencesNV_names, GenFencesNV_remap_index, 648 }, - { TestFenceNV_names, TestFenceNV_remap_index, 650 }, - { IsFenceNV_names, IsFenceNV_remap_index, 649 }, - { DeleteFencesNV_names, DeleteFencesNV_remap_index, 647 }, - { SetFenceNV_names, SetFenceNV_remap_index, 653 }, - { GetFenceivNV_names, GetFenceivNV_remap_index, 651 }, - { FinishFenceNV_names, FinishFenceNV_remap_index, 652 }, + { GenFencesNV_names, GenFencesNV_remap_index, -1 }, + { TestFenceNV_names, TestFenceNV_remap_index, -1 }, + { IsFenceNV_names, IsFenceNV_remap_index, -1 }, + { DeleteFencesNV_names, DeleteFencesNV_remap_index, -1 }, + { SetFenceNV_names, SetFenceNV_remap_index, -1 }, + { GetFenceivNV_names, GetFenceivNV_remap_index, -1 }, + { FinishFenceNV_names, FinishFenceNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_NV_fragment_program) static const struct dri_extension_function GL_NV_fragment_program_functions[] = { - { GetProgramNamedParameterdvNV_names, GetProgramNamedParameterdvNV_remap_index, 687 }, - { GetProgramNamedParameterfvNV_names, GetProgramNamedParameterfvNV_remap_index, 686 }, - { 
ProgramNamedParameter4fNV_names, ProgramNamedParameter4fNV_remap_index, 682 }, - { ProgramNamedParameter4fvNV_names, ProgramNamedParameter4fvNV_remap_index, 684 }, - { ProgramNamedParameter4dvNV_names, ProgramNamedParameter4dvNV_remap_index, 685 }, - { ProgramNamedParameter4dNV_names, ProgramNamedParameter4dNV_remap_index, 683 }, + { GetProgramNamedParameterdvNV_names, GetProgramNamedParameterdvNV_remap_index, -1 }, + { GetProgramNamedParameterfvNV_names, GetProgramNamedParameterfvNV_remap_index, -1 }, + { ProgramNamedParameter4fNV_names, ProgramNamedParameter4fNV_remap_index, -1 }, + { ProgramNamedParameter4fvNV_names, ProgramNamedParameter4fvNV_remap_index, -1 }, + { ProgramNamedParameter4dvNV_names, ProgramNamedParameter4dvNV_remap_index, -1 }, + { ProgramNamedParameter4dNV_names, ProgramNamedParameter4dNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_NV_point_sprite) static const struct dri_extension_function GL_NV_point_sprite_functions[] = { - { PointParameteriNV_names, PointParameteriNV_remap_index, 642 }, - { PointParameterivNV_names, PointParameterivNV_remap_index, 643 }, + { PointParameteriNV_names, PointParameteriNV_remap_index, -1 }, + { PointParameterivNV_names, PointParameterivNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_NV_register_combiners) static const struct dri_extension_function GL_NV_register_combiners_functions[] = { - { CombinerParameterfvNV_names, CombinerParameterfvNV_remap_index, 499 }, - { GetCombinerOutputParameterfvNV_names, GetCombinerOutputParameterfvNV_remap_index, 508 }, - { FinalCombinerInputNV_names, FinalCombinerInputNV_remap_index, 505 }, - { GetCombinerInputParameterfvNV_names, GetCombinerInputParameterfvNV_remap_index, 506 }, - { GetCombinerOutputParameterivNV_names, GetCombinerOutputParameterivNV_remap_index, 509 }, - { CombinerOutputNV_names, CombinerOutputNV_remap_index, 504 }, - { CombinerParameteriNV_names, CombinerParameteriNV_remap_index, 502 }, - { 
GetFinalCombinerInputParameterivNV_names, GetFinalCombinerInputParameterivNV_remap_index, 511 }, - { CombinerInputNV_names, CombinerInputNV_remap_index, 503 }, - { CombinerParameterfNV_names, CombinerParameterfNV_remap_index, 500 }, - { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, 510 }, - { GetCombinerInputParameterivNV_names, GetCombinerInputParameterivNV_remap_index, 507 }, - { CombinerParameterivNV_names, CombinerParameterivNV_remap_index, 501 }, + { CombinerParameterfvNV_names, CombinerParameterfvNV_remap_index, -1 }, + { GetCombinerOutputParameterfvNV_names, GetCombinerOutputParameterfvNV_remap_index, -1 }, + { FinalCombinerInputNV_names, FinalCombinerInputNV_remap_index, -1 }, + { GetCombinerInputParameterfvNV_names, GetCombinerInputParameterfvNV_remap_index, -1 }, + { GetCombinerOutputParameterivNV_names, GetCombinerOutputParameterivNV_remap_index, -1 }, + { CombinerOutputNV_names, CombinerOutputNV_remap_index, -1 }, + { CombinerParameteriNV_names, CombinerParameteriNV_remap_index, -1 }, + { GetFinalCombinerInputParameterivNV_names, GetFinalCombinerInputParameterivNV_remap_index, -1 }, + { CombinerInputNV_names, CombinerInputNV_remap_index, -1 }, + { CombinerParameterfNV_names, CombinerParameterfNV_remap_index, -1 }, + { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, -1 }, + { GetCombinerInputParameterivNV_names, GetCombinerInputParameterivNV_remap_index, -1 }, + { CombinerParameterivNV_names, CombinerParameterivNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5404,93 +5486,93 @@ static const struct dri_extension_function GL_NV_register_combiners2_functions[] #if defined(need_GL_NV_vertex_array_range) static const struct dri_extension_function GL_NV_vertex_array_range_functions[] = { - { FlushVertexArrayRangeNV_names, FlushVertexArrayRangeNV_remap_index, 497 }, - { VertexArrayRangeNV_names, VertexArrayRangeNV_remap_index, 498 }, + { FlushVertexArrayRangeNV_names, 
FlushVertexArrayRangeNV_remap_index, -1 }, + { VertexArrayRangeNV_names, VertexArrayRangeNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_NV_vertex_program) static const struct dri_extension_function GL_NV_vertex_program_functions[] = { - { ProgramParameter4fNV_names, ProgramParameter4fNV_remap_index, 596 }, - { VertexAttrib4ubvNV_names, VertexAttrib4ubvNV_remap_index, 781 }, - { VertexAttrib4svNV_names, VertexAttrib4svNV_remap_index, 779 }, - { VertexAttribs1dvNV_names, VertexAttribs1dvNV_remap_index, 629 }, - { ProgramParameter4dvNV_names, ProgramParameter4dvNV_remap_index, 595 }, - { VertexAttrib4fNV_names, VertexAttrib4fNV_remap_index, 776 }, - { VertexAttrib2dNV_names, VertexAttrib2dNV_remap_index, 762 }, - { VertexAttrib4ubNV_names, VertexAttrib4ubNV_remap_index, 780 }, - { VertexAttribs3dvNV_names, VertexAttribs3dvNV_remap_index, 635 }, - { VertexAttribs4fvNV_names, VertexAttribs4fvNV_remap_index, 639 }, - { VertexAttrib2sNV_names, VertexAttrib2sNV_remap_index, 766 }, - { VertexAttribs3fvNV_names, VertexAttribs3fvNV_remap_index, 636 }, - { ProgramParameter4dNV_names, ProgramParameter4dNV_remap_index, 594 }, - { LoadProgramNV_names, LoadProgramNV_remap_index, 593 }, - { VertexAttrib4fvNV_names, VertexAttrib4fvNV_remap_index, 777 }, - { VertexAttrib3fNV_names, VertexAttrib3fNV_remap_index, 770 }, - { VertexAttribs2dvNV_names, VertexAttribs2dvNV_remap_index, 632 }, - { GetProgramParameterfvNV_names, GetProgramParameterfvNV_remap_index, 584 }, - { VertexAttrib3dNV_names, VertexAttrib3dNV_remap_index, 768 }, - { VertexAttrib2fvNV_names, VertexAttrib2fvNV_remap_index, 765 }, - { VertexAttrib2dvNV_names, VertexAttrib2dvNV_remap_index, 763 }, - { VertexAttrib1dvNV_names, VertexAttrib1dvNV_remap_index, 757 }, - { ProgramParameter4fvNV_names, ProgramParameter4fvNV_remap_index, 597 }, - { VertexAttrib1svNV_names, VertexAttrib1svNV_remap_index, 761 }, - { VertexAttribs2svNV_names, VertexAttribs2svNV_remap_index, 634 }, - { GetVertexAttribivNV_names, 
GetVertexAttribivNV_remap_index, 755 }, - { GetVertexAttribfvNV_names, GetVertexAttribfvNV_remap_index, 754 }, - { VertexAttrib2svNV_names, VertexAttrib2svNV_remap_index, 767 }, - { VertexAttribs1fvNV_names, VertexAttribs1fvNV_remap_index, 630 }, - { IsProgramNV_names, IsProgramNV_remap_index, 592 }, - { VertexAttrib4sNV_names, VertexAttrib4sNV_remap_index, 778 }, - { VertexAttrib2fNV_names, VertexAttrib2fNV_remap_index, 764 }, - { RequestResidentProgramsNV_names, RequestResidentProgramsNV_remap_index, 600 }, - { ExecuteProgramNV_names, ExecuteProgramNV_remap_index, 581 }, - { VertexAttribPointerNV_names, VertexAttribPointerNV_remap_index, 602 }, - { TrackMatrixNV_names, TrackMatrixNV_remap_index, 601 }, - { GetProgramParameterdvNV_names, GetProgramParameterdvNV_remap_index, 583 }, - { VertexAttrib3sNV_names, VertexAttrib3sNV_remap_index, 772 }, - { GetTrackMatrixivNV_names, GetTrackMatrixivNV_remap_index, 587 }, - { VertexAttrib3svNV_names, VertexAttrib3svNV_remap_index, 773 }, - { ProgramParameters4fvNV_names, ProgramParameters4fvNV_remap_index, 599 }, - { GetProgramivNV_names, GetProgramivNV_remap_index, 585 }, - { GetVertexAttribdvNV_names, GetVertexAttribdvNV_remap_index, 753 }, - { VertexAttrib3fvNV_names, VertexAttrib3fvNV_remap_index, 771 }, - { VertexAttribs2fvNV_names, VertexAttribs2fvNV_remap_index, 633 }, - { VertexAttrib1fvNV_names, VertexAttrib1fvNV_remap_index, 759 }, - { DeleteProgramsNV_names, DeleteProgramsNV_remap_index, 580 }, - { GetVertexAttribPointervNV_names, GetVertexAttribPointervNV_remap_index, 591 }, - { GetProgramStringNV_names, GetProgramStringNV_remap_index, 586 }, - { VertexAttribs4dvNV_names, VertexAttribs4dvNV_remap_index, 638 }, - { ProgramParameters4dvNV_names, ProgramParameters4dvNV_remap_index, 598 }, - { VertexAttrib1fNV_names, VertexAttrib1fNV_remap_index, 758 }, - { VertexAttrib4dNV_names, VertexAttrib4dNV_remap_index, 774 }, - { VertexAttribs4ubvNV_names, VertexAttribs4ubvNV_remap_index, 641 }, - { VertexAttribs3svNV_names, 
VertexAttribs3svNV_remap_index, 637 }, - { VertexAttrib1sNV_names, VertexAttrib1sNV_remap_index, 760 }, - { BindProgramNV_names, BindProgramNV_remap_index, 579 }, - { AreProgramsResidentNV_names, AreProgramsResidentNV_remap_index, 578 }, - { VertexAttrib3dvNV_names, VertexAttrib3dvNV_remap_index, 769 }, - { VertexAttrib1dNV_names, VertexAttrib1dNV_remap_index, 756 }, - { VertexAttribs4svNV_names, VertexAttribs4svNV_remap_index, 640 }, - { VertexAttribs1svNV_names, VertexAttribs1svNV_remap_index, 631 }, - { GenProgramsNV_names, GenProgramsNV_remap_index, 582 }, - { VertexAttrib4dvNV_names, VertexAttrib4dvNV_remap_index, 775 }, + { ProgramParameter4fNV_names, ProgramParameter4fNV_remap_index, -1 }, + { VertexAttrib4ubvNV_names, VertexAttrib4ubvNV_remap_index, -1 }, + { VertexAttrib4svNV_names, VertexAttrib4svNV_remap_index, -1 }, + { VertexAttribs1dvNV_names, VertexAttribs1dvNV_remap_index, -1 }, + { ProgramParameter4dvNV_names, ProgramParameter4dvNV_remap_index, -1 }, + { VertexAttrib4fNV_names, VertexAttrib4fNV_remap_index, -1 }, + { VertexAttrib2dNV_names, VertexAttrib2dNV_remap_index, -1 }, + { VertexAttrib4ubNV_names, VertexAttrib4ubNV_remap_index, -1 }, + { VertexAttribs3dvNV_names, VertexAttribs3dvNV_remap_index, -1 }, + { VertexAttribs4fvNV_names, VertexAttribs4fvNV_remap_index, -1 }, + { VertexAttrib2sNV_names, VertexAttrib2sNV_remap_index, -1 }, + { VertexAttribs3fvNV_names, VertexAttribs3fvNV_remap_index, -1 }, + { ProgramParameter4dNV_names, ProgramParameter4dNV_remap_index, -1 }, + { LoadProgramNV_names, LoadProgramNV_remap_index, -1 }, + { VertexAttrib4fvNV_names, VertexAttrib4fvNV_remap_index, -1 }, + { VertexAttrib3fNV_names, VertexAttrib3fNV_remap_index, -1 }, + { VertexAttribs2dvNV_names, VertexAttribs2dvNV_remap_index, -1 }, + { GetProgramParameterfvNV_names, GetProgramParameterfvNV_remap_index, -1 }, + { VertexAttrib3dNV_names, VertexAttrib3dNV_remap_index, -1 }, + { VertexAttrib2fvNV_names, VertexAttrib2fvNV_remap_index, -1 }, + { 
VertexAttrib2dvNV_names, VertexAttrib2dvNV_remap_index, -1 }, + { VertexAttrib1dvNV_names, VertexAttrib1dvNV_remap_index, -1 }, + { ProgramParameter4fvNV_names, ProgramParameter4fvNV_remap_index, -1 }, + { VertexAttrib1svNV_names, VertexAttrib1svNV_remap_index, -1 }, + { VertexAttribs2svNV_names, VertexAttribs2svNV_remap_index, -1 }, + { GetVertexAttribivNV_names, GetVertexAttribivNV_remap_index, -1 }, + { GetVertexAttribfvNV_names, GetVertexAttribfvNV_remap_index, -1 }, + { VertexAttrib2svNV_names, VertexAttrib2svNV_remap_index, -1 }, + { VertexAttribs1fvNV_names, VertexAttribs1fvNV_remap_index, -1 }, + { IsProgramNV_names, IsProgramNV_remap_index, -1 }, + { VertexAttrib4sNV_names, VertexAttrib4sNV_remap_index, -1 }, + { VertexAttrib2fNV_names, VertexAttrib2fNV_remap_index, -1 }, + { RequestResidentProgramsNV_names, RequestResidentProgramsNV_remap_index, -1 }, + { ExecuteProgramNV_names, ExecuteProgramNV_remap_index, -1 }, + { VertexAttribPointerNV_names, VertexAttribPointerNV_remap_index, -1 }, + { TrackMatrixNV_names, TrackMatrixNV_remap_index, -1 }, + { GetProgramParameterdvNV_names, GetProgramParameterdvNV_remap_index, -1 }, + { VertexAttrib3sNV_names, VertexAttrib3sNV_remap_index, -1 }, + { GetTrackMatrixivNV_names, GetTrackMatrixivNV_remap_index, -1 }, + { VertexAttrib3svNV_names, VertexAttrib3svNV_remap_index, -1 }, + { ProgramParameters4fvNV_names, ProgramParameters4fvNV_remap_index, -1 }, + { GetProgramivNV_names, GetProgramivNV_remap_index, -1 }, + { GetVertexAttribdvNV_names, GetVertexAttribdvNV_remap_index, -1 }, + { VertexAttrib3fvNV_names, VertexAttrib3fvNV_remap_index, -1 }, + { VertexAttribs2fvNV_names, VertexAttribs2fvNV_remap_index, -1 }, + { VertexAttrib1fvNV_names, VertexAttrib1fvNV_remap_index, -1 }, + { DeleteProgramsNV_names, DeleteProgramsNV_remap_index, -1 }, + { GetVertexAttribPointervNV_names, GetVertexAttribPointervNV_remap_index, -1 }, + { GetProgramStringNV_names, GetProgramStringNV_remap_index, -1 }, + { VertexAttribs4dvNV_names, 
VertexAttribs4dvNV_remap_index, -1 }, + { ProgramParameters4dvNV_names, ProgramParameters4dvNV_remap_index, -1 }, + { VertexAttrib1fNV_names, VertexAttrib1fNV_remap_index, -1 }, + { VertexAttrib4dNV_names, VertexAttrib4dNV_remap_index, -1 }, + { VertexAttribs4ubvNV_names, VertexAttribs4ubvNV_remap_index, -1 }, + { VertexAttribs3svNV_names, VertexAttribs3svNV_remap_index, -1 }, + { VertexAttrib1sNV_names, VertexAttrib1sNV_remap_index, -1 }, + { BindProgramNV_names, BindProgramNV_remap_index, -1 }, + { AreProgramsResidentNV_names, AreProgramsResidentNV_remap_index, -1 }, + { VertexAttrib3dvNV_names, VertexAttrib3dvNV_remap_index, -1 }, + { VertexAttrib1dNV_names, VertexAttrib1dNV_remap_index, -1 }, + { VertexAttribs4svNV_names, VertexAttribs4svNV_remap_index, -1 }, + { VertexAttribs1svNV_names, VertexAttribs1svNV_remap_index, -1 }, + { GenProgramsNV_names, GenProgramsNV_remap_index, -1 }, + { VertexAttrib4dvNV_names, VertexAttrib4dvNV_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_PGI_misc_hints) static const struct dri_extension_function GL_PGI_misc_hints_functions[] = { - { HintPGI_names, HintPGI_remap_index, 544 }, + { HintPGI_names, HintPGI_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIS_detail_texture) static const struct dri_extension_function GL_SGIS_detail_texture_functions[] = { - { GetDetailTexFuncSGIS_names, GetDetailTexFuncSGIS_remap_index, 443 }, - { DetailTexFuncSGIS_names, DetailTexFuncSGIS_remap_index, 442 }, + { GetDetailTexFuncSGIS_names, GetDetailTexFuncSGIS_remap_index, -1 }, + { DetailTexFuncSGIS_names, DetailTexFuncSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5505,44 +5587,44 @@ static const struct dri_extension_function GL_SGIS_fog_function_functions[] = { #if defined(need_GL_SGIS_multisample) static const struct dri_extension_function GL_SGIS_multisample_functions[] = { - { SampleMaskSGIS_names, SampleMaskSGIS_remap_index, 446 }, - { SamplePatternSGIS_names, SamplePatternSGIS_remap_index, 447 }, + 
{ SampleMaskSGIS_names, SampleMaskSGIS_remap_index, -1 }, + { SamplePatternSGIS_names, SamplePatternSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIS_pixel_texture) static const struct dri_extension_function GL_SGIS_pixel_texture_functions[] = { - { PixelTexGenParameterfvSGIS_names, PixelTexGenParameterfvSGIS_remap_index, 434 }, - { GetPixelTexGenParameterivSGIS_names, GetPixelTexGenParameterivSGIS_remap_index, 435 }, - { PixelTexGenParameteriSGIS_names, PixelTexGenParameteriSGIS_remap_index, 431 }, - { PixelTexGenParameterivSGIS_names, PixelTexGenParameterivSGIS_remap_index, 432 }, - { PixelTexGenParameterfSGIS_names, PixelTexGenParameterfSGIS_remap_index, 433 }, - { GetPixelTexGenParameterfvSGIS_names, GetPixelTexGenParameterfvSGIS_remap_index, 436 }, + { PixelTexGenParameterfvSGIS_names, PixelTexGenParameterfvSGIS_remap_index, -1 }, + { GetPixelTexGenParameterivSGIS_names, GetPixelTexGenParameterivSGIS_remap_index, -1 }, + { PixelTexGenParameteriSGIS_names, PixelTexGenParameteriSGIS_remap_index, -1 }, + { PixelTexGenParameterivSGIS_names, PixelTexGenParameterivSGIS_remap_index, -1 }, + { PixelTexGenParameterfSGIS_names, PixelTexGenParameterfSGIS_remap_index, -1 }, + { GetPixelTexGenParameterfvSGIS_names, GetPixelTexGenParameterfvSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIS_point_parameters) static const struct dri_extension_function GL_SGIS_point_parameters_functions[] = { - { PointParameterfEXT_names, PointParameterfEXT_remap_index, 458 }, - { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, 459 }, + { PointParameterfEXT_names, PointParameterfEXT_remap_index, -1 }, + { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIS_sharpen_texture) static const struct dri_extension_function GL_SGIS_sharpen_texture_functions[] = { - { GetSharpenTexFuncSGIS_names, GetSharpenTexFuncSGIS_remap_index, 445 }, - { SharpenTexFuncSGIS_names, 
SharpenTexFuncSGIS_remap_index, 444 }, + { GetSharpenTexFuncSGIS_names, GetSharpenTexFuncSGIS_remap_index, -1 }, + { SharpenTexFuncSGIS_names, SharpenTexFuncSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIS_texture4D) static const struct dri_extension_function GL_SGIS_texture4D_functions[] = { - { TexImage4DSGIS_names, TexImage4DSGIS_remap_index, 437 }, - { TexSubImage4DSGIS_names, TexSubImage4DSGIS_remap_index, 438 }, + { TexImage4DSGIS_names, TexImage4DSGIS_remap_index, -1 }, + { TexSubImage4DSGIS_names, TexSubImage4DSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5556,8 +5638,8 @@ static const struct dri_extension_function GL_SGIS_texture_color_mask_functions[ #if defined(need_GL_SGIS_texture_filter4) static const struct dri_extension_function GL_SGIS_texture_filter4_functions[] = { - { GetTexFilterFuncSGIS_names, GetTexFilterFuncSGIS_remap_index, 415 }, - { TexFilterFuncSGIS_names, TexFilterFuncSGIS_remap_index, 416 }, + { GetTexFilterFuncSGIS_names, GetTexFilterFuncSGIS_remap_index, -1 }, + { TexFilterFuncSGIS_names, TexFilterFuncSGIS_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5576,38 +5658,38 @@ static const struct dri_extension_function GL_SGIX_async_functions[] = { #if defined(need_GL_SGIX_flush_raster) static const struct dri_extension_function GL_SGIX_flush_raster_functions[] = { - { FlushRasterSGIX_names, FlushRasterSGIX_remap_index, 469 }, + { FlushRasterSGIX_names, FlushRasterSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_fragment_lighting) static const struct dri_extension_function GL_SGIX_fragment_lighting_functions[] = { - { FragmentMaterialfvSGIX_names, FragmentMaterialfvSGIX_remap_index, 486 }, - { FragmentLightModelivSGIX_names, FragmentLightModelivSGIX_remap_index, 484 }, - { FragmentLightiSGIX_names, FragmentLightiSGIX_remap_index, 479 }, - { GetFragmentMaterialfvSGIX_names, GetFragmentMaterialfvSGIX_remap_index, 491 }, - { FragmentMaterialfSGIX_names, 
FragmentMaterialfSGIX_remap_index, 485 }, - { GetFragmentLightivSGIX_names, GetFragmentLightivSGIX_remap_index, 490 }, - { FragmentLightModeliSGIX_names, FragmentLightModeliSGIX_remap_index, 483 }, - { FragmentLightivSGIX_names, FragmentLightivSGIX_remap_index, 480 }, - { GetFragmentMaterialivSGIX_names, GetFragmentMaterialivSGIX_remap_index, 492 }, - { FragmentLightModelfSGIX_names, FragmentLightModelfSGIX_remap_index, 481 }, - { FragmentColorMaterialSGIX_names, FragmentColorMaterialSGIX_remap_index, 476 }, - { FragmentMaterialiSGIX_names, FragmentMaterialiSGIX_remap_index, 487 }, - { LightEnviSGIX_names, LightEnviSGIX_remap_index, 493 }, - { FragmentLightModelfvSGIX_names, FragmentLightModelfvSGIX_remap_index, 482 }, - { FragmentLightfvSGIX_names, FragmentLightfvSGIX_remap_index, 478 }, - { FragmentLightfSGIX_names, FragmentLightfSGIX_remap_index, 477 }, - { GetFragmentLightfvSGIX_names, GetFragmentLightfvSGIX_remap_index, 489 }, - { FragmentMaterialivSGIX_names, FragmentMaterialivSGIX_remap_index, 488 }, + { FragmentMaterialfvSGIX_names, FragmentMaterialfvSGIX_remap_index, -1 }, + { FragmentLightModelivSGIX_names, FragmentLightModelivSGIX_remap_index, -1 }, + { FragmentLightiSGIX_names, FragmentLightiSGIX_remap_index, -1 }, + { GetFragmentMaterialfvSGIX_names, GetFragmentMaterialfvSGIX_remap_index, -1 }, + { FragmentMaterialfSGIX_names, FragmentMaterialfSGIX_remap_index, -1 }, + { GetFragmentLightivSGIX_names, GetFragmentLightivSGIX_remap_index, -1 }, + { FragmentLightModeliSGIX_names, FragmentLightModeliSGIX_remap_index, -1 }, + { FragmentLightivSGIX_names, FragmentLightivSGIX_remap_index, -1 }, + { GetFragmentMaterialivSGIX_names, GetFragmentMaterialivSGIX_remap_index, -1 }, + { FragmentLightModelfSGIX_names, FragmentLightModelfSGIX_remap_index, -1 }, + { FragmentColorMaterialSGIX_names, FragmentColorMaterialSGIX_remap_index, -1 }, + { FragmentMaterialiSGIX_names, FragmentMaterialiSGIX_remap_index, -1 }, + { LightEnviSGIX_names, LightEnviSGIX_remap_index, -1 
}, + { FragmentLightModelfvSGIX_names, FragmentLightModelfvSGIX_remap_index, -1 }, + { FragmentLightfvSGIX_names, FragmentLightfvSGIX_remap_index, -1 }, + { FragmentLightfSGIX_names, FragmentLightfSGIX_remap_index, -1 }, + { GetFragmentLightfvSGIX_names, GetFragmentLightfvSGIX_remap_index, -1 }, + { FragmentMaterialivSGIX_names, FragmentMaterialivSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_framezoom) static const struct dri_extension_function GL_SGIX_framezoom_functions[] = { - { FrameZoomSGIX_names, FrameZoomSGIX_remap_index, 466 }, + { FrameZoomSGIX_names, FrameZoomSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5621,31 +5703,31 @@ static const struct dri_extension_function GL_SGIX_igloo_interface_functions[] = #if defined(need_GL_SGIX_instruments) static const struct dri_extension_function GL_SGIX_instruments_functions[] = { - { ReadInstrumentsSGIX_names, ReadInstrumentsSGIX_remap_index, 463 }, - { GetInstrumentsSGIX_names, GetInstrumentsSGIX_remap_index, 460 }, - { StartInstrumentsSGIX_names, StartInstrumentsSGIX_remap_index, 464 }, - { StopInstrumentsSGIX_names, StopInstrumentsSGIX_remap_index, 465 }, - { InstrumentsBufferSGIX_names, InstrumentsBufferSGIX_remap_index, 461 }, - { PollInstrumentsSGIX_names, PollInstrumentsSGIX_remap_index, 462 }, + { ReadInstrumentsSGIX_names, ReadInstrumentsSGIX_remap_index, -1 }, + { GetInstrumentsSGIX_names, GetInstrumentsSGIX_remap_index, -1 }, + { StartInstrumentsSGIX_names, StartInstrumentsSGIX_remap_index, -1 }, + { StopInstrumentsSGIX_names, StopInstrumentsSGIX_remap_index, -1 }, + { InstrumentsBufferSGIX_names, InstrumentsBufferSGIX_remap_index, -1 }, + { PollInstrumentsSGIX_names, PollInstrumentsSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_list_priority) static const struct dri_extension_function GL_SGIX_list_priority_functions[] = { - { ListParameterfSGIX_names, ListParameterfSGIX_remap_index, 472 }, - { GetListParameterivSGIX_names, 
GetListParameterivSGIX_remap_index, 471 }, - { GetListParameterfvSGIX_names, GetListParameterfvSGIX_remap_index, 470 }, - { ListParameteriSGIX_names, ListParameteriSGIX_remap_index, 474 }, - { ListParameterfvSGIX_names, ListParameterfvSGIX_remap_index, 473 }, - { ListParameterivSGIX_names, ListParameterivSGIX_remap_index, 475 }, + { ListParameterfSGIX_names, ListParameterfSGIX_remap_index, -1 }, + { GetListParameterivSGIX_names, GetListParameterivSGIX_remap_index, -1 }, + { GetListParameterfvSGIX_names, GetListParameterfvSGIX_remap_index, -1 }, + { ListParameteriSGIX_names, ListParameteriSGIX_remap_index, -1 }, + { ListParameterfvSGIX_names, ListParameterfvSGIX_remap_index, -1 }, + { ListParameterivSGIX_names, ListParameterivSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_pixel_texture) static const struct dri_extension_function GL_SGIX_pixel_texture_functions[] = { - { PixelTexGenSGIX_names, PixelTexGenSGIX_remap_index, 430 }, + { PixelTexGenSGIX_names, PixelTexGenSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5662,37 +5744,37 @@ static const struct dri_extension_function GL_SGIX_polynomial_ffd_functions[] = #if defined(need_GL_SGIX_reference_plane) static const struct dri_extension_function GL_SGIX_reference_plane_functions[] = { - { ReferencePlaneSGIX_names, ReferencePlaneSGIX_remap_index, 468 }, + { ReferencePlaneSGIX_names, ReferencePlaneSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_sprite) static const struct dri_extension_function GL_SGIX_sprite_functions[] = { - { SpriteParameterfvSGIX_names, SpriteParameterfvSGIX_remap_index, 455 }, - { SpriteParameteriSGIX_names, SpriteParameteriSGIX_remap_index, 456 }, - { SpriteParameterfSGIX_names, SpriteParameterfSGIX_remap_index, 454 }, - { SpriteParameterivSGIX_names, SpriteParameterivSGIX_remap_index, 457 }, + { SpriteParameterfvSGIX_names, SpriteParameterfvSGIX_remap_index, -1 }, + { SpriteParameteriSGIX_names, SpriteParameteriSGIX_remap_index, -1 }, + { 
SpriteParameterfSGIX_names, SpriteParameterfSGIX_remap_index, -1 }, + { SpriteParameterivSGIX_names, SpriteParameterivSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGIX_tag_sample_buffer) static const struct dri_extension_function GL_SGIX_tag_sample_buffer_functions[] = { - { TagSampleBufferSGIX_names, TagSampleBufferSGIX_remap_index, 467 }, + { TagSampleBufferSGIX_names, TagSampleBufferSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_SGI_color_table) static const struct dri_extension_function GL_SGI_color_table_functions[] = { - { GetColorTableParameterfvSGI_names, GetColorTableParameterfvSGI_remap_index, 428 }, + { GetColorTableParameterfvSGI_names, GetColorTableParameterfvSGI_remap_index, -1 }, { ColorTableParameteriv_names, -1, 341 }, { ColorTable_names, -1, 339 }, { CopyColorTable_names, -1, 342 }, { ColorTableParameterfv_names, -1, 340 }, - { GetColorTableParameterivSGI_names, GetColorTableParameterivSGI_remap_index, 429 }, - { GetColorTableSGI_names, GetColorTableSGI_remap_index, 427 }, + { GetColorTableParameterivSGI_names, GetColorTableParameterivSGI_remap_index, -1 }, + { GetColorTableSGI_names, GetColorTableSGI_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -5753,6 +5835,7 @@ static const struct dri_extension_function GL_SUN_vertex_functions[] = { { TexCoord2fNormal3fVertex3fvSUN_names, TexCoord2fNormal3fVertex3fvSUN_remap_index, -1 }, { ReplacementCodeuiTexCoord2fNormal3fVertex3fSUN_names, ReplacementCodeuiTexCoord2fNormal3fVertex3fSUN_remap_index, -1 }, { ReplacementCodeuiTexCoord2fVertex3fSUN_names, ReplacementCodeuiTexCoord2fVertex3fSUN_remap_index, -1 }, + { TexCoord2fNormal3fVertex3fSUN_names, TexCoord2fNormal3fVertex3fSUN_remap_index, -1 }, { Color3fVertex3fSUN_names, Color3fVertex3fSUN_remap_index, -1 }, { ReplacementCodeuiNormal3fVertex3fvSUN_names, ReplacementCodeuiNormal3fVertex3fvSUN_remap_index, -1 }, { Color3fVertex3fvSUN_names, Color3fVertex3fvSUN_remap_index, -1 }, @@ -5765,7 +5848,6 @@ static 
const struct dri_extension_function GL_SUN_vertex_functions[] = { { Color4ubVertex2fvSUN_names, Color4ubVertex2fvSUN_remap_index, -1 }, { Normal3fVertex3fSUN_names, Normal3fVertex3fSUN_remap_index, -1 }, { ReplacementCodeuiColor4fNormal3fVertex3fSUN_names, ReplacementCodeuiColor4fNormal3fVertex3fSUN_remap_index, -1 }, - { TexCoord2fNormal3fVertex3fSUN_names, TexCoord2fNormal3fVertex3fSUN_remap_index, -1 }, { TexCoord2fVertex3fvSUN_names, TexCoord2fVertex3fvSUN_remap_index, -1 }, { Color4ubVertex2fSUN_names, Color4ubVertex2fSUN_remap_index, -1 }, { ReplacementCodeuiColor4ubVertex3fSUN_names, ReplacementCodeuiColor4ubVertex3fSUN_remap_index, -1 }, @@ -5786,19 +5868,19 @@ static const struct dri_extension_function GL_SUN_vertex_functions[] = { #if defined(need_GL_VERSION_1_3) static const struct dri_extension_function GL_VERSION_1_3_functions[] = { - { SampleCoverageARB_names, SampleCoverageARB_remap_index, 412 }, + { SampleCoverageARB_names, SampleCoverageARB_remap_index, -1 }, { MultiTexCoord3sARB_names, -1, 398 }, { ActiveTextureARB_names, -1, 374 }, - { CompressedTexSubImage2DARB_names, CompressedTexSubImage2DARB_remap_index, 558 }, - { CompressedTexImage3DARB_names, CompressedTexImage3DARB_remap_index, 554 }, + { CompressedTexSubImage2DARB_names, CompressedTexSubImage2DARB_remap_index, -1 }, + { CompressedTexImage3DARB_names, CompressedTexImage3DARB_remap_index, -1 }, { MultiTexCoord1fvARB_names, -1, 379 }, - { MultTransposeMatrixdARB_names, MultTransposeMatrixdARB_remap_index, 411 }, - { CompressedTexImage1DARB_names, CompressedTexImage1DARB_remap_index, 556 }, + { MultTransposeMatrixdARB_names, MultTransposeMatrixdARB_remap_index, -1 }, + { CompressedTexImage1DARB_names, CompressedTexImage1DARB_remap_index, -1 }, { MultiTexCoord3dARB_names, -1, 392 }, { MultiTexCoord2iARB_names, -1, 388 }, { MultiTexCoord2svARB_names, -1, 391 }, { MultiTexCoord2fARB_names, -1, 386 }, - { LoadTransposeMatrixdARB_names, LoadTransposeMatrixdARB_remap_index, 409 }, + { 
LoadTransposeMatrixdARB_names, LoadTransposeMatrixdARB_remap_index, -1 }, { MultiTexCoord3fvARB_names, -1, 395 }, { MultiTexCoord4sARB_names, -1, 406 }, { MultiTexCoord2dvARB_names, -1, 385 }, @@ -5811,26 +5893,26 @@ static const struct dri_extension_function GL_VERSION_1_3_functions[] = { { MultiTexCoord3ivARB_names, -1, 397 }, { MultiTexCoord2sARB_names, -1, 390 }, { MultiTexCoord4ivARB_names, -1, 405 }, - { CompressedTexSubImage1DARB_names, CompressedTexSubImage1DARB_remap_index, 559 }, + { CompressedTexSubImage1DARB_names, CompressedTexSubImage1DARB_remap_index, -1 }, { ClientActiveTextureARB_names, -1, 375 }, - { CompressedTexSubImage3DARB_names, CompressedTexSubImage3DARB_remap_index, 557 }, + { CompressedTexSubImage3DARB_names, CompressedTexSubImage3DARB_remap_index, -1 }, { MultiTexCoord2dARB_names, -1, 384 }, { MultiTexCoord4dvARB_names, -1, 401 }, { MultiTexCoord4fvARB_names, -1, 403 }, { MultiTexCoord3fARB_names, -1, 394 }, - { MultTransposeMatrixfARB_names, MultTransposeMatrixfARB_remap_index, 410 }, - { CompressedTexImage2DARB_names, CompressedTexImage2DARB_remap_index, 555 }, + { MultTransposeMatrixfARB_names, MultTransposeMatrixfARB_remap_index, -1 }, + { CompressedTexImage2DARB_names, CompressedTexImage2DARB_remap_index, -1 }, { MultiTexCoord4dARB_names, -1, 400 }, { MultiTexCoord1sARB_names, -1, 382 }, { MultiTexCoord1dvARB_names, -1, 377 }, { MultiTexCoord1ivARB_names, -1, 381 }, { MultiTexCoord2ivARB_names, -1, 389 }, { MultiTexCoord1iARB_names, -1, 380 }, - { GetCompressedTexImageARB_names, GetCompressedTexImageARB_remap_index, 560 }, + { GetCompressedTexImageARB_names, GetCompressedTexImageARB_remap_index, -1 }, { MultiTexCoord4svARB_names, -1, 407 }, { MultiTexCoord1fARB_names, -1, 378 }, { MultiTexCoord4fARB_names, -1, 402 }, - { LoadTransposeMatrixfARB_names, LoadTransposeMatrixfARB_remap_index, 408 }, + { LoadTransposeMatrixfARB_names, LoadTransposeMatrixfARB_remap_index, -1 }, { MultiTexCoord2fvARB_names, -1, 387 }, { NULL, 0, 0 } }; @@ 
-5838,85 +5920,87 @@ static const struct dri_extension_function GL_VERSION_1_3_functions[] = { #if defined(need_GL_VERSION_1_4) static const struct dri_extension_function GL_VERSION_1_4_functions[] = { - { PointParameteriNV_names, PointParameteriNV_remap_index, 642 }, - { SecondaryColor3iEXT_names, SecondaryColor3iEXT_remap_index, 567 }, - { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, 523 }, - { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, 514 }, - { SecondaryColor3bEXT_names, SecondaryColor3bEXT_remap_index, 561 }, - { PointParameterfEXT_names, PointParameterfEXT_remap_index, 458 }, - { FogCoorddEXT_names, FogCoorddEXT_remap_index, 547 }, - { FogCoordfEXT_names, FogCoordfEXT_remap_index, 545 }, - { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, 520 }, - { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, 521 }, - { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, 459 }, - { WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, 516 }, - { SecondaryColor3bvEXT_names, SecondaryColor3bvEXT_remap_index, 562 }, - { SecondaryColor3sEXT_names, SecondaryColor3sEXT_remap_index, 569 }, - { SecondaryColor3dEXT_names, SecondaryColor3dEXT_remap_index, 563 }, - { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, 513 }, - { SecondaryColorPointerEXT_names, SecondaryColorPointerEXT_remap_index, 577 }, - { SecondaryColor3uiEXT_names, SecondaryColor3uiEXT_remap_index, 573 }, - { SecondaryColor3usvEXT_names, SecondaryColor3usvEXT_remap_index, 576 }, - { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, 522 }, - { PointParameterivNV_names, PointParameterivNV_remap_index, 643 }, - { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, 524 }, - { SecondaryColor3ivEXT_names, SecondaryColor3ivEXT_remap_index, 568 }, - { WindowPos2iMESA_names, WindowPos2iMESA_remap_index, 517 }, - { SecondaryColor3fvEXT_names, SecondaryColor3fvEXT_remap_index, 566 }, - { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, 527 }, - { WindowPos2ivMESA_names, 
WindowPos2ivMESA_remap_index, 518 }, - { MultiDrawElementsEXT_names, MultiDrawElementsEXT_remap_index, 645 }, - { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, 519 }, - { FogCoordPointerEXT_names, FogCoordPointerEXT_remap_index, 549 }, - { SecondaryColor3ubvEXT_names, SecondaryColor3ubvEXT_remap_index, 572 }, - { SecondaryColor3uivEXT_names, SecondaryColor3uivEXT_remap_index, 574 }, - { WindowPos3iMESA_names, WindowPos3iMESA_remap_index, 525 }, - { SecondaryColor3dvEXT_names, SecondaryColor3dvEXT_remap_index, 564 }, - { MultiDrawArraysEXT_names, MultiDrawArraysEXT_remap_index, 644 }, - { SecondaryColor3usEXT_names, SecondaryColor3usEXT_remap_index, 575 }, - { FogCoordfvEXT_names, FogCoordfvEXT_remap_index, 546 }, - { SecondaryColor3ubEXT_names, SecondaryColor3ubEXT_remap_index, 571 }, - { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, 537 }, - { SecondaryColor3fEXT_names, SecondaryColor3fEXT_remap_index, 565 }, - { WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, 526 }, - { SecondaryColor3svEXT_names, SecondaryColor3svEXT_remap_index, 570 }, - { FogCoorddvEXT_names, FogCoorddvEXT_remap_index, 548 }, - { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, 528 }, - { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, 515 }, + { PointParameteriNV_names, PointParameteriNV_remap_index, -1 }, + { SecondaryColor3iEXT_names, SecondaryColor3iEXT_remap_index, -1 }, + { WindowPos3fMESA_names, WindowPos3fMESA_remap_index, -1 }, + { WindowPos2dvMESA_names, WindowPos2dvMESA_remap_index, -1 }, + { SecondaryColor3bEXT_names, SecondaryColor3bEXT_remap_index, -1 }, + { PointParameterfEXT_names, PointParameterfEXT_remap_index, -1 }, + { FogCoorddEXT_names, FogCoorddEXT_remap_index, -1 }, + { FogCoordfEXT_names, FogCoordfEXT_remap_index, -1 }, + { WindowPos2svMESA_names, WindowPos2svMESA_remap_index, -1 }, + { WindowPos3dMESA_names, WindowPos3dMESA_remap_index, -1 }, + { PointParameterfvEXT_names, PointParameterfvEXT_remap_index, -1 }, + { 
WindowPos2fvMESA_names, WindowPos2fvMESA_remap_index, -1 }, + { SecondaryColor3bvEXT_names, SecondaryColor3bvEXT_remap_index, -1 }, + { SecondaryColor3sEXT_names, SecondaryColor3sEXT_remap_index, -1 }, + { SecondaryColor3dEXT_names, SecondaryColor3dEXT_remap_index, -1 }, + { WindowPos2dMESA_names, WindowPos2dMESA_remap_index, -1 }, + { SecondaryColorPointerEXT_names, SecondaryColorPointerEXT_remap_index, -1 }, + { SecondaryColor3uiEXT_names, SecondaryColor3uiEXT_remap_index, -1 }, + { SecondaryColor3usvEXT_names, SecondaryColor3usvEXT_remap_index, -1 }, + { WindowPos3dvMESA_names, WindowPos3dvMESA_remap_index, -1 }, + { PointParameterivNV_names, PointParameterivNV_remap_index, -1 }, + { WindowPos3fvMESA_names, WindowPos3fvMESA_remap_index, -1 }, + { SecondaryColor3ivEXT_names, SecondaryColor3ivEXT_remap_index, -1 }, + { WindowPos2iMESA_names, WindowPos2iMESA_remap_index, -1 }, + { SecondaryColor3fvEXT_names, SecondaryColor3fvEXT_remap_index, -1 }, + { WindowPos3sMESA_names, WindowPos3sMESA_remap_index, -1 }, + { WindowPos2ivMESA_names, WindowPos2ivMESA_remap_index, -1 }, + { MultiDrawElementsEXT_names, MultiDrawElementsEXT_remap_index, -1 }, + { WindowPos2sMESA_names, WindowPos2sMESA_remap_index, -1 }, + { FogCoordPointerEXT_names, FogCoordPointerEXT_remap_index, -1 }, + { SecondaryColor3ubvEXT_names, SecondaryColor3ubvEXT_remap_index, -1 }, + { SecondaryColor3uivEXT_names, SecondaryColor3uivEXT_remap_index, -1 }, + { WindowPos3iMESA_names, WindowPos3iMESA_remap_index, -1 }, + { SecondaryColor3dvEXT_names, SecondaryColor3dvEXT_remap_index, -1 }, + { MultiDrawArraysEXT_names, MultiDrawArraysEXT_remap_index, -1 }, + { SecondaryColor3usEXT_names, SecondaryColor3usEXT_remap_index, -1 }, + { FogCoordfvEXT_names, FogCoordfvEXT_remap_index, -1 }, + { SecondaryColor3ubEXT_names, SecondaryColor3ubEXT_remap_index, -1 }, + { BlendFuncSeparateEXT_names, BlendFuncSeparateEXT_remap_index, -1 }, + { SecondaryColor3fEXT_names, SecondaryColor3fEXT_remap_index, -1 }, + { 
WindowPos3ivMESA_names, WindowPos3ivMESA_remap_index, -1 }, + { SecondaryColor3svEXT_names, SecondaryColor3svEXT_remap_index, -1 }, + { FogCoorddvEXT_names, FogCoorddvEXT_remap_index, -1 }, + { WindowPos3svMESA_names, WindowPos3svMESA_remap_index, -1 }, + { WindowPos2fMESA_names, WindowPos2fMESA_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_VERSION_1_5) static const struct dri_extension_function GL_VERSION_1_5_functions[] = { - { BeginQueryARB_names, BeginQueryARB_remap_index, 703 }, - { GetBufferSubDataARB_names, GetBufferSubDataARB_remap_index, 695 }, - { BufferSubDataARB_names, BufferSubDataARB_remap_index, 690 }, - { GetQueryivARB_names, GetQueryivARB_remap_index, 705 }, - { GetQueryObjectivARB_names, GetQueryObjectivARB_remap_index, 706 }, - { BufferDataARB_names, BufferDataARB_remap_index, 689 }, - { EndQueryARB_names, EndQueryARB_remap_index, 704 }, - { GetBufferPointervARB_names, GetBufferPointervARB_remap_index, 694 }, - { GetQueryObjectuivARB_names, GetQueryObjectuivARB_remap_index, 707 }, - { GetBufferParameterivARB_names, GetBufferParameterivARB_remap_index, 693 }, - { DeleteQueriesARB_names, DeleteQueriesARB_remap_index, 701 }, - { IsQueryARB_names, IsQueryARB_remap_index, 702 }, - { MapBufferARB_names, MapBufferARB_remap_index, 697 }, - { GenQueriesARB_names, GenQueriesARB_remap_index, 700 }, - { IsBufferARB_names, IsBufferARB_remap_index, 696 }, - { DeleteBuffersARB_names, DeleteBuffersARB_remap_index, 691 }, - { UnmapBufferARB_names, UnmapBufferARB_remap_index, 698 }, - { BindBufferARB_names, BindBufferARB_remap_index, 688 }, - { GenBuffersARB_names, GenBuffersARB_remap_index, 692 }, + { BeginQueryARB_names, BeginQueryARB_remap_index, -1 }, + { GetBufferSubDataARB_names, GetBufferSubDataARB_remap_index, -1 }, + { BufferSubDataARB_names, BufferSubDataARB_remap_index, -1 }, + { GetQueryivARB_names, GetQueryivARB_remap_index, -1 }, + { GetQueryObjectivARB_names, GetQueryObjectivARB_remap_index, -1 }, + { BufferDataARB_names, 
BufferDataARB_remap_index, -1 }, + { EndQueryARB_names, EndQueryARB_remap_index, -1 }, + { GetBufferPointervARB_names, GetBufferPointervARB_remap_index, -1 }, + { GetQueryObjectuivARB_names, GetQueryObjectuivARB_remap_index, -1 }, + { GetBufferParameterivARB_names, GetBufferParameterivARB_remap_index, -1 }, + { DeleteQueriesARB_names, DeleteQueriesARB_remap_index, -1 }, + { IsQueryARB_names, IsQueryARB_remap_index, -1 }, + { MapBufferARB_names, MapBufferARB_remap_index, -1 }, + { GenQueriesARB_names, GenQueriesARB_remap_index, -1 }, + { IsBufferARB_names, IsBufferARB_remap_index, -1 }, + { DeleteBuffersARB_names, DeleteBuffersARB_remap_index, -1 }, + { UnmapBufferARB_names, UnmapBufferARB_remap_index, -1 }, + { BindBufferARB_names, BindBufferARB_remap_index, -1 }, + { GenBuffersARB_names, GenBuffersARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif #if defined(need_GL_VERSION_2_0) static const struct dri_extension_function GL_VERSION_2_0_functions[] = { - { StencilMaskSeparate_names, StencilMaskSeparate_remap_index, 815 }, - { StencilOpSeparate_names, StencilOpSeparate_remap_index, 814 }, - { StencilFuncSeparate_names, StencilFuncSeparate_remap_index, 813 }, + { BlendEquationSeparateEXT_names, BlendEquationSeparateEXT_remap_index, -1 }, + { StencilMaskSeparate_names, StencilMaskSeparate_remap_index, -1 }, + { StencilOpSeparate_names, StencilOpSeparate_remap_index, -1 }, + { StencilFuncSeparate_names, StencilFuncSeparate_remap_index, -1 }, + { DrawBuffersARB_names, DrawBuffersARB_remap_index, -1 }, { NULL, 0, 0 } }; #endif diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index e3eca86da1b..74ed299a628 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -309,15 +309,17 @@ void driInitSingleExtension( GLcontext * ctx, /* Add each entry-point to the dispatch table. 
*/ offset = _glapi_add_dispatch( functions, parameter_signature ); - if ( ext->functions[i].remap_index != -1 ) { - driDispatchRemapTable[ ext->functions[i].remap_index ] = offset; + if (offset == -1) { + fprintf(stderr, "DISPATCH ERROR! _glapi_add_dispatch failed " + "to add %s!\n", functions[0]); } - - if ( (ext->functions[i].offset != -1) - && (ext->functions[i].offset != offset) ) { - fprintf(stderr, "DISPATCH ERROR! %s -> %u != %u\n", functions[0], - driDispatchRemapTable[ ext->functions[i].remap_index ], - ext->functions[i].offset); + else if (ext->functions[i].remap_index != -1) { + driDispatchRemapTable[ ext->functions[i].remap_index ] = + offset; + } + else if (ext->functions[i].offset != offset) { + fprintf(stderr, "DISPATCH ERROR! %s -> %u != %u\n", + functions[0], offset, ext->functions[i].offset); } } } diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile index 744b528c35e..a9e96970fae 100644 --- a/src/mesa/drivers/dri/glcore/Makefile +++ b/src/mesa/drivers/dri/glcore/Makefile @@ -57,11 +57,11 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ##### TARGETS ##### -default: depend $(LIB_DIR)/$(LIBNAME) +default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME) -$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile - CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(LIB_DIR) \ +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile + CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \ $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index d16b153fac2..7ca601e1b5c 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -93,7 +93,7 @@ GLboolean i830CreateContext( const __GLcontextModes *mesaVis, * FIXME: packed, but they're not in Intel graphics hardware. 
*/ intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS; - i = driQueryOptioni( &intel->intelScreen->optionCache, "allow_large_textures"); + i = driQueryOptioni( &intel->optionCache, "allow_large_textures"); driCalculateMaxTextureLevels( intel->texture_heaps, intel->nr_heaps, &intel->ctx.Const, diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h index d5811e6c349..bae777dd5a4 100644 --- a/src/mesa/drivers/dri/i915/i830_context.h +++ b/src/mesa/drivers/dri/i915/i830_context.h @@ -39,6 +39,7 @@ #define I830_UPLOAD_CTX 0x1 #define I830_UPLOAD_BUFFERS 0x2 #define I830_UPLOAD_STIPPLE 0x4 +#define I830_UPLOAD_INVARIENT 0x8 #define I830_UPLOAD_TEX(i) (0x10<<(i)) #define I830_UPLOAD_TEXBLEND(i) (0x100<<(i)) #define I830_UPLOAD_TEX_ALL (0x0f0) diff --git a/src/mesa/drivers/dri/i915/i830_metaops.c b/src/mesa/drivers/dri/i915/i830_metaops.c index e7215dfaf09..17fde2f4804 100644 --- a/src/mesa/drivers/dri/i915/i830_metaops.c +++ b/src/mesa/drivers/dri/i915/i830_metaops.c @@ -40,7 +40,8 @@ /* A large amount of state doesn't need to be uploaded. 
*/ -#define ACTIVE (I830_UPLOAD_TEXBLEND(0) | \ +#define ACTIVE (I830_UPLOAD_INVARIENT | \ + I830_UPLOAD_TEXBLEND(0) | \ I830_UPLOAD_STIPPLE | \ I830_UPLOAD_CTX | \ I830_UPLOAD_BUFFERS | \ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 9e71b111091..d40cf705a35 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -264,7 +264,7 @@ static void i830_emit_invarient_state( intelContextPtr intel ) { BATCH_LOCALS; - BEGIN_BATCH( 200 ); + BEGIN_BATCH( 40 ); OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); @@ -370,6 +370,9 @@ static GLuint get_state_size( struct i830_hw_state *state ) GLuint sz = 0; GLuint i; + if (dirty & I830_UPLOAD_INVARIENT) + sz += 40 * sizeof(int); + if (dirty & I830_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -408,6 +411,11 @@ static void i830_emit_state( intelContextPtr intel ) counter = intel->batch.counter; } + if (dirty & I830_UPLOAD_INVARIENT) { + if (VERBOSE) fprintf(stderr, "I830_UPLOAD_INVARIENT:\n"); + i830_emit_invarient_state( intel ); + } + if (dirty & I830_UPLOAD_CTX) { if (VERBOSE) fprintf(stderr, "I830_UPLOAD_CTX:\n"); emit( i830, state->Ctx, sizeof(state->Ctx) ); @@ -514,7 +522,6 @@ void i830InitVtbl( i830ContextPtr i830 ) i830->intel.vtbl.clear_with_tris = i830ClearWithTris; i830->intel.vtbl.rotate_window = i830RotateWindow; i830->intel.vtbl.destroy = i830_destroy_context; - i830->intel.vtbl.emit_invarient_state = i830_emit_invarient_state; i830->intel.vtbl.emit_state = i830_emit_state; i830->intel.vtbl.lost_hardware = i830_lost_hardware; i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state; diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 783bbc2ab89..8c99d0e1d67 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -137,7 +137,7 @@ GLboolean i915CreateContext( const __GLcontextModes 
*mesaVis, * hardware. */ ctx->Const.MaxTextureUnits = I915_TEX_UNITS; - i = driQueryOptioni( &intel->intelScreen->optionCache, "allow_large_textures"); + i = driQueryOptioni( &intel->optionCache, "allow_large_textures"); driCalculateMaxTextureLevels( intel->texture_heaps, intel->nr_heaps, &intel->ctx.Const, diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index a088c087b65..ec1550126a6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -45,6 +45,7 @@ #define I915_UPLOAD_PROGRAM 0x8 #define I915_UPLOAD_CONSTANTS 0x10 #define I915_UPLOAD_FOG 0x20 +#define I915_UPLOAD_INVARIENT 0x40 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -107,7 +108,7 @@ * mesa fragment_program struct. */ struct i915_fragment_program { - struct fragment_program FragProg; + struct gl_fragment_program FragProg; GLboolean translated; GLboolean params_uptodate; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 5cd6ea4de51..b0cc59c3063 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -59,7 +59,7 @@ static const GLfloat cos_constants[4] = { 1.0, */ static GLuint src_vector( struct i915_fragment_program *p, const struct prog_src_register *source, - const struct fragment_program *program ) + const struct gl_fragment_program *program ) { GLuint src; @@ -244,7 +244,7 @@ do { \ */ static void upload_program( struct i915_fragment_program *p ) { - const struct fragment_program *program = p->ctx->FragmentProgram._Current; + const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ @@ -837,7 +837,7 @@ static void track_params( struct i915_fragment_program *p ) static void 
i915BindProgram( GLcontext *ctx, GLenum target, - struct program *prog ) + struct gl_program *prog ) { if (target == GL_FRAGMENT_PROGRAM_ARB) { i915ContextPtr i915 = I915_CONTEXT(ctx); @@ -864,13 +864,13 @@ static void i915BindProgram( GLcontext *ctx, } } -static struct program *i915NewProgram( GLcontext *ctx, +static struct gl_program *i915NewProgram( GLcontext *ctx, GLenum target, GLuint id ) { switch (target) { case GL_VERTEX_PROGRAM_ARB: - return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(vertex_program), + return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program), target, id ); case GL_FRAGMENT_PROGRAM_ARB: { @@ -893,7 +893,7 @@ static struct program *i915NewProgram( GLcontext *ctx, } static void i915DeleteProgram( GLcontext *ctx, - struct program *prog ) + struct gl_program *prog ) { if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { i915ContextPtr i915 = I915_CONTEXT(ctx); @@ -909,7 +909,7 @@ static void i915DeleteProgram( GLcontext *ctx, static GLboolean i915IsProgramNative( GLcontext *ctx, GLenum target, - struct program *prog ) + struct gl_program *prog ) { if (target == GL_FRAGMENT_PROGRAM_ARB) { struct i915_fragment_program *p = (struct i915_fragment_program *)prog; @@ -925,7 +925,7 @@ static GLboolean i915IsProgramNative( GLcontext *ctx, static void i915ProgramStringNotify( GLcontext *ctx, GLenum target, - struct program *prog ) + struct gl_program *prog ) { if (target == GL_FRAGMENT_PROGRAM_ARB) { struct i915_fragment_program *p = (struct i915_fragment_program *)prog; diff --git a/src/mesa/drivers/dri/i915/i915_metaops.c b/src/mesa/drivers/dri/i915/i915_metaops.c index f7b8e5415ea..3ab5dbfd685 100644 --- a/src/mesa/drivers/dri/i915/i915_metaops.c +++ b/src/mesa/drivers/dri/i915/i915_metaops.c @@ -41,7 +41,8 @@ /* A large amount of state doesn't need to be uploaded. 
*/ -#define ACTIVE (I915_UPLOAD_PROGRAM | \ +#define ACTIVE (I915_UPLOAD_INVARIENT | \ + I915_UPLOAD_PROGRAM | \ I915_UPLOAD_STIPPLE | \ I915_UPLOAD_CTX | \ I915_UPLOAD_BUFFERS | \ diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index 45276fb6908..0faadb4f1a7 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -195,29 +195,47 @@ GLuint i915_emit_arith( struct i915_fragment_program *p, } GLuint i915_emit_texld( struct i915_fragment_program *p, - GLuint dest, - GLuint destmask, - GLuint sampler, - GLuint coord, - GLuint op ) + GLuint dest, + GLuint destmask, + GLuint sampler, + GLuint coord, + GLuint op ) { - assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); - assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) { + /* No real way to work around this in the general case - need to + * allocate and declare a new temporary register (a utemp won't + * do). Will fallback for now. 
+ */ + i915_program_error(p, "Can't (yet) swizzle TEX arguments"); + return 0; + } - if (GET_UREG_TYPE(coord) != REG_TYPE_T) { - p->nr_tex_indirect++; + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + GLuint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + return dest; } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); - *(p->csr++) = (op | - T0_DEST( dest ) | - destmask | - T0_SAMPLER( sampler )); + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } - *(p->csr++) = T1_ADDRESS_REG( coord ); - *(p->csr++) = T2_MBZ; + *(p->csr++) = (op | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); - p->nr_tex_insn++; - return dest; + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + return dest; + } } diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index bad947f5de5..db5bb9ddc78 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -919,9 +919,10 @@ static void i915_init_packets( i915ContextPtr i915 ) * we get hardware contexts working. 
*/ i915->state.active = (I915_UPLOAD_PROGRAM | - I915_UPLOAD_STIPPLE | - I915_UPLOAD_CTX | - I915_UPLOAD_BUFFERS); + I915_UPLOAD_STIPPLE | + I915_UPLOAD_CTX | + I915_UPLOAD_BUFFERS | + I915_UPLOAD_INVARIENT); } void i915InitStateFunctions( struct dd_function_table *functions ) diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 25575e5ed26..9ec54de23c5 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -136,7 +136,7 @@ static void i915_emit_invarient_state( intelContextPtr intel ) { BATCH_LOCALS; - BEGIN_BATCH( 200 ); + BEGIN_BATCH( 20 ); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -235,6 +235,9 @@ static GLuint get_state_size( struct i915_hw_state *state ) GLuint i; GLuint sz = 0; + if (dirty & I915_UPLOAD_INVARIENT) + sz += 20 * sizeof(int); + if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -286,6 +289,11 @@ static void i915_emit_state( intelContextPtr intel ) if (VERBOSE) fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty); + if (dirty & I915_UPLOAD_INVARIENT) { + if (VERBOSE) fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); + i915_emit_invarient_state( intel ); + } + if (dirty & I915_UPLOAD_CTX) { if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); emit( i915, state->Ctx, sizeof(state->Ctx) ); @@ -439,7 +447,6 @@ void i915InitVtbl( i915ContextPtr i915 ) i915->intel.vtbl.clear_with_tris = i915ClearWithTris; i915->intel.vtbl.rotate_window = i915RotateWindow; i915->intel.vtbl.destroy = i915_destroy_context; - i915->intel.vtbl.emit_invarient_state = i915_emit_invarient_state; i915->intel.vtbl.emit_state = i915_emit_state; i915->intel.vtbl.lost_hardware = i915_lost_hardware; i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state; diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c index 06312372776..dd754c67d44 100644 --- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c +++ 
b/src/mesa/drivers/dri/i915/intel_batchbuffer.c @@ -341,24 +341,22 @@ static void intelWaitForFrameCompletion( intelContextPtr intel ) ; } else { - UNLOCK_HARDWARE( intel ); intelWaitIrq( intel, intel->alloc.irq_emitted ); - LOCK_HARDWARE( intel ); } intel->irqsEmitted = 10; } if (intel->irqsEmitted) { + LOCK_HARDWARE( intel ); intelEmitIrqLocked( intel ); intel->irqsEmitted--; + UNLOCK_HARDWARE( intel ); } } else { while (intelGetLastFrame (intel) < sarea->last_dispatch) { - UNLOCK_HARDWARE( intel ); if (intel->do_usleeps) DO_USLEEP( 1 ); - LOCK_HARDWARE( intel ); } } } @@ -384,8 +382,8 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); intelWaitForFrameCompletion( intel ); + LOCK_HARDWARE( intel ); if (!rect) { diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 067f5da49e2..770b330d0a3 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -279,16 +279,6 @@ void intelInitDriverFunctions( struct dd_function_table *functions ) static void intel_emit_invarient_state( GLcontext *ctx ) { - intelContextPtr intel = INTEL_CONTEXT(ctx); - - intel->vtbl.emit_invarient_state( intel ); - intel->prim.flush = 0; - - /* Make sure this gets to the hardware, even if we have no cliprects: - */ - LOCK_HARDWARE( intel ); - intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE ); - UNLOCK_HARDWARE( intel ); } @@ -398,7 +388,7 @@ GLboolean intelInitContext( intelContextPtr intel, intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); intel->vblank_flags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intelScreen->optionCache) : VBLANK_FLAG_NO_IRQ; + ? 
driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ; (*dri_interface->getUST)(&intel->swap_ust); _math_matrix_ctr (&intel->ViewportMatrix); @@ -409,7 +399,7 @@ GLboolean intelInitContext( intelContextPtr intel, _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); _mesa_enable_extension( ctx, "GL_S3_s3tc" ); } - else if (driQueryOptionb (&intelScreen->optionCache, "force_s3tc_enable")) { + else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); } @@ -453,6 +443,7 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv) if (intel) { GLboolean release_texture_heaps; + INTEL_FIREVERTICES( intel ); intel->vtbl.destroy( intel ); @@ -551,6 +542,8 @@ void intelSetBackClipRects( intelContextPtr intel ) void intelWindowMoved( intelContextPtr intel ) { + __DRIdrawablePrivate *dPriv = intel->driDrawable; + if (!intel->ctx.DrawBuffer) { intelSetFrontClipRects( intel ); } @@ -570,6 +563,10 @@ void intelWindowMoved( intelContextPtr intel ) } } + _mesa_resize_framebuffer(&intel->ctx, + (GLframebuffer*)dPriv->driverPrivate, + dPriv->w, dPriv->h); + /* Set state we know depends on drawable parameters: */ { @@ -745,7 +742,6 @@ void intelCopySubBuffer( __DRIdrawablePrivate *dPriv, intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; ctx = &intel->ctx; if (ctx->Visual.doubleBufferMode) { - intelScreenPrivate *screen = intel->intelScreen; drm_clip_rect_t rect; rect.x1 = x + dPriv->x; rect.y1 = (dPriv->h - y - h) + dPriv->y; diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index 19213b7bc55..0ca8ff268de 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -107,7 +107,6 @@ struct intel_context struct { void (*destroy)( intelContextPtr intel ); void (*emit_state)( intelContextPtr intel ); - void (*emit_invarient_state)( intelContextPtr intel ); void 
(*lost_hardware)( intelContextPtr intel ); void (*update_texture_state)( intelContextPtr intel ); diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c index a87de17304f..107cf137ff6 100644 --- a/src/mesa/drivers/dri/i915/intel_screen.c +++ b/src/mesa/drivers/dri/i915/intel_screen.c @@ -357,6 +357,9 @@ static void intelDestroyScreen(__DRIscreenPrivate *sPriv) intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; intelUnmapScreenRegions(intelScreen); + + driDestroyOptionInfo (&intelScreen->optionCache); + FREE(intelScreen); sPriv->private = NULL; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index e1a53212a51..cf1673b429a 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -642,7 +642,7 @@ void intelChooseRenderState(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); intelContextPtr intel = INTEL_CONTEXT(ctx); GLuint flags = ctx->_TriangleCaps; - const struct fragment_program *fprog = ctx->FragmentProgram._Current; + const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; GLboolean have_wpos = (fprog && (fprog->Base.InputsRead & FRAG_BIT_WPOS)); GLuint index = 0; diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile new file mode 100644 index 00000000000..e4fb451cc09 --- /dev/null +++ b/src/mesa/drivers/dri/i965/Makefile @@ -0,0 +1,97 @@ + +TOP = ../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = i965_dri.so + +DRIVER_SOURCES = \ + bufmgr_fake.c \ + intel_batchbuffer.c \ + intel_blit.c \ + intel_buffer_objects.c \ + intel_buffers.c \ + intel_context.c \ + intel_ioctl.c \ + intel_mipmap_tree.c \ + intel_regions.c \ + intel_screen.c \ + intel_span.c \ + intel_state.c \ + intel_tex.c \ + intel_tex_validate.c \ + brw_aub.c \ + brw_aub_playback.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_unfilled.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_draw.c \ + brw_draw_current.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_exec.c \ + brw_exec_api.c \ + brw_exec_array.c \ + brw_exec_draw.c \ + brw_exec_eval.c \ + brw_fallback.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_hal.c \ + brw_metaops.c \ + brw_misc_state.c \ + brw_program.c \ + brw_save.c \ + brw_save_api.c \ + brw_save_draw.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_pool.c \ + brw_state_upload.c \ + brw_tex.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_constval.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_vs_tnl.c \ + brw_vtbl.c \ + brw_wm.c \ + brw_wm_debug.c \ + brw_wm_emit.c \ + brw_wm_fp.c \ + brw_wm_iz.c \ + brw_wm_pass0.c \ + brw_wm_pass1.c \ + brw_wm_pass2.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(MINIGLX_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + + + +include ../Makefile.template + +symlinks: diff --git a/src/mesa/drivers/dri/i965/brw_attrib.h b/src/mesa/drivers/dri/i965/brw_attrib.h new file mode 100644 index 00000000000..a8efc3a528b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_attrib.h @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#ifndef BRW_ATTRIB_H +#define BRW_ATTRIB_H + + +/* + * Note: The first attributes match the VERT_ATTRIB_* definitions + * in mtypes.h. However, the tnl module has additional attributes + * for materials, color indexes, edge flags, etc. + */ +/* Although it's nice to use these as bit indexes in a DWORD flag, we + * could manage without if necessary. Another limit currently is the + * number of bits allocated for these numbers in places like vertex + * program instruction formats and register layouts. 
+ */
+enum {
+   BRW_ATTRIB_POS = 0,
+   BRW_ATTRIB_WEIGHT = 1,
+   BRW_ATTRIB_NORMAL = 2,
+   BRW_ATTRIB_COLOR0 = 3,
+   BRW_ATTRIB_COLOR1 = 4,
+   BRW_ATTRIB_FOG = 5,
+   BRW_ATTRIB_INDEX = 6,
+   BRW_ATTRIB_EDGEFLAG = 7,
+   BRW_ATTRIB_TEX0 = 8,
+   BRW_ATTRIB_TEX1 = 9,
+   BRW_ATTRIB_TEX2 = 10,
+   BRW_ATTRIB_TEX3 = 11,
+   BRW_ATTRIB_TEX4 = 12,
+   BRW_ATTRIB_TEX5 = 13,
+   BRW_ATTRIB_TEX6 = 14,
+   BRW_ATTRIB_TEX7 = 15,
+
+   BRW_ATTRIB_GENERIC0 = 16, /* Not used? */
+   BRW_ATTRIB_GENERIC1 = 17,
+   BRW_ATTRIB_GENERIC2 = 18,
+   BRW_ATTRIB_GENERIC3 = 19,
+   BRW_ATTRIB_GENERIC4 = 20,
+   BRW_ATTRIB_GENERIC5 = 21,
+   BRW_ATTRIB_GENERIC6 = 22,
+   BRW_ATTRIB_GENERIC7 = 23,
+   BRW_ATTRIB_GENERIC8 = 24,
+   BRW_ATTRIB_GENERIC9 = 25,
+   BRW_ATTRIB_GENERIC10 = 26,
+   BRW_ATTRIB_GENERIC11 = 27,
+   BRW_ATTRIB_GENERIC12 = 28,
+   BRW_ATTRIB_GENERIC13 = 29,
+   BRW_ATTRIB_GENERIC14 = 30,
+   BRW_ATTRIB_GENERIC15 = 31,
+
+   /* Material attributes come in FRONT/BACK pairs; each BACK value is
+    * its FRONT value + 1.
+    */
+   BRW_ATTRIB_MAT_FRONT_AMBIENT = 32,
+   BRW_ATTRIB_MAT_BACK_AMBIENT = 33,
+   BRW_ATTRIB_MAT_FRONT_DIFFUSE = 34,
+   BRW_ATTRIB_MAT_BACK_DIFFUSE = 35,
+   BRW_ATTRIB_MAT_FRONT_SPECULAR = 36,
+   BRW_ATTRIB_MAT_BACK_SPECULAR = 37,
+   BRW_ATTRIB_MAT_FRONT_EMISSION = 38,
+   BRW_ATTRIB_MAT_BACK_EMISSION = 39,
+   BRW_ATTRIB_MAT_FRONT_SHININESS = 40,
+   BRW_ATTRIB_MAT_BACK_SHININESS = 41,
+   BRW_ATTRIB_MAT_FRONT_INDEXES = 42,
+   BRW_ATTRIB_MAT_BACK_INDEXES = 43,
+
+   BRW_ATTRIB_MAX = 44
+} ;
+
+#define BRW_ATTRIB_FIRST_MATERIAL BRW_ATTRIB_MAT_FRONT_AMBIENT
+
+#define BRW_MAX_COPIED_VERTS 3
+
+
+/* Widen a 32-bit Mesa input bitmask to a 64-bit mask.
+ *
+ * With vp_enabled set the mask is returned unchanged (zero-extended).
+ * Otherwise bits 0..15 stay in place and bits 16..31 are moved up by 16
+ * to bits 32..47, so input bit 16 lands on bit 32, the value of
+ * BRW_ATTRIB_MAT_FRONT_AMBIENT.
+ *
+ * NOTE(review): presumably the result is indexed by BRW_ATTRIB_* and the
+ * non-vertex-program path carries material attributes in bits 16..31 of
+ * mesa_inputs -- confirm against the callers.
+ */
+static inline GLuint64EXT brw_translate_inputs( GLboolean vp_enabled,
+                                                GLuint mesa_inputs )
+{
+   GLuint64EXT inputs = mesa_inputs;
+   if (vp_enabled)
+      return inputs;
+   else
+      return (inputs & 0xffff) | ((inputs & 0xffff0000) << 16);
+}
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_attrib_tmp.h b/src/mesa/drivers/dri/i965/brw_attrib_tmp.h
new file mode 100644
index 00000000000..3089bd6cac2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_attrib_tmp.h
@@ -0,0 +1,485 @@
+/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+**************************************************************************/
+
+/* Template of per-vertex attribute entry points.
+ *
+ * Every function below expands through TAG(), ATTR() and (on error paths)
+ * ERROR(), none of which is defined in this file; they must already be
+ * defined where this template is used.
+ *
+ * The ATTRnF / ATTRnFV wrappers always hand four component values to
+ * ATTR(): missing second and third components are passed as 0 and a
+ * missing fourth component as 1.
+ */
+#define ATTR1FV( A, V ) ATTR( A, 1, (V)[0], 0, 0, 1 )
+#define ATTR2FV( A, V ) ATTR( A, 2, (V)[0], (V)[1], 0, 1 )
+#define ATTR3FV( A, V ) ATTR( A, 3, (V)[0], (V)[1], (V)[2], 1 )
+#define ATTR4FV( A, V ) ATTR( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
+
+#define ATTR1F( A, X ) ATTR( A, 1, X, 0, 0, 1 )
+#define ATTR2F( A, X, Y ) ATTR( A, 2, X, Y, 0, 1 )
+#define ATTR3F( A, X, Y, Z ) ATTR( A, 3, X, Y, Z, 1 )
+#define ATTR4F( A, X, Y, Z, W ) ATTR( A, 4, X, Y, Z, W )
+
+/* Always names (V)[0]..(V)[3] regardless of N.
+ * NOTE(review): whether elements at or beyond N are actually read depends
+ * on how ATTR() is defined -- confirm for the N < 4 material cases.
+ */
+#define MAT_ATTR( A, N, V ) ATTR( A, N, (V)[0], (V)[1], (V)[2], (V)[3] )
+
+/* Vertex position: BRW_ATTRIB_POS. */
+static void GLAPIENTRY TAG(Vertex2f)( GLfloat x, GLfloat y )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR2F( BRW_ATTRIB_POS, x, y );
+}
+
+static void GLAPIENTRY TAG(Vertex2fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR2FV( BRW_ATTRIB_POS, v );
+}
+
+static void GLAPIENTRY TAG(Vertex3f)( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3F( BRW_ATTRIB_POS, x, y, z );
+}
+
+static void GLAPIENTRY TAG(Vertex3fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3FV( BRW_ATTRIB_POS, v );
+}
+
+static void GLAPIENTRY TAG(Vertex4f)( GLfloat x, GLfloat y, GLfloat z, GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4F( BRW_ATTRIB_POS, x, y, z, w );
+}
+
+static void GLAPIENTRY TAG(Vertex4fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4FV( BRW_ATTRIB_POS, v );
+}
+
+/* Texture coordinates without a unit argument: always BRW_ATTRIB_TEX0. */
+static void GLAPIENTRY TAG(TexCoord1f)( GLfloat x )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1F( BRW_ATTRIB_TEX0, x );
+}
+
+static void GLAPIENTRY TAG(TexCoord1fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY TAG(TexCoord2f)( GLfloat x, GLfloat y )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR2F( BRW_ATTRIB_TEX0, x, y );
+}
+
+static void GLAPIENTRY TAG(TexCoord2fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR2FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY TAG(TexCoord3f)( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3F( BRW_ATTRIB_TEX0, x, y, z );
+}
+
+static void GLAPIENTRY TAG(TexCoord3fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY TAG(TexCoord4f)( GLfloat x, GLfloat y, GLfloat z, GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4F( BRW_ATTRIB_TEX0, x, y, z, w );
+}
+
+static void GLAPIENTRY TAG(TexCoord4fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY TAG(Normal3f)( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3F( BRW_ATTRIB_NORMAL, x, y, z );
+}
+
+static void GLAPIENTRY TAG(Normal3fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3FV( BRW_ATTRIB_NORMAL, v );
+}
+
+static void GLAPIENTRY TAG(FogCoordfEXT)( GLfloat x )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1F( BRW_ATTRIB_FOG, x );
+}
+
+static void GLAPIENTRY TAG(FogCoordfvEXT)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1FV( BRW_ATTRIB_FOG, v );
+}
+
+static void GLAPIENTRY TAG(Color3f)( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3F( BRW_ATTRIB_COLOR0, x, y, z );
+}
+
+static void GLAPIENTRY TAG(Color3fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3FV( BRW_ATTRIB_COLOR0, v );
+}
+
+static void GLAPIENTRY TAG(Color4f)( GLfloat x, GLfloat y, GLfloat z, GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4F( BRW_ATTRIB_COLOR0, x, y, z, w );
+}
+
+static void GLAPIENTRY TAG(Color4fv)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR4FV( BRW_ATTRIB_COLOR0, v );
+}
+
+static void GLAPIENTRY TAG(SecondaryColor3fEXT)( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3F( BRW_ATTRIB_COLOR1, x, y, z );
+}
+
+static void GLAPIENTRY TAG(SecondaryColor3fvEXT)( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR3FV( BRW_ATTRIB_COLOR1, v );
+}
+
+
+/* The boolean edge flag is stored as a float like every other attribute. */
+static void GLAPIENTRY TAG(EdgeFlag)( GLboolean b )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1F( BRW_ATTRIB_EDGEFLAG, (GLfloat)b );
+}
+
+static void GLAPIENTRY TAG(Indexf)( GLfloat f )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1F( BRW_ATTRIB_INDEX, f );
+}
+
+static void GLAPIENTRY TAG(Indexfv)( const GLfloat *f )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   ATTR1FV( BRW_ATTRIB_INDEX, f );
+}
+
+
+/* MultiTexCoord*: only the low three bits of target pick the attribute,
+ * giving BRW_ATTRIB_TEX0 + 0..7.  The remaining bits of target are
+ * ignored; there is no range check and no error path.
+ */
+static void GLAPIENTRY TAG(MultiTexCoord1f)( GLenum target, GLfloat x  )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR1F( attr, x );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord1fv)( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR1FV( attr, v );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord2f)( GLenum target, GLfloat x, GLfloat y )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR2F( attr, x, y );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord2fv)( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR2FV( attr, v );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord3f)( GLenum target, GLfloat x, GLfloat y,
+                                             GLfloat z)
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR3F( attr, x, y, z );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord3fv)( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR3FV( attr, v );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord4f)( GLenum target, GLfloat x, GLfloat y,
+                                             GLfloat z, GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR4F( attr, x, y, z, w );
+}
+
+static void GLAPIENTRY TAG(MultiTexCoord4fv)( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
+   ATTR4FV( attr, v );
+}
+
+
+/* VertexAttrib*ARB: index 0 is written to attribute 0 (the value of
+ * BRW_ATTRIB_POS); any other index below MAX_VERTEX_ATTRIBS is written to
+ * BRW_ATTRIB_GENERIC0 + index; anything else invokes ERROR().
+ */
+static void GLAPIENTRY TAG(VertexAttrib1fARB)( GLuint index, GLfloat x )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR1F(0, x);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR1F(BRW_ATTRIB_GENERIC0 + index, x);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib1fvARB)( GLuint index,
+                                                const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR1FV(0, v);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR1FV(BRW_ATTRIB_GENERIC0 + index, v);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib2fARB)( GLuint index, GLfloat x,
+                                               GLfloat y )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR2F(0, x, y);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR2F(BRW_ATTRIB_GENERIC0 + index, x, y);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib2fvARB)( GLuint index,
+                                                const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR2FV(0, v);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR2FV(BRW_ATTRIB_GENERIC0 + index, v);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib3fARB)( GLuint index, GLfloat x,
+                                               GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR3F(0, x, y, z);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR3F(BRW_ATTRIB_GENERIC0 + index, x, y, z);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib3fvARB)( GLuint index,
+                                                const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR3FV(0, v);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR3FV(BRW_ATTRIB_GENERIC0 + index, v);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib4fARB)( GLuint index, GLfloat x,
+                                               GLfloat y, GLfloat z,
+                                               GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR4F(0, x, y, z, w);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR4F(BRW_ATTRIB_GENERIC0 + index, x, y, z, w);
+   else
+      ERROR();
+}
+
+static void GLAPIENTRY TAG(VertexAttrib4fvARB)( GLuint index,
+                                                const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index == 0)
+      ATTR4FV(0, v);
+   else if (index < MAX_VERTEX_ATTRIBS)
+      ATTR4FV(BRW_ATTRIB_GENERIC0 + index, v);
+   else
+      ERROR();
+}
+
+
+/* Although we don't export NV_vertex_program, these entrypoints are
+ * used by the display list and other code specifically because of
+ * their property of aliasing with other attributes.
+ *
+ * Here index is used directly as the attribute number.  An index at or
+ * above BRW_ATTRIB_MAX is ignored without calling ERROR().
+ */
+static void GLAPIENTRY TAG(VertexAttrib1fNV)( GLuint index, GLfloat x )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR1F(index, x);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib1fvNV)( GLuint index,
+                                               const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR1FV(index, v);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib2fNV)( GLuint index, GLfloat x,
+                                              GLfloat y )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR2F(index, x, y);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib2fvNV)( GLuint index,
+                                               const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR2FV(index, v);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib3fNV)( GLuint index, GLfloat x,
+                                              GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR3F(index, x, y, z);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib3fvNV)( GLuint index,
+                                               const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR3FV(index, v);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib4fNV)( GLuint index, GLfloat x,
+                                              GLfloat y, GLfloat z,
+                                              GLfloat w )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR4F(index, x, y, z, w);
+}
+
+static void GLAPIENTRY TAG(VertexAttrib4fvNV)( GLuint index,
+                                               const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   if (index < BRW_ATTRIB_MAX)
+      ATTR4FV(index, v);
+}
+
+
+/* Write params to the attribute ATTR unless face is GL_BACK, and to
+ * ATTR + 1 unless face is GL_FRONT.  Any other face value therefore
+ * updates both; face is not validated here.  Callers pass the FRONT
+ * attribute and rely on the matching BACK attribute being FRONT + 1.
+ */
+#define MAT( ATTR, N, face, params ) \
+do { \
+   if (face != GL_BACK) \
+      MAT_ATTR( ATTR, N, params ); /* front */ \
+   if (face != GL_FRONT) \
+      MAT_ATTR( ATTR + 1, N, params ); /* back */ \
+} while (0)
+
+
+/* Colormaterial conflicts are dealt with later.
+ */
+static void GLAPIENTRY TAG(Materialfv)( GLenum face, GLenum pname,
+                                        const GLfloat *params )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   switch (pname) {
+   case GL_EMISSION:
+      MAT( BRW_ATTRIB_MAT_FRONT_EMISSION, 4, face, params );
+      break;
+   case GL_AMBIENT:
+      MAT( BRW_ATTRIB_MAT_FRONT_AMBIENT, 4, face, params );
+      break;
+   case GL_DIFFUSE:
+      MAT( BRW_ATTRIB_MAT_FRONT_DIFFUSE, 4, face, params );
+      break;
+   case GL_SPECULAR:
+      MAT( BRW_ATTRIB_MAT_FRONT_SPECULAR, 4, face, params );
+      break;
+   case GL_SHININESS:
+      /* size 1 */
+      MAT( BRW_ATTRIB_MAT_FRONT_SHININESS, 1, face, params );
+      break;
+   case GL_COLOR_INDEXES:
+      /* size 3 */
+      MAT( BRW_ATTRIB_MAT_FRONT_INDEXES, 3, face, params );
+      break;
+   case GL_AMBIENT_AND_DIFFUSE:
+      /* one call sets both the ambient and the diffuse attribute */
+      MAT( BRW_ATTRIB_MAT_FRONT_AMBIENT, 4, face, params );
+      MAT( BRW_ATTRIB_MAT_FRONT_DIFFUSE, 4, face, params );
+      break;
+   default:
+      ERROR();
+      return;
+   }
+}
+
+
+#undef ATTR1FV
+#undef ATTR2FV
+#undef ATTR3FV
+#undef ATTR4FV
+
+#undef ATTR1F
+#undef ATTR2F
+#undef ATTR3F
+#undef ATTR4F
+
+#undef MAT
+#undef MAT_ATTR
diff --git a/src/mesa/drivers/dri/i965/brw_aub.c b/src/mesa/drivers/dri/i965/brw_aub.c
new file mode 100644
index 00000000000..f851a5b7955
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_aub.c
@@ -0,0 +1,353 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_context.h" +#include "brw_aub.h" +#include "intel_regions.h" +#include <stdio.h> + +extern char *__progname; + + +/* Registers to control page table + */ +#define PGETBL_CTL 0x2020 +#define PGETBL_ENABLED 0x1 + +#define NR_GTT_ENTRIES 65536 /* 256 mb */ + +#define FAIL \ +do { \ + fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \ + exit(1); \ +} while (0) + + +/* Emit the headers at the top of each aubfile. Initialize the GTT. 
+ */
+static void init_aubfile( FILE *aub_file )
+{
+   struct aub_file_header fh;
+   struct aub_block_header bh;
+   unsigned int data;
+
+   /* Counts calls to this function; its four bytes are stored in the
+    * day/hour/minute/second fields of the file header below.
+    */
+   static int nr;
+
+   nr++;
+
+   /* Emit the aub header:
+    */
+   memset(&fh, 0, sizeof(fh));
+
+   fh.instruction_type = AUB_FILE_HEADER;
+   fh.minor = 0x0;
+   fh.major = 0x7;
+   /* Copy at most sizeof(fh.application) bytes and stop at the end of
+    * __progname: a fixed-length memcpy read past the terminator of a
+    * short program name.  The field is a fixed-width record member and,
+    * as before, is not NUL-terminated when the name fills it.
+    */
+   strncpy(fh.application, __progname, sizeof(fh.application));
+   fh.day = (nr>>24) & 0xff;
+   fh.month = 0x0;
+   fh.year = 0x0;
+   fh.timezone = 0x0;
+   fh.second = nr & 0xff;
+   fh.minute = (nr>>8) & 0xff;
+   fh.hour = (nr>>16) & 0xff;
+   fh.comment_length = 0x0;
+
+   /* fwrite() returns the number of items written (an unsigned count),
+    * so failure is "fewer than requested", never "< 0".
+    */
+   if (fwrite(&fh, sizeof(fh), 1, aub_file) != 1)
+      FAIL;
+
+   /* Setup the GTT starting at main memory address zero (!):
+    */
+   memset(&bh, 0, sizeof(bh));
+
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_MMI0_WRITE32;
+   bh.type = 0x0;
+   bh.address_space = ADDR_GTT;	/* ??? */
+   bh.general_state_type = 0x0;
+   bh.surface_state_type = 0x0;
+   bh.address = PGETBL_CTL;
+   bh.length = 0x4;
+
+   if (fwrite(&bh, sizeof(bh), 1, aub_file) != 1)
+      FAIL;
+
+   data = 0x0 | PGETBL_ENABLED;
+
+   if (fwrite(&data, sizeof(data), 1, aub_file) != 1)
+      FAIL;
+}
+
+
+/* Write one block header followed by size / 4096 page-table entries.
+ * Each entry is brw->next_free_page with bit 0 set, and next_free_page
+ * advances by 4096 per entry.  FAIL (exit) on any short write.
+ */
+static void init_aub_gtt( struct brw_context *brw,
+                          GLuint start_offset,
+                          GLuint size )
+{
+   FILE *aub_file = brw->intel.aub_file;
+   struct aub_block_header bh;
+   unsigned int i;
+
+   assert(start_offset + size < NR_GTT_ENTRIES * 4096);
+
+
+   memset(&bh, 0, sizeof(bh));
+
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_DATA_WRITE;
+   bh.type = 0x0;
+   bh.address_space = ADDR_MAIN;
+   bh.general_state_type = 0x0;
+   bh.surface_state_type = 0x0;
+   /* four bytes of table per 4096 bytes of address range */
+   bh.address =  start_offset / 4096 * 4;
+   bh.length = size / 4096 * 4;
+
+   if (fwrite(&bh, sizeof(bh), 1, aub_file) != 1)
+      FAIL;
+
+   for (i = 0; i < size / 4096; i++) {
+      GLuint data = brw->next_free_page | 1;
+
+      brw->next_free_page += 4096;
+
+      if (fwrite(&data, sizeof(data), 1, aub_file) != 1)
+         FAIL;
+   }
+
+}
+
+/* Write a block header and its payload, then flush the stream.
+ *
+ * sz is rounded up to a multiple of four before the payload is written,
+ * so up to three bytes beyond the caller's sz are read from data.
+ * NOTE(review): callers set bh->length to the unrounded size -- confirm
+ * that this mismatch is intended.
+ */
+static void write_block_header( FILE *aub_file,
+                                struct aub_block_header *bh,
+                                const GLuint *data,
+                                GLuint sz )
+{
+   sz = (sz + 3) & ~3;
+
+   if (fwrite(bh, sizeof(*bh), 1, aub_file) != 1)
+      FAIL;
+
+   /* With a zero item size fwrite() returns 0 without failing, so an
+    * empty payload must not be treated as an error.
+    */
+   if (sz != 0 && fwrite(data, sz, 1, aub_file) != 1)
+      FAIL;
+
+   fflush(aub_file);
+}
+
+
+/* Write one aub_dump_bmp record and flush; FAIL (exit) on a short write. */
+static void write_dump_bmp( FILE *aub_file,
+                            struct aub_dump_bmp *db )
+{
+   if (fwrite(db, sizeof(*db), 1, aub_file) != 1)
+      FAIL;
+
+   fflush(aub_file);
+}
+
+
+
+/* Emit a BH_DATA_WRITE block for sz bytes at offset in the ADDR_GTT
+ * address space.  state_type goes into general_state_type when type is
+ * DW_GENERAL_STATE and into surface_state_type otherwise; the other
+ * field is zeroed.
+ */
+static void brw_aub_gtt_data( struct intel_context *intel,
+                              GLuint offset,
+                              const void *data,
+                              GLuint sz,
+                              GLuint type,
+                              GLuint state_type )
+{
+   struct aub_block_header bh;
+
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_DATA_WRITE;
+   bh.type = type;
+   bh.address_space = ADDR_GTT;
+   bh.pad0 = 0;
+
+   if (type == DW_GENERAL_STATE) {
+      bh.general_state_type = state_type;
+      bh.surface_state_type = 0;
+   }
+   else {
+      bh.general_state_type = 0;
+      bh.surface_state_type = state_type;
+   }
+
+   bh.pad1 = 0;
+   bh.address = offset;
+   bh.length = sz;
+
+   write_block_header(intel->aub_file, &bh, data, sz);
+}
+
+
+
+/* Emit a BH_COMMAND_WRITE block of type CW_PRIMARY_RING_A for sz bytes
+ * of commands at offset in the ADDR_GTT address space.
+ */
+static void brw_aub_gtt_cmds( struct intel_context *intel,
+                              GLuint offset,
+                              const void *data,
+                              GLuint sz )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct aub_block_header bh;
+   GLuint type = CW_PRIMARY_RING_A;
+
+
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_COMMAND_WRITE;
+   bh.type = type;
+   bh.address_space = ADDR_GTT;
+   bh.pad0 = 0;
+   bh.general_state_type = 0;
+   bh.surface_state_type = 0;
+   bh.pad1 = 0;
+   bh.address = offset;
+   bh.length = sz;
+
+   write_block_header(brw->intel.aub_file, &bh, data, sz);
+}
+
+/* Emit an AUB_DUMP_BMP record: buffer == 0 describes the front buffer
+ * from the screen private, any other value the back buffer from
+ * intel->back_region.
+ */
+static void brw_aub_dump_bmp( struct intel_context *intel,
+                              GLuint buffer )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   intelScreenPrivate *intelScreen = brw->intel.intelScreen;
+   struct aub_dump_bmp db;
+   GLuint format;
+
+   /* format code chosen from bytes per pixel */
+   if (intelScreen->cpp == 4)
+      format = 0x7;
+   else
+      format = 0x3;
+
+
+   if (buffer == 0) {
+      db.instruction_type = AUB_DUMP_BMP;
+      db.xmin = 0;
+      db.ymin = 0;
+      db.format = format;
+      db.bpp = intelScreen->cpp * 8;
+      db.pitch = intelScreen->front.pitch / intelScreen->cpp;
+      db.xsize = intelScreen->width;
+      db.ysize = intelScreen->height;
+      db.addr = intelScreen->front.offset;
+      db.unknown = 0x0;		/* 4: xmajor tiled, 0: not tiled */
+
+      write_dump_bmp(brw->intel.aub_file, &db);
+   }
+   else {
+      db.instruction_type = AUB_DUMP_BMP;
+      db.xmin = 0;
+      db.ymin = 0;
+      db.format = format;
+      db.bpp = intel->back_region->cpp * 8;
+      /* NOTE(review): the front-buffer path divides pitch by cpp and
+       * takes xsize from the screen width; here pitch is used undivided
+       * for both pitch and xsize -- confirm the units of
+       * back_region->pitch.
+       */
+      db.pitch = intel->back_region->pitch;
+      db.xsize = intel->back_region->pitch;
+      db.ysize = intel->back_region->height;
+      db.addr = intelScreen->back.offset;
+      db.unknown = intel->back_region->tiled ? 0x4 : 0x0;
+
+      write_dump_bmp(brw->intel.aub_file, &db);
+   }
+}
+
+/* Attempt to prevent monster aubfiles by closing and reopening when
+ * the state pools wrap.
+ */
+static void brw_aub_wrap( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   if (intel->aub_file) {
+      brw_aub_destroy(brw);
+      brw_aub_init(brw);
+   }
+   brw->wrap = 1;		/* ??? */
+}
+
+
+int brw_aub_init( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   char filename[80];
+   int val;
+   static int i = 0;
+
+   i++;
+
+   if (_mesa_getenv("INTEL_REPLAY"))
+      return 0;
+
+   if (_mesa_getenv("INTEL_AUBFILE")) {
+      val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4);
+      _mesa_printf("--> Aub file: %s\n", filename);
+      brw->intel.aub_file = fopen(filename, "w");
+   }
+   else if (_mesa_getenv("INTEL_AUB")) {
+      val = snprintf(filename, sizeof(filename), "%s.aub", __progname);
+      if (val < 0 || val > sizeof(filename))
+         strcpy(filename, "default.aub");
+
+      _mesa_printf("--> Aub file: %s\n", filename);
+      brw->intel.aub_file = fopen(filename, "w");
+   }
+   else {
+      return 0;
+   }
+
+   if (!brw->intel.aub_file) {
+      _mesa_printf("couldn't open aubfile\n");
+      exit(1);
+   }
+
+   brw->intel.vtbl.aub_commands = brw_aub_gtt_cmds;
+   brw->intel.vtbl.aub_dump_bmp = brw_aub_dump_bmp;
+
brw->intel.vtbl.aub_gtt_data = brw_aub_gtt_data; + brw->intel.vtbl.aub_wrap = brw_aub_wrap; + + init_aubfile(brw->intel.aub_file); + + /* The GTT is located starting address zero in main memory. Pages + * to populate the gtt start after this point. + */ + brw->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095; + + /* More or less correspond with all the agp regions mapped by the + * driver: + */ + init_aub_gtt(brw, 0, 4096*4); /* so new fulsim doesn't crash */ + init_aub_gtt(brw, intelScreen->front.offset, intelScreen->back.size); + init_aub_gtt(brw, intelScreen->back.offset, intelScreen->back.size); + init_aub_gtt(brw, intelScreen->depth.offset, intelScreen->back.size); + init_aub_gtt(brw, intelScreen->tex.offset, intelScreen->tex.size); + + return 0; +} + +void brw_aub_destroy( struct brw_context *brw ) +{ + if (brw->intel.aub_file) { + fclose(brw->intel.aub_file); + brw->intel.aub_file = NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_aub.h b/src/mesa/drivers/dri/i965/brw_aub.h new file mode 100644 index 00000000000..198e36dc3c0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_aub.h @@ -0,0 +1,172 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
/* File header record, written once at the start of every aubfile.
 * The writer zeroes the whole record before filling it in.
 */
struct aub_file_header {
   unsigned int instruction_type;   /* AUB_FILE_HEADER */
   unsigned int pad0:16;
   unsigned int minor:8;            /* format version, minor part */
   unsigned int major:8;            /* format version, major part */
   unsigned char application[8*4];  /* program name, 32 bytes, not necessarily NUL terminated */
   unsigned int day:8;              /* the writer stores a per-process file counter in */
   unsigned int month:8;            /* day/hour/minute/second rather than a real date   */
   unsigned int year:16;
   unsigned int timezone:8;
   unsigned int second:8;
   unsigned int minute:8;
   unsigned int hour:8;
   unsigned int comment_length:16;
   unsigned int pad1:16;
};

/* Block header record.  It is immediately followed in the file by
 * 'length' bytes of payload, padded up to a multiple of 4 bytes.
 */
struct aub_block_header {
   unsigned int instruction_type;      /* AUB_BLOCK_HEADER */
   unsigned int operation:8;           /* enum bh_operation */
   unsigned int type:8;                /* enum data_write_type or enum command_write_type,
                                        * depending on 'operation' */
   unsigned int address_space:8;       /* enum address_space */
   unsigned int pad0:8;
   unsigned int general_state_type:8;  /* enum data_write_general_state_type */
   unsigned int surface_state_type:8;  /* enum data_write_surface_state_type */
   unsigned int pad1:16;
   unsigned int address;               /* destination offset within address_space */
   unsigned int length;                /* payload size in bytes (before padding) */
};

/* Request to dump a rectangle of a colour buffer as a bitmap. */
struct aub_dump_bmp {
   unsigned int instruction_type;   /* AUB_DUMP_BMP */
   unsigned int xmin:16;
   unsigned int ymin:16;
   unsigned int pitch:16;
   unsigned int bpp:8;              /* bits per pixel */
   unsigned int format:8;           /* the writer uses 0x7 for 4 bytes/pixel, 0x3 otherwise */
   unsigned int xsize:16;
   unsigned int ysize:16;
   unsigned int addr;               /* buffer offset */
   unsigned int unknown;            /* per the writer: 4 = xmajor tiled, 0 = not tiled */
};
*/ + CW_PRIMARY_RING_C, + CW_MAX_TYPE +}; + +enum data_write_type { + DW_NOTYPE, + DW_BATCH_BUFFER, + DW_BIN_BUFFER, + DW_BIN_POINTER_LIST, + DW_SLOW_STATE_BUFFER, + DW_VERTEX_BUFFER, + DW_2D_MAP, + DW_CUBE_MAP, + DW_INDIRECT_STATE_BUFFER, + DW_VOLUME_MAP, + DW_1D_MAP, + DW_CONSTANT_BUFFER, + DW_CONSTANT_URB_ENTRY, + DW_INDEX_BUFFER, + DW_GENERAL_STATE, + DW_SURFACE_STATE, + DW_MEDIA_OBJECT_INDIRECT_DATA, + DW_MAX_TYPE +}; + +enum data_write_general_state_type { + DWGS_NOTYPE, + DWGS_VERTEX_SHADER_STATE, + DWGS_GEOMETRY_SHADER_STATE , + DWGS_CLIPPER_STATE, + DWGS_STRIPS_FANS_STATE, + DWGS_WINDOWER_IZ_STATE, + DWGS_COLOR_CALC_STATE, + DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */ + DWGS_STRIPS_FANS_VIEWPORT_STATE, + DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */ + DWGS_SAMPLER_STATE, + DWGS_KERNEL_INSTRUCTIONS, + DWGS_SCRATCH_SPACE, + DWGS_SAMPLER_DEFAULT_COLOR, + DWGS_INTERFACE_DESCRIPTOR, + DWGS_VLD_STATE, + DWGS_VFE_STATE, + DWGS_MAX_TYPE +}; + +enum data_write_surface_state_type { + DWSS_NOTYPE, + DWSS_BINDING_TABLE_STATE, + DWSS_SURFACE_STATE, + DWSS_MAX_TYPE +}; + +enum memory_map_type { + MM_DEFAULT, + MM_DYNAMIC, + MM_MAX_TYPE +}; + +enum address_space { + ADDR_GTT, + ADDR_LOCAL, + ADDR_MAIN, + ADDR_MAX +}; + + +#define AUB_FILE_HEADER 0xe085000b +#define AUB_BLOCK_HEADER 0xe0c10003 +#define AUB_DUMP_BMP 0xe09e0004 + +struct brw_context; +struct intel_context; + +int brw_aub_init( struct brw_context *brw ); +void brw_aub_destroy( struct brw_context *brw ); + +int brw_playback_aubfile(struct brw_context *brw, + const char *filename); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_aub_playback.c b/src/mesa/drivers/dri/i965/brw_aub_playback.c new file mode 100644 index 00000000000..49cc967716e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_aub_playback.c @@ -0,0 +1,443 @@ + +#include <stdio.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include "brw_aub.h" +#include "brw_defines.h" 
+#include "brw_context.h" +#include "intel_ioctl.h" +#include "bufmgr.h" + +struct aub_state { + struct intel_context *intel; + const char *map; + unsigned int csr; + unsigned int sz; +}; + + +static int gobble( struct aub_state *s, int size ) +{ + if (s->csr + size > s->sz) { + _mesa_printf("EOF in %s\n", __FUNCTION__); + return 1; + } + + s->csr += size; + return 0; +} + +static void flush_and_fence( struct aub_state *s ) +{ + struct intel_context *intel = s->intel; + GLuint buf[2]; + + buf[0] = intel->vtbl.flush_cmd(); + buf[1] = 0; + + intel_cmd_ioctl(intel, (char *)&buf, sizeof(buf), GL_TRUE); + + intelWaitIrq( intel, intelEmitIrqLocked( intel )); +} + +static void flush_cmds( struct aub_state *s, + const void *data, + int len ) +{ + DBG("%s %d\n", __FUNCTION__, len); + + if (len & 0x4) { + unsigned int *tmp = malloc(len + 4); + DBG("padding to octword\n"); + memcpy(tmp, data, len); + tmp[len/4] = MI_NOOP; + flush_cmds(s, tmp, len+4); + free(tmp); + return; + } + + /* For ring data, just send off immediately via an ioctl. + * This differs slightly from how the stream was executed + * initially as this would have been a batchbuffer. 
+ */ + intel_cmd_ioctl(s->intel, (void *)data, len, GL_TRUE); + + if (1) + flush_and_fence(s); +} + +static const char *pstrings[] = { + "none", + "POINTLIST", + "LINELIST", + "LINESTRIP", + "TRILIST", + "TRISTRIP", + "TRIFAN", + "QUADLIST", + "QUADSTRIP", + "LINELIST_ADJ", + "LINESTRIP_ADJ", + "TRILIST_ADJ", + "TRISTRIP_ADJ", + "TRISTRIP_REVERSE", + "POLYGON", + "RECTLIST", + "LINELOOP", + "POINTLIST_BF", + "LINESTRIP_CONT", + "LINESTRIP_BF", + "LINESTRIP_CONT_BF", + "TRIFAN_NOSTIPPLE", +}; + +static void do_3d_prim( struct aub_state *s, + const void *data, + int len ) +{ + struct brw_3d_primitive prim; + const struct brw_3d_primitive *orig = data; + int i; + + assert(len == sizeof(prim)); + memcpy(&prim, data, sizeof(prim)); + +#define START 0 +#define BLOCK (12*28) + + if (orig->verts_per_instance < BLOCK) + flush_cmds(s, &prim, sizeof(prim)); + else { + for (i = START; i + BLOCK < orig->verts_per_instance; i += BLOCK/2) { + prim.start_vert_location = i; + prim.verts_per_instance = BLOCK; + _mesa_printf("%sprim %d/%s verts %d..%d (of %d)\n", + prim.header.indexed ? 
"INDEXED " : "", + prim.header.topology, pstrings[prim.header.topology%16], + prim.start_vert_location, + prim.start_vert_location + prim.verts_per_instance, + orig->verts_per_instance); + flush_cmds(s, &prim, sizeof(prim)); + } + } +} + + + +static struct { + int cmd; + const char *name; + int has_length; +} cmd_info[] = { + { 0, "NOOP", 0 }, + { 0x5410, "XY_COLOR_BLT_RGB", 1 }, + { 0x5430, "XY_COLOR_BLT_RGBA", 1 }, + { 0x54d0, "XY_SRC_COPY_BLT_RGB", 1 }, + { 0x54f0, "XY_SRC_COPY_BLT_RGBA", 1 }, + { CMD_URB_FENCE, "URB_FENCE", 1 }, + { CMD_CONST_BUFFER_STATE, "CONST_BUFFER_STATE", 1 }, + { CMD_CONST_BUFFER, "CONST_BUFFER", 1 }, + { CMD_STATE_BASE_ADDRESS, "STATE_BASE_ADDRESS", 1 }, + { CMD_STATE_INSN_POINTER, "STATE_INSN_POINTER", 1 }, + { CMD_PIPELINE_SELECT, "PIPELINE_SELECT", 0, }, + { CMD_PIPELINED_STATE_POINTERS, "PIPELINED_STATE_POINTERS", 1 }, + { CMD_BINDING_TABLE_PTRS, "BINDING_TABLE_PTRS", 1 }, + { CMD_VERTEX_BUFFER, "VERTEX_BUFFER", 1 }, + { CMD_VERTEX_ELEMENT, "VERTEX_ELEMENT", 1 }, + { CMD_INDEX_BUFFER, "INDEX_BUFFER", 1 }, + { CMD_VF_STATISTICS, "VF_STATISTICS", 0 }, + { CMD_DRAW_RECT, "DRAW_RECT", 1 }, + { CMD_BLEND_CONSTANT_COLOR, "BLEND_CONSTANT_COLOR", 1 }, + { CMD_CHROMA_KEY, "CHROMA_KEY", 1 }, + { CMD_DEPTH_BUFFER, "DEPTH_BUFFER", 1 }, + { CMD_POLY_STIPPLE_OFFSET, "POLY_STIPPLE_OFFSET", 1 }, + { CMD_POLY_STIPPLE_PATTERN, "POLY_STIPPLE_PATTERN", 1 }, + { CMD_LINE_STIPPLE_PATTERN, "LINE_STIPPLE_PATTERN", 1 }, + { CMD_GLOBAL_DEPTH_OFFSET_CLAMP, "GLOBAL_DEPTH_OFFSET_CLAMP", 1 }, + { CMD_PIPE_CONTROL, "PIPE_CONTROL", 1 }, + { CMD_MI_FLUSH, "MI_FLUSH", 0 }, + { CMD_3D_PRIM, "3D_PRIM", 1 }, +}; + +#define NR_CMDS (sizeof(cmd_info)/sizeof(cmd_info[0])) + + +static int find_command( unsigned int cmd ) +{ + int i; + + for (i = 0; i < NR_CMDS; i++) + if (cmd == cmd_info[i].cmd) + return i; + + return -1; +} + + + +static int parse_commands( struct aub_state *s, + const unsigned int *data, + int len ) +{ + while (len) { + int cmd = data[0] >> 16; + int 
dwords; + int i; + + i = find_command(cmd); + + if (i < 0) { + _mesa_printf("couldn't find info for cmd %x\n", cmd); + return 1; + } + + if (cmd_info[i].has_length) + dwords = (data[0] & 0xff) + 2; + else + dwords = 1; + + _mesa_printf("%s (%d dwords) 0x%x\n", cmd_info[i].name, dwords, data[0]); + + if (len < dwords * 4) { + _mesa_printf("EOF in %s (%d bytes)\n", __FUNCTION__, len); + return 1; + } + + + if (0 && cmd == CMD_3D_PRIM) + do_3d_prim(s, data, dwords * 4); + else + flush_cmds(s, data, dwords * 4); + + data += dwords; + len -= dwords * 4; + } + + return 0; +} + + + +static void parse_data_write( struct aub_state *s, + const struct aub_block_header *bh, + void *dest, + const unsigned int *data, + int len ) +{ + switch (bh->type) { + case DW_GENERAL_STATE: + switch (bh->general_state_type) { + case DWGS_VERTEX_SHADER_STATE: { + struct brw_vs_unit_state vs; + assert(len == sizeof(vs)); + + _mesa_printf("DWGS_VERTEX_SHADER_STATE\n"); + memcpy(&vs, data, sizeof(vs)); + +/* vs.vs6.vert_cache_disable = 1; */ +/* vs.thread4.max_threads = 4; */ + + memcpy(dest, &vs, sizeof(vs)); + return; + } + case DWGS_CLIPPER_STATE: { + struct brw_clip_unit_state clip; + assert(len == sizeof(clip)); + + _mesa_printf("DWGS_CLIPPER_STATE\n"); + memcpy(&clip, data, sizeof(clip)); + +/* clip.thread4.max_threads = 0; */ +/* clip.clip5.clip_mode = BRW_CLIPMODE_REJECT_ALL; */ + + memcpy(dest, &clip, sizeof(clip)); + return; + } + + case DWGS_NOTYPE: + case DWGS_GEOMETRY_SHADER_STATE: + case DWGS_STRIPS_FANS_STATE: + break; + + case DWGS_WINDOWER_IZ_STATE: { + struct brw_wm_unit_state wm; + assert(len == sizeof(wm)); + + _mesa_printf("DWGS_WINDOWER_IZ_STATE\n"); + memcpy(&wm, data, sizeof(wm)); + +/* wm.wm5.max_threads = 10; */ + + memcpy(dest, &wm, sizeof(wm)); + return; + } + + case DWGS_COLOR_CALC_STATE: + case DWGS_CLIPPER_VIEWPORT_STATE: + case DWGS_STRIPS_FANS_VIEWPORT_STATE: + case DWGS_COLOR_CALC_VIEWPORT_STATE: + case DWGS_SAMPLER_STATE: + case DWGS_KERNEL_INSTRUCTIONS: + case 
DWGS_SCRATCH_SPACE: + case DWGS_SAMPLER_DEFAULT_COLOR: + case DWGS_INTERFACE_DESCRIPTOR: + case DWGS_VLD_STATE: + case DWGS_VFE_STATE: + default: + break; + } + break; + case DW_SURFACE_STATE: + break; + case DW_1D_MAP: + case DW_2D_MAP: + case DW_CUBE_MAP: + case DW_VOLUME_MAP: + case DW_CONSTANT_BUFFER: + case DW_CONSTANT_URB_ENTRY: + case DW_VERTEX_BUFFER: + case DW_INDEX_BUFFER: + default: + break; + } + + memcpy(dest, data, len); +} + + +/* In order to work, the memory layout has to be the same as the X + * server which created the aubfile. + */ +static int parse_block_header( struct aub_state *s ) +{ + struct aub_block_header *bh = (struct aub_block_header *)(s->map + s->csr); + void *data = (void *)(bh + 1); + unsigned int len = (bh->length + 3) & ~3; + + _mesa_printf("block header at 0x%x\n", s->csr); + + if (s->csr + len + sizeof(*bh) > s->sz) { + _mesa_printf("EOF in data in %s\n", __FUNCTION__); + return 1; + } + + if (bh->address_space == ADDR_GTT) { + + switch (bh->operation) + { + case BH_DATA_WRITE: { + void *dest = bmFindVirtual( s->intel, bh->address, len ); + if (dest == NULL) { + _mesa_printf("Couldn't find virtual address for offset %x\n", bh->address); + return 1; + } + +#if 1 + parse_data_write(s, bh, dest, data, len); +#else + memcpy(dest, data, len); +#endif + break; + } + case BH_COMMAND_WRITE: +#if 0 + intel_cmd_ioctl(s->intel, (void *)data, len, GL_TRUE); +#else + if (parse_commands(s, data, len) != 0) + _mesa_printf("parse_commands failed\n"); +#endif + break; + default: + break; + } + } + + s->csr += sizeof(*bh) + len; + return 0; +} + + +#define AUB_FILE_HEADER 0xe085000b +#define AUB_BLOCK_HEADER 0xe0c10003 +#define AUB_DUMP_BMP 0xe09e0004 + +int brw_playback_aubfile(struct brw_context *brw, + const char *filename) +{ + struct intel_context *intel = &brw->intel; + struct aub_state state; + struct stat sb; + int fd; + int retval = 0; + + state.intel = intel; + + fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + _mesa_printf("couldn't 
open aubfile: %s\n", filename); + return 1; + } + + if (fstat(fd, &sb) != 0) { + _mesa_printf("couldn't open %s\n", filename); + return 1; + } + + state.csr = 0; + state.sz = sb.st_size; + state.map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (state.map == NULL) { + _mesa_printf("couldn't mmap %s\n", filename); + return 1; + } + + LOCK_HARDWARE(intel); + { + /* Make sure we don't confuse anything that might happen to be + * going on with the hardware: + */ +/* bmEvictAll(intel); */ +/* intel->vtbl.lost_hardware(intel); */ + + + /* Replay the aubfile item by item: + */ + while (retval == 0 && + state.csr != state.sz) { + unsigned int insn = *(unsigned int *)(state.map + state.csr); + + switch (insn) { + case AUB_FILE_HEADER: + retval = gobble(&state, sizeof(struct aub_file_header)); + break; + + case AUB_BLOCK_HEADER: + retval = parse_block_header(&state); + break; + + case AUB_DUMP_BMP: + retval = gobble(&state, sizeof(struct aub_dump_bmp)); + break; + + default: + _mesa_printf("unknown instruction %x\n", insn); + retval = 1; + break; + } + } + } + UNLOCK_HARDWARE(intel); + return retval; +} + + + + + + + diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c new file mode 100644 index 00000000000..84a02f8a8c9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -0,0 +1,172 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "enums.h" + +static void upload_cc_vp( struct brw_context *brw ) +{ + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + + brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_state cc; + + memset(&cc, 0, sizeof(cc)); + + /* _NEW_STENCIL */ + if (brw->attribs.Stencil->Enabled) { + cc.cc0.stencil_enable = brw->attribs.Stencil->Enabled; + cc.cc0.stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[0]); + cc.cc0.stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[0]); + cc.cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[0]); + cc.cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[0]); + cc.cc1.stencil_ref = brw->attribs.Stencil->Ref[0]; + cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0]; + cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0]; + + if (brw->attribs.Stencil->TestTwoSide) { + cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide; + cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]); + cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[1]); + cc.cc1.bf_stencil_ref = 
brw->attribs.Stencil->Ref[1]; + cc.cc2.bf_stencil_write_mask = brw->attribs.Stencil->WriteMask[1]; + cc.cc2.bf_stencil_test_mask = brw->attribs.Stencil->ValueMask[1]; + } + + /* Not really sure about this: + */ + if (brw->attribs.Stencil->WriteMask[0] || + (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + cc.cc0.stencil_write_enable = 1; + } + + /* _NEW_COLOR */ + if (brw->attribs.Color->_LogicOpEnabled) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = intel_translate_logic_op( brw->attribs.Color->LogicOp ); + } + else if (brw->attribs.Color->BlendEnabled) { + GLenum eqRGB = brw->attribs.Color->BlendEquationRGB; + GLenum eqA = brw->attribs.Color->BlendEquationA; + GLenum srcRGB = brw->attribs.Color->BlendSrcRGB; + GLenum dstRGB = brw->attribs.Color->BlendDstRGB; + GLenum srcA = brw->attribs.Color->BlendSrcA; + GLenum dstA = brw->attribs.Color->BlendDstA; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (brw->attribs.Color->AlphaEnabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = intel_translate_compare_func(brw->attribs.Color->AlphaFunc); + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.Color->AlphaRef); + + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + } + + if (brw->attribs.Color->DitherFlag) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + 
/* Tracked-state entry for the colour calculator unit state.
 *
 * The dirty flags mirror what upload_cc_unit() reads: stencil, colour
 * and depth GL state, plus the cached CC viewport whose offset is
 * embedded in the unit state.
 */
const struct brw_tracked_state brw_cc_unit = {
   .dirty = {
      .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
      .brw = 0,
      .cache = CACHE_NEW_CC_VP
   },
   .update = upload_cc_unit
};
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const GLuint *program; + GLuint program_size; + GLuint delta; + GLuint i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.key = *key; + + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++) + if (c.key.attrs & (1<<i)) { + c.offset[i] = delta; + delta += ATTR_SIZE; + } + + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. 
+ */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case GL_TRIANGLES: + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else + brw_emit_tri_clip( &c ); + break; + case GL_LINES: + brw_emit_line_clip( &c ); + break; + case GL_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + + +static GLboolean search_cache( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_CLIP_PROG], + key, sizeof(*key), + &brw->clip.prog_data, + &brw->clip.prog_gs_offset); +} + + + + +/* Calculate interpolants for triangle and line rasterization. 
+ */ +static void upload_clip_prog( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->intel.reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* _NEW_LIGHT */ + key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); + /* _NEW_TRANSFORM */ + key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + key.clip_mode = BRW_CLIPMODE_NORMAL; + + /* _NEW_POLYGON */ + if (key.primitive == GL_TRIANGLES) { + if (brw->attribs.Polygon->CullFaceMode == GL_FRONT_AND_BACK) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + GLuint fill_front = CLIP_CULL; + GLuint fill_back = CLIP_CULL; + GLuint offset_front = 0; + GLuint offset_back = 0; + + if (!brw->attribs.Polygon->CullFlag || + brw->attribs.Polygon->CullFaceMode != GL_FRONT) { + switch (brw->attribs.Polygon->FrontMode) { + case GL_FILL: + fill_front = CLIP_FILL; + offset_front = 0; + break; + case GL_LINE: + key.do_unfilled = 1; + fill_front = CLIP_LINE; + offset_front = brw->attribs.Polygon->OffsetLine; + break; + case GL_POINT: + key.do_unfilled = 1; + fill_front = CLIP_POINT; + offset_front = brw->attribs.Polygon->OffsetPoint; + break; + } + } + + if (!brw->attribs.Polygon->CullFlag || + brw->attribs.Polygon->CullFaceMode != GL_BACK) { + switch (brw->attribs.Polygon->BackMode) { + case GL_FILL: + fill_back = CLIP_FILL; + offset_back = 0; + break; + case GL_LINE: + key.do_unfilled = 1; + fill_back = CLIP_LINE; + offset_back = brw->attribs.Polygon->OffsetLine; + break; + case GL_POINT: + key.do_unfilled = 1; + fill_back = CLIP_POINT; + offset_back = brw->attribs.Polygon->OffsetPoint; + break; + } + } + + /* Most cases the fixed function units will handle. 
Cases where + * one or more polygon faces are unfilled will require help: + */ + if (key.do_unfilled) { + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (offset_back || offset_front) { + /* _NEW_POLYGON, _NEW_BUFFERS */ + key.offset_units = brw->attribs.Polygon->OffsetUnits * brw->intel.polygon_offset_scale; + key.offset_factor = brw->attribs.Polygon->OffsetFactor * ctx->DrawBuffer->_MRD; + } + + switch (brw->attribs.Polygon->FrontFace) { + case GL_CCW: + key.fill_ccw = fill_front; + key.fill_cw = fill_back; + key.offset_ccw = offset_front; + key.offset_cw = offset_back; + if (brw->attribs.Light->Model.TwoSide && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + break; + case GL_CW: + key.fill_cw = fill_front; + key.fill_ccw = fill_back; + key.offset_cw = offset_front; + key.offset_ccw = offset_back; + if (brw->attribs.Light->Model.TwoSide && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; + break; + } + } + } + } + + if (!search_cache(brw, &key)) + compile_clip_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_TRANSFORM | + _NEW_POLYGON | + _NEW_BUFFERS), + .brw = (BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_clip_prog +}; diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h new file mode 100644 index 00000000000..c3967c8c614 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) /* length of reg.vertex[] in struct brw_clip_compile */ + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { /* the bitfield widths sum to 32 in each of the two groups below */ + GLuint attrs:16; + GLuint primitive:4; + GLuint nr_userclip:3; + GLuint do_flat_shading:1; + GLuint do_unfilled:1; + GLuint fill_cw:2; /* includes cull information; holds a CLIP_* value */ + GLuint fill_ccw:2; /* includes cull information; holds a CLIP_* value */ + GLuint offset_cw:1; + GLuint offset_ccw:1; + GLuint pad0:1; /* brings the first group of bitfields to 32 bits */ + + GLuint copy_bfc_cw:1; + GLuint copy_bfc_ccw:1; + GLuint clip_mode:3; + GLuint pad1:27; /* brings the second group of bitfields to 32 bits */ + + GLfloat offset_factor; + GLfloat offset_units; +}; + + +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 /* the CLIP_* values are what the 2-bit fill_cw / fill_ccw key fields store */ + + +#define PRIM_MASK (0x1f) /* ANDed with element 2 of R0 before comparing against _3DPRIM_* values in brw_clip_tri.c */ + +struct brw_clip_compile { /* state for compiling one clip program: the program being built (func), its key, the resulting prog_data and the register assignments */ + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + } reg; /* filled in by brw_clip_line_alloc_regs / brw_clip_tri_alloc_regs */ + + /* 3 different ways of expressing vertex size: + */ + GLuint nr_attrs; + GLuint nr_regs; + GLuint nr_bytes; + + GLuint first_tmp; + GLuint last_tmp; + + GLboolean need_direction; + + GLuint last_mrf; + + GLuint header_position_offset; + GLuint offset[BRW_ATTRIB_MAX]; /* indexed with VERT_RESULT_* by the clip code; the value is used as a byte offset into a vertex */ +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier 
to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ); /* nr_verts: number of vertex slots to reserve; brw_emit_point_clip passes 0, brw_emit_tri_clip passes 3 + nr_userclip + 6 */ + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header); /* NOTE(review): the line and triangle clippers pass allocate=1, eot=0 for every vertex except the last, which gets allocate=0, eot=1 - confirm the flag semantics against the definition */ + +void brw_clip_kill_thread(struct brw_clip_compile *c); /* emitted as the final step of the line, point and triangle clip programs */ + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ); /* presumably to/from are indices into reg.vertex[]; callers pass 0, 1 or 2 - TODO confirm against the definition */ + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif /* BRW_CLIP_H */ diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c new file mode 100644 index 00000000000..83182270eac --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -0,0 +1,233 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) /* assigns the line clipper's registers and records the curb/urb read lengths and total_grf in prog_data */ +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; /* (6 + nr_userclip) / 2 rounded up; presumably two planes per register */ + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices (four slots of nr_regs registers each): + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); /* directly after t0: clip_and_emit_line clears both with a single vec2 MOV */ + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? 
*/ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following this algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * GLfloat dp0 = DOTPROD( vtx0, plane[p] ); + * GLfloat dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (IS_NEGATIVE(dp1)) { + * GLfloat t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * GLfloat t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); /* vertex[0] and vertex[1] are the inputs, vertex[2] and vertex[3] receive the clipped results */ + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + 
brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + is_negative = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); /* per the pseudo-code above: t = dp1 / (dp1 - dp0) */ + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); /* per the pseudo-code above: if (t > t1) t1 = t */ + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); /* per the pseudo-code above: t = dp0 / (dp0 - dp1) */ + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); /* per the pseudo-code above: if (t > t0) t0 = t */ + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); /* unlike the pseudo-code above, t0 + t1 is tested once, after the plane loop, as t0 + t1 < 1.0 */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE); + + brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + 
brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) /* emits the whole line clip program: register allocation, an optional flat-shade color copy (to=0, from=1), then clip and emit */ +{ + brw_clip_line_alloc_regs(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c new file mode 100644 index 00000000000..2346980a562 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_point.c @@ -0,0 +1,54 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? The emitted program reserves registers for zero vertices and then only kills the thread. + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); /* reuses the triangle allocator with nr_verts = 0 */ + brw_clip_kill_thread(c); +} diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c new file mode 100644 index 00000000000..1e6d6fa1762 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -0,0 +1,93 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "macros.h" + + + +static void upload_clip_unit( struct brw_context *brw ) /* fills a zeroed brw_clip_unit_state and stores the offset returned by brw_cache_data() in brw->clip.state_gs_offset */ +{ + struct brw_clip_unit_state clip; + + memset(&clip, 0, sizeof(clip)); + + /* CACHE_NEW_CLIP_PROG */ + clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16; /* floor((total_grf - 1) / 16) */ + clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; /* offset divided by 64 */ + clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; + clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + + /* BRW_NEW_URB_FENCE */ + clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */ + + if (INTEL_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + /* CONSTANT */ + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip6.clipper_viewport_state_ptr = 
0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + +const struct brw_tracked_state brw_clip_unit = { /* pairs upload_clip_unit with the brw and cache dirty flags its body is annotated with */ + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .update = upload_clip_unit +}; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c new file mode 100644 index 00000000000..f62b02cedfd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -0,0 +1,467 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ) /* assigns the triangle clipper's registers, reserving nr_verts vertex slots of c->nr_regs registers each, and records read lengths and total_grf in prog_data */ +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; /* (6 + nr_userclip) / 2 rounded up */ + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->nr_attrs & 1) { /* odd attribute count: emit a MOV of 0 to byte offset nr_attrs*16 + 32 of each of the first three vertices. NOTE(review): this touches vertex[0..2] even when nr_verts is 0, as brw_emit_point_clip passes - confirm that is intended */ + for (j = 0; j < 3; j++) { + GLuint delta = c->nr_attrs*16 + 32; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + 
c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) /* seeds inlist with the addresses of vertex[0..2] (the first two swapped for _3DPRIM_TRISTRIP_REVERSE), clears outlist and sets nr_verts to 3 */ +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? + */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) /* for _3DPRIM_POLYGON copies colors to vertices 1 and 2 from vertex 0; otherwise copies to vertices 0 and 1 from vertex 2 */ +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + 
brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. For each plane whose planemask bit is set, the vertex list in inlist is clipped into outlist, which is then copied back to inlist for the next plane. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* newly generated vertices are taken from vertex[3] onwards */ + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); /* NOTE(review): brw_clip_line.c retypes the null register to UD for the same planemask test - confirm the plain null register is fine here */ + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); /* loopcount counts down the input vertices while nr_verts is rebuilt as the output count */ + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + 
vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* presumably t = dpPrev / (dpPrev - dp), assuming brw_math_invert is a reciprocal */ + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* 
Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* presumably t = dp / (dp - dpPrev), assuming brw_math_invert is a reciprocal */ + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); /* -2: the list pointers are advanced by sizeof(short) per entry above */ + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), 
brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) /* emits the vertices listed in inlist as a single _3DPRIM_TRIFAN, guarded by an emitted test that nr_verts - 2 is greater than zero */ +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) /* plane setup followed by the clip loop, emitted unconditionally */ +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) /* same as do_clip_tri, but wrapped in an emitted IF on planemask != 0 */ +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + 
} + brw_ENDIF(p, do_clip); +} + + + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) /* emits the whole triangle clip program */ +{ + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); /* vertex slots: 3, plus one per enabled user clip plane, plus 6 */ + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c new file mode 100644 index 00000000000..918e0001870 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -0,0 +1,484 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0, negate(v2)); + brw_ADD(p, f, v1, negate(v2)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = 
BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + /* Do we have any colors to copy? + */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + GLuint i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + GLfloat iz = 1.0 / dir.z; + GLfloat ac = dir.x * iz; + GLfloat bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + 
brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); + struct brw_reg z = get_element(pos, 2); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + GLboolean do_offset) +{ + 
struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static 
void emit_points(struct brw_clip_compile *c, + GLboolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + GLuint mode, + GLboolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. 
+ */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + 
*/ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c new file mode 100644 index 00000000000..19bef19801a --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -0,0 +1,354 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + 
brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint i; + + /* Just copy the vertex header: + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) 
+ */ + for (i = 0; i < c->nr_attrs; i++) { + GLuint delta = i*16 + 32; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + GLuint delta = i*16 + 32; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLuint start = c->last_mrf; + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. 
Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 0, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. 
+ */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_MOV(p, c->reg.planemask, incoming); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. 
+ */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } + + /* Test for -ve rhw workaround + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +} + diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c new file mode 100644 index 00000000000..c1f6617f3fd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -0,0 +1,175 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_aub.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_exec.h" +#include "brw_save.h" +#include "brw_vs.h" +#include "imports.h" +#include "intel_tex.h" +#include "intel_blit.h" +#include "intel_batchbuffer.h" + +#include "utils.h" +#include "api_noop.h" +#include "vtxfmt.h" + +/*************************************** + * Mesa's Driver Functions + ***************************************/ + +static const struct dri_extension brw_extensions[] = +{ + { "GL_ARB_depth_texture", NULL }, + { "GL_ARB_fragment_program", NULL }, + { "GL_ARB_shadow", NULL }, + { "GL_EXT_shadow_funcs", NULL }, + /* ARB extn won't work if not enabled */ + { "GL_SGIX_depth_texture", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { NULL, NULL } +}; + + +static void brwInitDriverFunctions( struct dd_function_table *functions ) +{ + intelInitDriverFunctions( functions ); + brwInitTextureFuncs( functions ); + brwInitFragProgFuncs( functions ); +} + + +static void brw_init_attribs( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + brw->attribs.Color = &ctx->Color; + brw->attribs.Depth = &ctx->Depth; + brw->attribs.Fog = &ctx->Fog; + brw->attribs.Hint = &ctx->Hint; + brw->attribs.Light = &ctx->Light; + brw->attribs.Line = &ctx->Line; + brw->attribs.Point = &ctx->Point; + brw->attribs.Polygon = &ctx->Polygon; + brw->attribs.Scissor = &ctx->Scissor; + brw->attribs.Stencil = &ctx->Stencil; + brw->attribs.Texture = &ctx->Texture; + brw->attribs.Transform = &ctx->Transform; + brw->attribs.Viewport = &ctx->Viewport; + brw->attribs.VertexProgram = &ctx->VertexProgram; + brw->attribs.FragmentProgram = &ctx->FragmentProgram; + brw->attribs.PolygonStipple = &ctx->PolygonStipple[0]; +} + + +GLboolean brwCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + 
void *sharedContextPrivate) +{ + struct dd_function_table functions; + struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + + if (!brw) { + _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); + return GL_FALSE; + } + + brwInitVtbl( brw ); + brwInitDriverFunctions( &functions ); + + if (!intelInitContext( intel, mesaVis, driContextPriv, + sharedContextPrivate, &functions )) { + _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); + FREE(brw); + return GL_FALSE; + } + + ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; + ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; + ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT; + + + /* Advertise the full hardware capabilities. The new memory + * manager should cope much better with overload situations: + */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = (1<<11); + ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; + +/* ctx->Const.MaxNativeVertexProgramTemps = 32; */ + + + driInitExtensions( ctx, brw_extensions, GL_FALSE ); + + brw_aub_init( brw ); + + brw_init_attribs( brw ); + brw_init_metaops( brw ); + brw_init_state( brw ); + + brw->state.dirty.mesa = ~0; + brw->state.dirty.brw = ~0; + + memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + + brw->emit_state_always = 0; + + ctx->_MaintainTexEnvProgram = 1; + + brw_draw_init( brw ); + + brw_ProgramCacheInit( ctx ); + + /* Hook our functions into exec and compile dispatch tables. Only + * fallback on out-of-memory situations. 
+ */ + brw_exec_init( ctx ); + brw_save_init( ctx ); + + { + const char *filename = getenv("INTEL_REPLAY"); + if (filename) { + brw_playback_aubfile(brw, filename); + exit(0); + } + } + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h new file mode 100644 index 00000000000..1137bfd2c76 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -0,0 +1,708 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + +#include "intel_context.h" +#include "brw_structs.h" +#include "imports.h" +#include "brw_attrib.h" + + +/* Glossary: + * + * URB - unified return buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawning a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contiguous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. 
+ * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, i.e. the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitrary + * computation and emit whatever primitives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by later units, including Quads and Lineloops. + * + * CLIP - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes decisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. 
+ * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_FALLBACK_TEXTURE 0x1 +#define BRW_MAX_CURBE (32*16) + +struct brw_context; + +#define BRW_NEW_URB_FENCE 0x1 +#define BRW_NEW_FRAGMENT_PROGRAM 0x2 +#define BRW_NEW_VERTEX_PROGRAM 0x4 +#define BRW_NEW_INPUT_DIMENSIONS 0x8 +#define BRW_NEW_CURBE_OFFSETS 0x10 +#define BRW_NEW_REDUCED_PRIMITIVE 0x20 +#define BRW_NEW_PRIMITIVE 0x40 +#define BRW_NEW_CONTEXT 0x80 +#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 +#define BRW_NEW_INPUT_VARYING 0x200 +#define BRW_NEW_TNL_PROGRAM 0x400 +#define BRW_NEW_PSP 0x800 +#define BRW_NEW_METAOPS 0x1000 +#define BRW_NEW_FENCE 0x2000 +#define BRW_NEW_LOCK 0x4000 + + + +struct brw_state_flags { + GLuint mesa; + GLuint cache; + GLuint brw; +}; + +struct brw_vertex_program { + struct gl_vertex_program program; + GLuint id; + GLuint param_state; /* flags indicating state tracked by params */ +}; + + + +struct brw_fragment_program { + struct gl_fragment_program program; + GLuint id; + GLuint param_state; /* flags indicating state tracked by params */ +}; + + + + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +struct brw_wm_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + + GLuint first_curbe_grf; + GLuint total_grf; + GLuint total_scratch; + + GLuint nr_params; + GLboolean error; + + /* Pointer to tracked values (only valid once + * _mesa_load_state_parameters has been called at runtime). + */ + const GLfloat *param[BRW_MAX_CURBE]; +}; + +struct brw_sf_prog_data { + GLuint urb_read_length; + GLuint total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. 
+ * + * Actually we use 4 for each, so call it 12 rows. + */ + GLuint urb_entry_size; +}; + +struct brw_clip_prog_data { + GLuint curb_read_length; /* user planes? */ + GLuint clip_mode; + GLuint urb_read_length; + GLuint total_grf; +}; + +struct brw_gs_prog_data { + GLuint urb_read_length; + GLuint total_grf; +}; + +struct brw_vs_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + GLuint total_grf; + GLuint outputs_written; + + GLuint64EXT inputs_read; + + /* Used for calculating urb partitions: + */ + GLuint urb_entry_size; +}; + + +/* Size == 0 if output either not written, or always [0,0,0,1] + */ +struct brw_vs_ouput_sizes { + GLubyte output_size[VERT_RESULT_MAX]; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + GLuint surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct buffer *buffer; + + GLuint size; + GLuint offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + GLuint hash; + GLuint key_size; /* for variable-sized keys */ + const void *key; + + GLuint offset; /* offset within pool's buffer */ + GLuint data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + GLuint id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + GLuint size, n_items; + + GLuint key_size; /* for fixed-size keys */ + GLuint aux_size; + + GLuint aub_type; + GLuint aub_sub_type; + + GLuint last_addr; /* offset of active item */ +}; + + + +struct brw_state_pointers { + struct gl_colorbuffer_attrib *Color; + struct gl_depthbuffer_attrib *Depth; + struct gl_fog_attrib *Fog; + struct gl_hint_attrib *Hint; + struct gl_light_attrib *Light; + struct gl_line_attrib *Line; + struct gl_point_attrib *Point; + struct gl_polygon_attrib *Polygon; + GLuint *PolygonStipple; + struct 
gl_scissor_attrib *Scissor; + struct gl_stencil_attrib *Stencil; + struct gl_texture_attrib *Texture; + struct gl_transform_attrib *Transform; + struct gl_viewport_attrib *Viewport; + struct gl_vertex_program_state *VertexProgram; + struct gl_fragment_program_state *FragmentProgram; +}; + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + + /* These two are in the SS pool: + */ + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +/* Flags for brw->state.cache. 
+ */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + GLuint sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where BRW_ATTRIB_MAX > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((BRW_ATTRIB_MAX+31)/32) + +struct brw_vertex_element { + const struct gl_client_array *glarray; + + struct brw_vertex_element_state *vep; + + GLuint index; + GLuint element_size; + GLuint count; + GLuint vbo_rebase_offset; +}; + + + +struct brw_vertex_info { + GLuint64EXT varying; /* varying:1[BRW_ATTRIB_MAX] */ + GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[BRW_ATTRIB_MAX] */ +}; + + + + +/* Cache for TNL programs. 
+ */ +struct brw_tnl_cache_item { + GLuint hash; + void *key; + void *data; + struct brw_tnl_cache_item *next; +}; + +struct brw_tnl_cache { + struct brw_tnl_cache_item **items; + GLuint size, n_items; +}; + + + +struct brw_context +{ + struct intel_context intel; + GLuint primitive; + + GLboolean emit_state_always; + GLboolean wrap; + GLboolean tmp_fallback; + + struct { + struct brw_state_flags dirty; + struct brw_tracked_state **atoms; + GLuint nr_atoms; + + + struct intel_region *draw_region; + struct intel_region *depth_region; + } state; + + struct brw_state_pointers attribs; + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + /* Fallback values for inputs not supplied: + */ + struct gl_client_array current_values[BRW_ATTRIB_MAX]; + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + struct gl_client_array vbo_array[BRW_ATTRIB_MAX]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + struct { + struct gl_buffer_object *vbo[BRW_NR_UPLOAD_BUFS]; + GLuint buf; + GLuint offset; + GLuint size; + GLuint wrap; + } upload; + + /* Currenly bound arrays, including fallbacks to current_values + * above: + */ + struct brw_vertex_element inputs[BRW_ATTRIB_MAX]; + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + struct { + /* Will be allocated on demand if needed. 
+ */ + struct brw_state_pointers attribs; + struct gl_vertex_program *vp; + struct gl_fragment_program *fp; + + struct gl_buffer_object *vbo; + + struct intel_region *saved_draw_region; + struct intel_region *saved_depth_region; + + GLuint restore_draw_mask; + GLboolean active; + } metaops; + + /* Track fixed function t&l in a vertex program: + */ + struct gl_vertex_program *tnl_program; + struct brw_tnl_cache tnl_program_cache; + + /* Active vertex program: + */ + struct gl_vertex_program *vertex_program; + struct gl_fragment_program *fragment_program; + + + /* For populating the gtt: + */ + GLuint next_free_page; + + + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + GLuint vsize; /* vertex size plus header in urb registers */ + GLuint csize; /* constant buffer size in urb registers */ + GLuint sfsize; /* setup data size in urb registers */ + + GLboolean constrained; + + GLuint nr_vs_entries; + GLuint nr_gs_entries; + GLuint nr_clip_entries; + GLuint nr_sf_entries; + GLuint nr_cs_entries; + +/* GLuint vs_size; */ +/* GLuint gs_size; */ +/* GLuint clip_size; */ +/* GLuint sf_size; */ +/* GLuint cs_size; */ + + GLuint vs_start; + GLuint gs_start; + GLuint clip_start; + GLuint sf_start; + GLuint cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + GLuint wm_start; + GLuint wm_size; + GLuint clip_start; + GLuint clip_size; + GLuint vs_start; + GLuint vs_size; + GLuint total_size; + + /* Dynamic tracker which changes to reflect the state referenced + * by active fp and vp program parameters: + */ + struct brw_tracked_state tracked_state; + + GLuint gs_offset; + + GLfloat *last_buf; + GLuint last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + GLuint prog_gs_offset; + GLuint state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + GLboolean prog_active; + GLuint prog_gs_offset; + GLuint state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + GLuint prog_gs_offset; + GLuint 
vp_gs_offset; + GLuint state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + GLuint prog_gs_offset; + GLuint vp_gs_offset; + GLuint state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + + /* Input sizes, calculated from active vertex program: + */ + GLuint input_size_masks[4]; + + + /* State structs + */ + struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT]; + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + GLuint render_surf; + GLuint nr_surfaces; + + GLuint max_threads; + struct buffer *scratch_buffer; + GLuint scratch_buffer_size; + + GLuint sampler_count; + GLuint sampler_gs_offset; + + struct brw_surface_binding_table bind; + GLuint bind_ss_offset; + + GLuint prog_gs_offset; + GLuint state_gs_offset; + } wm; + + + struct { + GLuint vp_gs_offset; + GLuint state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + GLuint program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + + +/*====================================================================== + * brw_vtbl.c + */ +void brwInitVtbl( struct brw_context *brw ); +void brw_do_flush( struct brw_context *brw, + GLuint flags ); + +/*====================================================================== + * brw_context.c + */ +GLboolean brwCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); + + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state( struct brw_context *brw ); +void brw_init_state( struct brw_context *brw ); +void brw_destroy_state( struct brw_context *brw ); + + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct intel_context *intel ); +void brwInitTextureFuncs( struct dd_function_table *functions ); + 
+/*====================================================================== + * brw_metaops.c + */ + +void brw_init_metaops( struct brw_context *brw ); +void brw_destroy_metaops( struct brw_context *brw ); + + +/*====================================================================== + * brw_program.c + */ +void brwInitFragProgFuncs( struct dd_function_table *functions ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + * + * NOTE(review): brw_context() is a plain pointer cast, so it presumably + * relies on the GLcontext sitting at offset 0 of struct brw_context + * (through its leading intel_context member) -- confirm against + * intel_context.h. + */ +static inline struct brw_context * +brw_context( GLcontext *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c new file mode 100644 index 00000000000..4352c3bcb0f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -0,0 +1,379 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "glheader.h" +#include "context.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_util.h" +#include "brw_aub.h" + + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + + /* BRW_NEW_VERTEX_PROGRAM */ + struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + GLuint nr_clip_regs = 0; + GLuint total_regs; + + /* _NEW_TRANSFORM */ + if (brw->attribs.Transform->ClipPlanesEnabled) { + GLuint nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + nr_clip_regs = (nr_planes * 4 + 15) / 16; + } + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do? Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. 
+ * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs > brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + GLuint reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + + if (0) + _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = CACHE_NEW_WM_PROG + }, + .update = calculate_curbe_offsets +}; + + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. 
+ */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ + struct brw_constant_buffer_state cbs; + memset(&cbs, 0, sizeof(cbs)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cbs.header.opcode = CMD_CONST_BUFFER_STATE; + cbs.header.length = sizeof(cbs)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cbs.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + +#if 0 +const struct brw_tracked_state brw_constant_buffer_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE, + .cache = 0 + }, + .update = brw_upload_constant_buffer_state +}; +#endif + + +static GLfloat fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. + */ +static void upload_constant_buffer(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; + GLuint sz = brw->curbe.total_size; + GLuint bufsz = sz * 16 * sizeof(GLfloat); + GLfloat *buf; + GLuint i; + + /* Update our own dependency flags. This works because this + * function will also be called whenever fp or vp changes. 
+ */ + brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); + brw->curbe.tracked_state.dirty.mesa |= vp->param_state; + brw->curbe.tracked_state.dirty.mesa |= fp->param_state; + + if (sz == 0) { + struct brw_constant_buffer cb; + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 0; + cb.bits0.buffer_length = 0; + cb.bits0.buffer_address = 0; + BRW_BATCH_STRUCT(brw, &cb); + + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + + return; + } + + buf = (GLfloat *)malloc(bufsz); + + memset(buf, 0, bufsz); + + if (brw->curbe.wm_size) { + GLuint offset = brw->curbe.wm_start * 16; + + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + for (i = 0; i < brw->wm.prog_data->nr_params; i++) + buf[offset + i] = brw->wm.prog_data->param[i][0]; + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. 
+ */ + if (brw->curbe.clip_size) { + GLuint offset = brw->curbe.clip_start * 16; + GLuint j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to + * clip-space: + */ + assert(MAX_CLIP_PLANES == 6); + for (j = 0; j < MAX_CLIP_PLANES; j++) { + if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { + buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; + i++; + } + } + } + + + if (brw->curbe.vs_size) { + GLuint offset = brw->curbe.vs_start * 16; + GLuint nr = vp->program.Base.Parameters->NumParameters; + + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + for (i = 0; i < nr; i++) { + buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; + buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; + buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; + buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + } + } + + if (0) { + for (i = 0; i < sz*16; i+=4) + _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + free(buf); +/* return; */ + } + else { + if (brw->curbe.last_buf) + free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + + if (!brw_pool_alloc(pool, + bufsz, + 6, + &brw->curbe.gs_offset)) { + _mesa_printf("out of GS memory for curbe\n"); + assert(0); + return; + } + + + /* Copy data to the buffer: + */ + bmBufferSubDataAUB(&brw->intel, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + DW_CONSTANT_BUFFER, + 0); + } + + /* TODO: only emit the constant_buffer packet when necessary, ie: + - contents have changed + - offset has changed + - hw requirements due to other packets emitted. + */ + { + struct brw_constant_buffer cb; + + memset(&cb, 0, sizeof(cb)); + + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 1; + cb.bits0.buffer_length = sz - 1; + cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ +/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ + BRW_BATCH_STRUCT(brw, &cb); +/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ + } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. 
This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + * + * NOTE(review): the per-context copy is presumably + * brw->curbe.tracked_state, whose dirty.mesa mask + * upload_constant_buffer() rewrites on every call -- confirm where the + * copy is made. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS), + .cache = (CACHE_NEW_WM_PROG) + }, + .update = upload_constant_buffer +}; + diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h new file mode 100644 index 00000000000..e8f878a7018 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -0,0 +1,850 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define 
MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define 
_3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define 
BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 
+#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define 
BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define 
BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define 
BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define 
BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 
1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 
+#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define 
BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define 
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define 
BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + + + 
+#endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c new file mode 100644 index 00000000000..f12fb4c7f3e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -0,0 +1,457 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "glheader.h" +#include "context.h" +#include "state.h" +#include "api_validate.h" +#include "enums.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_attrib.h" +#include "brw_context.h" +#include "brw_aub.h" +#include "brw_state.h" +#include "brw_fallback.h" + +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" + + + + + + +static GLuint hw_prim[GL_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + +static const GLenum reduced_prim[GL_POLYGON+1] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. 
+ */
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+{
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+
+   /* Slight optimization to avoid the GS program when not needed:
+    * a quad strip that is not flat-shaded and is filled on both
+    * faces is drawn as a triangle strip instead.
+    */
+   if (prim == GL_QUAD_STRIP &&
+       brw->attribs.Light->ShadeModel != GL_FLAT &&
+       brw->attribs.Polygon->FrontMode == GL_FILL &&
+       brw->attribs.Polygon->BackMode == GL_FILL)
+      prim = GL_TRIANGLE_STRIP;
+   /* Only flag dirty state and re-validate when the (possibly
+    * substituted) primitive differs from the one last recorded.
+    */
+   if (prim != brw->primitive) {
+      brw->primitive = prim;
+      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+      if (reduced_prim[prim] != brw->intel.reduced_primitive) {
+         brw->intel.reduced_primitive = reduced_prim[prim];
+         brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+      }
+
+      brw_validate_state(brw);
+   }
+   /* Hardware topology code for the primitive actually selected.
+    * NOTE(review): prim indexes hw_prim/reduced_prim unchecked, so
+    * callers presumably only pass GL_POINTS..GL_POLYGON -- confirm.
+    */
+   return hw_prim[prim];
+}
+
+/* Trim a vertex count to whole primitives: a quad strip needs more
+ * than 3 vertices and is rounded down to an even count (else 0),
+ * quads are rounded down to a multiple of 4, and every other mode is
+ * returned unchanged.
+ */
+static GLuint trim(GLenum prim, GLuint length)
+{
+   if (prim == GL_QUAD_STRIP)
+      return length > 3 ? (length - length % 2) : 0;
+   else if (prim == GL_QUADS)
+      return length - length % 4;
+   else
+      return length;
+}
+
+
+
+/* Fill in a brw_3d_primitive packet for one draw primitive and copy
+ * it into the batch buffer.  brw_set_prim() is always called (and may
+ * re-validate state); the packet itself is only emitted when the
+ * trimmed vertex count is non-zero.
+ */
+static void brw_emit_prim( struct brw_context *brw,
+                           const struct brw_draw_prim *prim )
+
+{
+   struct brw_3d_primitive prim_packet;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+                   prim->start, prim->count);
+   /* Header: length is the packet size in 4-byte units, minus 2. */
+   prim_packet.header.opcode = CMD_3D_PRIM;
+   prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+   prim_packet.header.pad = 0;
+   prim_packet.header.topology = brw_set_prim(brw, prim->mode);
+   prim_packet.header.indexed = prim->indexed;
+
+   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
+   prim_packet.start_vert_location = prim->start;
+   prim_packet.instance_count = 1;
+   prim_packet.start_instance_location = 0;
+   prim_packet.base_vert_location = 0;
+   /* Nothing to draw if trimming left no vertices. */
+   if (prim_packet.verts_per_instance) {
+      intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet),
+                              INTEL_BATCH_CLIPRECTS);
+   }
+}
+
+
+/* Derive array->Size (1..4) for a current-value array from its first
+ * four floats: components are dropped from the end while they equal
+ * 1.0 for the fourth and 0.0 for the third and second.
+ */
+static void update_current_size( struct gl_client_array *array)
+{
+   const GLfloat *ptr =
(const GLfloat *)array->Ptr; + + assert(array->StrideB == 0); + assert(array->Type == GL_FLOAT || array->Type == GL_UNSIGNED_BYTE); + + if (ptr[3] != 1.0) + array->Size = 4; + else if (ptr[2] != 0.0) + array->Size = 3; + else if (ptr[1] != 0.0) + array->Size = 2; + else + array->Size = 1; +} + + + +/* Fill in any gaps in passed arrays with pointers to current + * attributes: + */ +static void brw_merge_inputs( struct brw_context *brw, + const struct gl_client_array *arrays[]) +{ + struct gl_client_array *current_values = brw->vb.current_values; + struct brw_vertex_element *inputs = brw->vb.inputs; + struct brw_vertex_info old = brw->vb.info; + GLuint i; + + memset(inputs, 0, sizeof(*inputs)); + memset(&brw->vb.info, 0, sizeof(brw->vb.info)); + + for (i = 0; i < BRW_ATTRIB_MAX; i++) { + if (arrays[i] && arrays[i]->Enabled) + { + brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.info.varying |= 1 << i; + } + else + { + brw->vb.inputs[i].glarray = ¤t_values[i]; + update_current_size(¤t_values[i]); + } + + brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2); + } + + /* Raise statechanges if input sizes and varying have changed: + */ + if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) + brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; + + if (brw->vb.info.varying != old.varying) + brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING; +} + +static GLboolean check_fallbacks( struct brw_context *brw, + const struct brw_draw_prim *prim, + GLuint nr_prims ) +{ + GLuint i; + + if (!brw->intel.strict_conformance) + return GL_FALSE; + + if (brw->attribs.Polygon->SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_TRIANGLES) + return GL_TRUE; + } + + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. 
TBD whether we keep this fallback:
+    */
+   if (brw->attribs.Line->SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+         if (reduced_prim[prim[i].mode] == GL_LINES)
+            return GL_TRUE;
+   }
+
+   /* Stipple -- these fallbacks could be resolved with a little
+    * bit of work?
+    *
+    * With line stipple enabled, line loops and polygons drawn in
+    * GL_LINE mode on either face are rejected.
+    */
+   if (brw->attribs.Line->StippleFlag) {
+      for (i = 0; i < nr_prims; i++) {
+         /* GS doesn't get enough information to know when to reset
+          * the stipple counter?!?
+          */
+         if (prim[i].mode == GL_LINE_LOOP)
+            return GL_TRUE;
+
+         if (prim[i].mode == GL_POLYGON &&
+             (brw->attribs.Polygon->FrontMode == GL_LINE ||
+              brw->attribs.Polygon->BackMode == GL_LINE))
+            return GL_TRUE;
+      }
+   }
+
+   /* Smooth points are rejected as well. */
+   if (brw->attribs.Point->SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+         if (prim[i].mode == GL_POINTS)
+            return GL_TRUE;
+   }
+
+   return GL_FALSE;
+}
+
+/* One attempt at drawing nr_prims primitives through the hardware.
+ *
+ * Returns GL_TRUE when every primitive was emitted and the batch
+ * buffer flush succeeded.  Returns GL_FALSE when a fallback is
+ * active or required, when brw_upload_vertices() fails, or when the
+ * flush fails.  The `flags` argument is not used here.
+ */
+static GLboolean brw_try_draw_prims( GLcontext *ctx,
+                                     const struct gl_client_array *arrays[],
+                                     const struct brw_draw_prim *prim,
+                                     GLuint nr_prims,
+                                     const struct brw_draw_index_buffer *ib,
+                                     GLuint min_index,
+                                     GLuint max_index,
+                                     GLuint flags )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   GLboolean retval = GL_FALSE;
+   GLuint i;
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
+   /* Bind all inputs, derive varying and size information:
+    */
+   brw_merge_inputs( brw, arrays );
+
+   /* Have to validate state quite late.  Will rebuild tnl_program,
+    * which depends on varying information.
+    *
+    * Note this is where brw->vs->prog_data.inputs_read is calculated,
+    * so can't access it earlier.
+    */
+
+   LOCK_HARDWARE(intel);
+   {
+      assert(intel->locked);
+
+      /* Set the first primitive early, ahead of validate_state:
+       *
+       * NOTE(review): prim[0] is read unconditionally, so callers
+       * presumably never pass nr_prims == 0 -- confirm.
+       */
+      brw_set_prim(brw, prim[0].mode);
+
+      /* XXX: Need to separate validate and upload of state.
+       */
+      brw_validate_state( brw );
+
+      /* Various fallback checks:
+       *
+       * Either one leaves retval at GL_FALSE and jumps to the
+       * flush/cleanup code at `out`.
+       */
+      if (brw->intel.Fallback)
+         goto out;
+
+      if (check_fallbacks( brw, prim, nr_prims ))
+         goto out;
+
+      /* Upload index, vertex data:
+       */
+      if (ib)
+         brw_upload_indices( brw, ib );
+
+      if (!brw_upload_vertices( brw, min_index, max_index)) {
+         goto out;
+      }
+
+      /* Emit prims to batchbuffer:
+       */
+      for (i = 0; i < nr_prims; i++) {
+         brw_emit_prim(brw, &prim[i]);
+      }
+
+      retval = GL_TRUE;
+   }
+
+ out:
+
+   /* Currently have to do this to synchronize with the map/unmap of
+    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
+    * way around this, as not every flush is due to a buffer filling
+    * up.
+    *
+    * This flush runs on the bail-out paths too; a failed flush turns
+    * an otherwise successful draw into a failure.
+    */
+   if (!intel_batchbuffer_flush( brw->intel.batch )) {
+      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
+      retval = GL_FALSE;
+   }
+
+   if (retval && intel->thrashing) {
+      bmSetFence(intel);
+   }
+
+   /* Free any old data so it doesn't clog up texture memory - we
+    * won't be referencing it again.
+    *
+    * Every upload buffer from `wrap` up to (not including) `buf` is
+    * re-specified with NULL data at BRW_UPLOAD_INIT_SIZE, advancing
+    * `wrap` modulo BRW_NR_UPLOAD_BUFS until it catches up with `buf`.
+    */
+   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
+      ctx->Driver.BufferData(ctx,
+                             GL_ARRAY_BUFFER_ARB,
+                             BRW_UPLOAD_INIT_SIZE,
+                             NULL,
+                             GL_DYNAMIC_DRAW_ARB,
+                             brw->vb.upload.vbo[brw->vb.upload.wrap]);
+      brw->vb.upload.wrap++;
+      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   if (!retval)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+/* Draw entry point declared in brw_draw.h.  Makes one attempt via
+ * brw_try_draw_prims(); if that fails and bmError() reports an error,
+ * makes exactly one more attempt.  Afterwards, when intel->aub_file
+ * is set, calls intelFinish() and sets intel->aub_wrap.  Returns the
+ * result of the last attempt.
+ */
+GLboolean brw_draw_prims( GLcontext *ctx,
+                          const struct gl_client_array *arrays[],
+                          const struct brw_draw_prim *prim,
+                          GLuint nr_prims,
+                          const struct brw_draw_index_buffer *ib,
+                          GLuint min_index,
+                          GLuint max_index,
+                          GLuint flags )
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLboolean retval;
+
+   retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index, flags);
+
+
+   if (!retval && bmError(intel)) {
+
+      DBG("retrying\n");
+      /* This looks like out-of-memory but potentially we have
+       * situation where there is enough memory but it has become
+       * fragmented.
Clear out all heaps and start from scratch by
+       * faking a contended lock event:  (done elsewhere)
+       */
+
+      /* Then try a second time only to upload textures and draw the
+       * primitives:
+       */
+      retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index, flags);
+   }
+
+   if (intel->aub_file) {
+      intelFinish( &intel->ctx );
+      intel->aub_wrap = 1;
+   }
+
+
+   return retval;
+}
+
+/* Invalidate callback registered on every upload buffer by
+ * brw_draw_init().  Deliberately empty; neither argument is used.
+ */
+static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
+{
+   /* nothing to do, we don't rely on the contents being preserved */
+}
+
+/* Set up the draw module for a context: create BRW_NR_UPLOAD_BUFS
+ * upload buffer objects, register brw_invalidate_vbo_cb on each, give
+ * buffer 0 its initial BRW_UPLOAD_INIT_SIZE storage, and initialise
+ * the current-value arrays via brw_init_current_values().
+ */
+void brw_draw_init( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;
+
+   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
+      brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
+
+      /* XXX: Set these to no-backing-store
+       */
+      bmBufferSetInvalidateCB(&brw->intel,
+                              intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])),
+                              brw_invalidate_vbo_cb,
+                              &brw->intel,
+                              GL_TRUE);
+
+   }
+
+   ctx->Driver.BufferData( ctx,
+                           GL_ARRAY_BUFFER_ARB,
+                           BRW_UPLOAD_INIT_SIZE,
+                           NULL,
+                           GL_DYNAMIC_DRAW_ARB,
+                           brw->vb.upload.vbo[0] );
+
+
+   brw_init_current_values(ctx, brw->vb.current_values);
+}
+/* Delete the upload buffer objects created by brw_draw_init(). */
+void brw_draw_destroy( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++)
+      ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
new file mode 100644
index 00000000000..92640bf725b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -0,0 +1,104 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "mtypes.h" /* for GLcontext... 
*/ +#include "brw_attrib.h" + +struct brw_context; + /* One primitive to draw. The bitfields add up to 32 bits (8+1+1+1+1+20). NOTE(review): start/count presumably select a range of vertices or indices -- confirm against brw_emit_prim(). */ +struct brw_draw_prim { + GLuint mode:8; + GLuint indexed:1; + GLuint begin:1; + GLuint end:1; + GLuint weak:1; + GLuint pad:20; + + GLuint start; + GLuint count; +}; + /* Index data for an indexed draw, as consumed by brw_upload_indices(). When obj->Name is 0 the indices are read from `ptr` and copied into an upload VBO, with `rebase` subtracted from each index; otherwise `ptr` is cast to an integer and used as a byte offset into `obj`. `count` is the number of indices and `type` their GL type. */ +struct brw_draw_index_buffer { + GLuint count; + GLenum type; + struct gl_buffer_object *obj; + const void *ptr; + GLuint rebase; +}; + /* Bit values, presumably for the `flags` argument of brw_draw_prims() -- TODO confirm, no consumer is visible in this header. */ + +#define BRW_DRAW_SORTED 0x1 +#define BRW_DRAW_ALL_INTERLEAVED 0x2 +#define BRW_DRAW_NON_INTERLEAVED 0x4 +#define BRW_DRAW_LOCKED 0x8 + +GLboolean brw_draw_prims( GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct brw_draw_prim *prims, + GLuint nr_prims, + const struct brw_draw_index_buffer *ib, + GLuint min_index, + GLuint max_index, + GLuint flags ); + +void brw_draw_init( struct brw_context *brw ); +void brw_draw_destroy( struct brw_context *brw ); + +/* brw_draw_current.c + */ +void brw_init_current_values(GLcontext *ctx, + struct gl_client_array *arrays); + + +/* brw_draw_upload.c + */ +void brw_upload_indices( struct brw_context *brw, + const struct brw_draw_index_buffer *index_buffer); + +GLboolean brw_upload_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ); + + +/* Helpers for save, exec. Should probably have their own file: + */ +struct brw_exec_context; +struct brw_save_context; + +struct brw_exec_save { + struct brw_exec_context *exec; + struct brw_save_context *save; +}; + +/* Doesn't really belong here. Reinterprets ctx->swtnl_im as a struct brw_exec_save pointer: + */ +#define IMM_CONTEXT(ctx) ((struct brw_exec_save *)((ctx)->swtnl_im)) + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_current.c b/src/mesa/drivers/dri/i965/brw_draw_current.c new file mode 100644 index 00000000000..98d930738e5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_draw_current.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "glheader.h" +#include "context.h" +#include "state.h" +#include "api_validate.h" +#include "enums.h" + +#include "brw_context.h" +#include "brw_draw.h" + +#include "bufmgr.h" +#include "intel_buffer_objects.h" + /* Fill arrays[0 .. BRW_ATTRIB_MAX-1] with zero-stride client arrays that point straight at the context's current attribute values (ctx->Current.*) and material values (ctx->Light.Material.Attrib[]). Every entry is marked Enabled and bound to ctx->Array.NullBufferObj. `arrays` must have room for BRW_ATTRIB_MAX entries; it is cleared first. */ + +void brw_init_current_values(GLcontext *ctx, + struct gl_client_array *arrays) +{ + GLuint i; + + memset(arrays, 0, sizeof(*arrays) * BRW_ATTRIB_MAX); + + /* Set up a constant (StrideB == 0) array for each current + * attribute: + */ + for (i = 0; i < BRW_ATTRIB_MAX; i++) { + struct gl_client_array *cl = &arrays[i]; + /* First pick the component count for this attribute. */ + switch (i) { + case BRW_ATTRIB_MAT_FRONT_SHININESS: + case BRW_ATTRIB_MAT_BACK_SHININESS: + case BRW_ATTRIB_INDEX: + case BRW_ATTRIB_EDGEFLAG: + cl->Size = 1; + break; + case BRW_ATTRIB_MAT_FRONT_INDEXES: + case BRW_ATTRIB_MAT_BACK_INDEXES: + cl->Size = 3; + break; + default: + /* This is fixed for the material attributes, for others will + * be determined at runtime: + */ + if (i >= BRW_ATTRIB_MAT_FRONT_AMBIENT) + cl->Size = 4; + else + cl->Size = 1; + break; + } + /* Then pick the data type and the address of the current value: edge flag is GL_UNSIGNED_BYTE, everything else GL_FLOAT; indices at or above BRW_ATTRIB_FIRST_MATERIAL read from the material table. */ + switch (i) { + case BRW_ATTRIB_EDGEFLAG: + cl->Type = GL_UNSIGNED_BYTE; + cl->Ptr = (const void *)&ctx->Current.EdgeFlag; + break; + case BRW_ATTRIB_INDEX: + cl->Type = GL_FLOAT; + cl->Ptr = (const void *)&ctx->Current.Index; + break; + default: + cl->Type = GL_FLOAT; + if (i < BRW_ATTRIB_FIRST_MATERIAL) + cl->Ptr = (const void *)ctx->Current.Attrib[i]; + else + cl->Ptr = (const void *)ctx->Light.Material.Attrib[i - BRW_ATTRIB_FIRST_MATERIAL]; + break; + } + + cl->Stride = 0; + cl->StrideB = 0; + cl->Enabled = 1; + cl->Flags = 0; + cl->BufferObj = ctx->Array.NullBufferObj; + } +} + + diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c new file mode 100644 index 00000000000..8c6b5a6d2c4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -0,0 +1,678 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "glheader.h" +#include "context.h" +#include "state.h" +#include "api_validate.h" +#include "enums.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_attrib.h" +#include "brw_context.h" +#include "brw_aub.h" +#include "brw_state.h" +#include "brw_fallback.h" + +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" + /* CPU-side staging copy of the vertex-buffer packet built in brw_upload_vertices(): one header plus up to BRW_VBP_MAX buffer entries. The vb0 bitfields add up to 32 bits (11+15+1+5) and alias `dword`, which is what gets written to the batch. */ + +struct brw_array_state { + union header_union header; + + struct { + union { + struct { + GLuint pitch:11; + GLuint pad:15; + GLuint access_type:1; + GLuint vb_index:5; + } bits; + GLuint dword; + } vb0; + + struct buffer *buffer; + GLuint offset; + + GLuint max_index; + GLuint instance_data_step_rate; + + } vb[BRW_VBP_MAX]; +}; + /* Return the struct buffer that backs a client array's BufferObj. */ + +static struct buffer *array_buffer( const struct gl_client_array *array ) +{ + return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj)); +} + /* Surface-format lookup tables used by get_surface_type(). Each is indexed by the attribute's component count; slot 0 holds a placeholder 0 and slots 1..4 hold the 1- to 4-component format. */ +static GLuint double_types[5] = { + 0, + BRW_SURFACEFORMAT_R64_FLOAT, + BRW_SURFACEFORMAT_R64G64_FLOAT, + BRW_SURFACEFORMAT_R64G64B64_FLOAT, + BRW_SURFACEFORMAT_R64G64B64A64_FLOAT +}; + +static GLuint float_types[5] = { + 0, + BRW_SURFACEFORMAT_R32_FLOAT, + BRW_SURFACEFORMAT_R32G32_FLOAT, + BRW_SURFACEFORMAT_R32G32B32_FLOAT, + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT +}; + +static GLuint uint_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R32_UNORM, + BRW_SURFACEFORMAT_R32G32_UNORM, + BRW_SURFACEFORMAT_R32G32B32_UNORM, + BRW_SURFACEFORMAT_R32G32B32A32_UNORM +}; + +static GLuint uint_types_scale[5] = { + 0, + BRW_SURFACEFORMAT_R32_USCALED, + BRW_SURFACEFORMAT_R32G32_USCALED, + BRW_SURFACEFORMAT_R32G32B32_USCALED, + BRW_SURFACEFORMAT_R32G32B32A32_USCALED +}; + +static GLuint int_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R32_SNORM, + BRW_SURFACEFORMAT_R32G32_SNORM, + BRW_SURFACEFORMAT_R32G32B32_SNORM, + BRW_SURFACEFORMAT_R32G32B32A32_SNORM +}; + +static GLuint int_types_scale[5] = { + 0, + 
BRW_SURFACEFORMAT_R32_SSCALED, + BRW_SURFACEFORMAT_R32G32_SSCALED, + BRW_SURFACEFORMAT_R32G32B32_SSCALED, + BRW_SURFACEFORMAT_R32G32B32A32_SSCALED +}; + /* The 16-bit and 8-bit tables below follow the same layout: index = component count, slot 0 is a placeholder 0. */ +static GLuint ushort_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R16_UNORM, + BRW_SURFACEFORMAT_R16G16_UNORM, + BRW_SURFACEFORMAT_R16G16B16_UNORM, + BRW_SURFACEFORMAT_R16G16B16A16_UNORM +}; + +static GLuint ushort_types_scale[5] = { + 0, + BRW_SURFACEFORMAT_R16_USCALED, + BRW_SURFACEFORMAT_R16G16_USCALED, + BRW_SURFACEFORMAT_R16G16B16_USCALED, + BRW_SURFACEFORMAT_R16G16B16A16_USCALED +}; + +static GLuint short_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R16_SNORM, + BRW_SURFACEFORMAT_R16G16_SNORM, + BRW_SURFACEFORMAT_R16G16B16_SNORM, + BRW_SURFACEFORMAT_R16G16B16A16_SNORM +}; + +static GLuint short_types_scale[5] = { + 0, + BRW_SURFACEFORMAT_R16_SSCALED, + BRW_SURFACEFORMAT_R16G16_SSCALED, + BRW_SURFACEFORMAT_R16G16B16_SSCALED, + BRW_SURFACEFORMAT_R16G16B16A16_SSCALED +}; + +static GLuint ubyte_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R8_UNORM, + BRW_SURFACEFORMAT_R8G8_UNORM, + BRW_SURFACEFORMAT_R8G8B8_UNORM, + BRW_SURFACEFORMAT_R8G8B8A8_UNORM +}; + +static GLuint ubyte_types_scale[5] = { + 0, + BRW_SURFACEFORMAT_R8_USCALED, + BRW_SURFACEFORMAT_R8G8_USCALED, + BRW_SURFACEFORMAT_R8G8B8_USCALED, + BRW_SURFACEFORMAT_R8G8B8A8_USCALED +}; + +static GLuint byte_types_norm[5] = { + 0, + BRW_SURFACEFORMAT_R8_SNORM, + BRW_SURFACEFORMAT_R8G8_SNORM, + BRW_SURFACEFORMAT_R8G8B8_SNORM, + BRW_SURFACEFORMAT_R8G8B8A8_SNORM +}; + +static GLuint byte_types_scale[5] = { + 0, + BRW_SURFACEFORMAT_R8_SSCALED, + BRW_SURFACEFORMAT_R8G8_SSCALED, + BRW_SURFACEFORMAT_R8G8B8_SSCALED, + BRW_SURFACEFORMAT_R8G8B8A8_SSCALED +}; + /* Map (GL component type, component count, normalized flag) to a BRW_SURFACEFORMAT_* value. Integer types use the *_norm tables when `normalized` is set and the *_scale tables otherwise; GL_DOUBLE and GL_FLOAT use the same table either way. `size` indexes a 5-entry table and is not range-checked here, so it must be 0..4. Unrecognised types hit assert(0) and return 0. */ + +static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) +{ + if (INTEL_DEBUG & DEBUG_VERTS) + _mesa_printf("type %s size %d normalized %d\n", + _mesa_lookup_enum_by_nr(type), size, normalized); + + if (normalized) { + switch (type) { + case GL_DOUBLE: return double_types[size]; + case 
GL_FLOAT: return float_types[size]; + case GL_INT: return int_types_norm[size]; + case GL_SHORT: return short_types_norm[size]; + case GL_BYTE: return byte_types_norm[size]; + case GL_UNSIGNED_INT: return uint_types_norm[size]; + case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; + case GL_UNSIGNED_BYTE: return ubyte_types_norm[size]; + default: assert(0); return 0; + } + } + else { + switch (type) { + case GL_DOUBLE: return double_types[size]; + case GL_FLOAT: return float_types[size]; + case GL_INT: return int_types_scale[size]; + case GL_SHORT: return short_types_scale[size]; + case GL_BYTE: return byte_types_scale[size]; + case GL_UNSIGNED_INT: return uint_types_scale[size]; + case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; + case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; + default: assert(0); return 0; + } + } +} + /* Size in bytes of one component of the given GL type. Unlike the other lookups in this file, an unrecognised type silently returns 0 rather than asserting. */ + +static GLuint get_size( GLenum type ) +{ + switch (type) { + case GL_DOUBLE: return sizeof(GLdouble); + case GL_FLOAT: return sizeof(GLfloat); + case GL_INT: return sizeof(GLint); + case GL_SHORT: return sizeof(GLshort); + case GL_BYTE: return sizeof(GLbyte); + case GL_UNSIGNED_INT: return sizeof(GLuint); + case GL_UNSIGNED_SHORT: return sizeof(GLushort); + case GL_UNSIGNED_BYTE: return sizeof(GLubyte); + default: return 0; + } +} + /* Translate a GL index type into the matching BRW_INDEX_* value; any other type hits assert(0) and returns 0. */ +static GLuint get_index_type(GLenum type) +{ + switch (type) { + case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; + case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; + case GL_UNSIGNED_INT: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + /* Copy `count` elements of `size` bytes each from `src`, whose elements are `stride` bytes apart, into a tightly packed `dest`. When size == stride the whole range is copied in one do_memcpy() call. NOTE(review): `stride - size` is unsigned, so with stride 0 it wraps; copy_array_to_vbo_array() asserts count == 1 for zero-stride arrays, in which case the adjusted src is never read again -- confirm no other caller passes stride < size with count > 1. */ +static void copy_strided_array( GLubyte *dest, + const GLubyte *src, + GLuint size, + GLuint stride, + GLuint count ) +{ + if (size == stride) + do_memcpy(dest, src, count * size); + else { + GLuint i,j; + + for (i = 0; i < count; i++) { + for (j = 0; j < size; j++) + *dest++ = *src++; + src += (stride - size); + } + } +} + /* Switch to the next upload buffer (round-robin over BRW_NR_UPLOAD_BUFS), reset the upload offset to 0 and re-specify that buffer's storage with max(size, BRW_UPLOAD_INIT_SIZE) bytes. */ +static void wrap_buffers( struct brw_context *brw, + GLuint size ) +{ + GLcontext *ctx = &brw->intel.ctx; + + if (size < 
BRW_UPLOAD_INIT_SIZE) + size = BRW_UPLOAD_INIT_SIZE; + + brw->vb.upload.buf++; + brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS; + brw->vb.upload.offset = 0; + + ctx->Driver.BufferData(ctx, + GL_ARRAY_BUFFER_ARB, + size, + NULL, + GL_DYNAMIC_DRAW_ARB, + brw->vb.upload.vbo[brw->vb.upload.buf]); +} + /* Reserve `size` bytes in the current upload buffer. The size is rounded up to a multiple of 64; if the reservation would run past BRW_UPLOAD_INIT_SIZE the next buffer is selected via wrap_buffers(). The chosen buffer object and the byte offset of the reservation are returned through vbo_return / offset_return. */ +static void get_space( struct brw_context *brw, + GLuint size, + struct gl_buffer_object **vbo_return, + GLuint *offset_return ) +{ + size = (size + 63) & ~63; + + if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) + wrap_buffers(brw, size); + + *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf]; + *offset_return = brw->vb.upload.offset; + + brw->vb.upload.offset += size; +} + /* Copy `count` elements of `element_size` bytes from a client array, starting at element `min_index`, into upload-buffer space, and fill brw->vb.vbo_array[i] so that it describes the copy. The returned array's Ptr holds the byte offset inside the upload buffer (not a CPU address) and its stride is element_size, or 0 when the source array has StrideB == 0 (in which case count must be 1). */ + + +static struct gl_client_array * +copy_array_to_vbo_array( struct brw_context *brw, + GLuint i, + const struct gl_client_array *array, + GLuint element_size, + GLuint min_index, + GLuint count) +{ + GLcontext *ctx = &brw->intel.ctx; + struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; + GLuint size = count * element_size; + struct gl_buffer_object *vbo; + GLuint offset; + GLuint new_stride; + + get_space(brw, size, &vbo, &offset); + + if (array->StrideB == 0) { + assert(count == 1); + new_stride = 0; + } + else + new_stride = element_size; + + vbo_array->Size = array->Size; + vbo_array->Type = array->Type; + vbo_array->Stride = new_stride; + vbo_array->StrideB = new_stride; + vbo_array->Ptr = (const void *)offset; + vbo_array->Enabled = 1; + vbo_array->Normalized = array->Normalized; + vbo_array->_MaxElement = array->_MaxElement; /* ? */ + vbo_array->Flags = array->Flags; /* ? 
*/ + vbo_array->BufferObj = vbo; + /* Map the upload buffer, copy the selected elements to the reserved offset, then unmap. NOTE(review): GL_DYNAMIC_DRAW_ARB is the usage value passed to BufferData elsewhere in this file; confirm it is the intended third argument to MapBuffer here. The return value of MapBuffer is not checked. */ + { + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + vbo); + + map += offset; + + copy_strided_array( map, + array->Ptr + min_index * array->StrideB, + element_size, + array->StrideB, + count); + + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj); + } + + return vbo_array; +} + /* Describe `array` as a member of an interleaved block that has already been uploaded for `uploaded_array`. The result (brw->vb.vbo_array[i]) shares uploaded_array's BufferObj, keeps array's own size, type and strides, and has Ptr set to uploaded_array->Ptr plus the byte distance between array->Ptr and the block's base address `ptr`. No data is copied. */ + + +static struct gl_client_array * +interleaved_vbo_array( struct brw_context *brw, + GLuint i, + const struct gl_client_array *uploaded_array, + const struct gl_client_array *array, + const char *ptr) +{ + struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; + + vbo_array->Size = array->Size; + vbo_array->Type = array->Type; + vbo_array->Stride = array->Stride; + vbo_array->StrideB = array->StrideB; + vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr + + ((const char *)array->Ptr - ptr)); + vbo_array->Enabled = 1; + vbo_array->Normalized = array->Normalized; + vbo_array->_MaxElement = array->_MaxElement; + vbo_array->Flags = array->Flags; /* ? 
*/ + vbo_array->BufferObj = uploaded_array->BufferObj; + + return vbo_array; +} + /* Prepare vertex data for a draw covering indices min_index..max_index. For every input bit set in brw->vs.prog_data->inputs_read it records the element size and count, uploads arrays whose BufferObj has Name == 0 (as one interleaved block when possible, otherwise one by one), and then emits the CMD_VERTEX_BUFFER and CMD_VERTEX_ELEMENT packets. Returns GL_FALSE, without emitting anything, when input 0 is a non-VBO array with StrideB == 0 or when nr_enabled >= BRW_VEP_MAX; otherwise GL_TRUE. */ + +GLboolean brw_upload_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLuint tmp = brw->vs.prog_data->inputs_read; + struct brw_vertex_element_packet vep; + struct brw_array_state vbp; + GLuint i; + const void *ptr = NULL; + GLuint interleave = 0; + + struct brw_vertex_element *enabled[BRW_ATTRIB_MAX]; + GLuint nr_enabled = 0; + + struct brw_vertex_element *upload[BRW_ATTRIB_MAX]; + GLuint nr_uploads = 0; + + + memset(&vbp, 0, sizeof(vbp)); + memset(&vep, 0, sizeof(vep)); + + /* First build an array of pointers to ve's in vb.inputs_read. Each pass takes the lowest set bit of `tmp` (ffs), clears it, and records the matching brw->vb.inputs[] entry. + */ + + while (tmp) { + GLuint i = ffs(tmp)-1; + struct brw_vertex_element *input = &brw->vb.inputs[i]; + + tmp &= ~(1<<i); + enabled[nr_enabled++] = input; + + input->index = i; + input->element_size = get_size(input->glarray->Type) * input->glarray->Size; + input->count = input->glarray->StrideB ? 
max_index - min_index : 1; + /* NOTE(review): the count above is max_index - min_index for strided arrays; if max_index is an inclusive index this is one element short -- confirm against the caller. Arrays whose BufferObj has Name == 0 are queued for upload below; others only get a rebase offset. */ + if (!input->glarray->BufferObj->Name) { + if (i == 0) { + /* Position array not properly enabled: + */ + if (input->glarray->StrideB == 0) + return GL_FALSE; + + interleave = input->glarray->StrideB; + ptr = input->glarray->Ptr; + } + else if (interleave != input->glarray->StrideB || + (const char *)input->glarray->Ptr - (const char *)ptr > interleave) { + interleave = 0; + } + + upload[nr_uploads++] = input; + input->vbo_rebase_offset = 0; + } + else + input->vbo_rebase_offset = min_index * input->glarray->StrideB; + } + + /* Upload interleaved arrays if all uploads are interleaved. `interleave` is still non-zero here only if input 0 was an upload and every later upload had the same StrideB and started no more than `interleave` bytes after input 0's pointer. The first upload is copied with element size = interleave; the others just reference that copy. + */ + if (nr_uploads > 1 && + interleave && + interleave <= 256) { + struct brw_vertex_element *input0 = upload[0]; + + input0->glarray = copy_array_to_vbo_array(brw, 0, + input0->glarray, + interleave, + min_index, + input0->count); + + for (i = 1; i < nr_uploads; i++) { + upload[i]->glarray = interleaved_vbo_array(brw, + i, + input0->glarray, + upload[i]->glarray, + ptr); + } + } + else { + for (i = 0; i < nr_uploads; i++) { + struct brw_vertex_element *input = upload[i]; + + input->glarray = copy_array_to_vbo_array(brw, i, + input->glarray, + input->element_size, + min_index, + input->count); + + } + } + + /* XXX: In the rare cases where this happens we fallback all + * the way to software rasterization, although a tnl fallback + * would be sufficient. I don't know of *any* real world + * cases with > 17 vertex attributes enabled, so it probably + * isn't an issue at this point. + */ + if (nr_enabled >= BRW_VEP_MAX) + return GL_FALSE; + + /* This still defines a hardware VB for each input, even if they + * are interleaved or from the same VBO. TBD if this makes a + * performance difference. 
+ */ + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + + input->vep = &vep.ve[i]; + input->vep->ve0.src_format = get_surface_type(input->glarray->Type, + input->glarray->Size, + input->glarray->Normalized); + input->vep->ve0.valid = 1; + input->vep->ve1.dst_offset = (i) * 4; + input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + /* All four components default to STORE_SRC above. The cases below fall through on purpose: components the array does not supply are overridden with STORE_0, and component 3 with STORE_1_FLT. A 4-component array matches no case and keeps STORE_SRC everywhere. */ + switch (input->glarray->Size) { + case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0; /* fall through */ + case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; /* fall through */ + case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; /* fall through */ + case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + input->vep->ve0.vertex_buffer_index = i; + input->vep->ve0.src_offset = 0; + /* One vertex-buffer entry per enabled input. NOTE(review): glarray->Ptr is cast to GLuint; for uploaded arrays it holds a buffer offset, but the cast assumes the value fits in 32 bits -- confirm for 64-bit builds. */ + vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB; + vbp.vb[i].vb0.bits.pad = 0; + vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; + vbp.vb[i].vb0.bits.vb_index = i; + vbp.vb[i].offset = (GLuint)input->glarray->Ptr + input->vbo_rebase_offset; + vbp.vb[i].buffer = array_buffer(input->glarray); + vbp.vb[i].max_index = max_index - min_index; + } + + + + /* Now emit VB and VEP state packets. The vertex-buffer packet is one header dword plus four dwords per enabled input; the length field is that total minus 2. + */ + vbp.header.bits.length = (1 + nr_enabled * 4) - 2; + vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + + BEGIN_BATCH(vbp.header.bits.length+2, 0); + OUT_BATCH( vbp.header.dword ); + + for (i = 0; i < nr_enabled; i++) { + OUT_BATCH( vbp.vb[i].vb0.dword ); + OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset); + OUT_BATCH( vbp.vb[i].max_index ); + OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + } + ADVANCE_BATCH(); + /* The vertex-element packet goes through brw_cached_batch_struct(); its size is 4 header bytes plus one ve entry per enabled input. */ + vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; + vep.header.opcode = CMD_VERTEX_ELEMENT; + brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + 
+ return GL_TRUE; +} + /* Bytes per index for the three supported GL index types; any other type hits assert(0) and returns 0. */ + +static GLuint element_size( GLenum type ) +{ + switch(type) { + case GL_UNSIGNED_INT: return 4; + case GL_UNSIGNED_SHORT: return 2; + case GL_UNSIGNED_BYTE: return 1; + default: assert(0); return 0; + } +} + /* Copy the index data at index_buffer->ptr into upload-buffer space and return the buffer object and byte offset through vbo_return / offset_return. When index_buffer->rebase is 0 the data is uploaded unchanged with BufferSubData; otherwise the buffer is mapped and every index is written as (index - rebase) in its original type. */ + + + +static void rebase_indices_to_vbo_indices( struct brw_context *brw, + const struct brw_draw_index_buffer *index_buffer, + struct gl_buffer_object **vbo_return, + GLuint *offset_return ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint min_index = index_buffer->rebase; + const void *indices = index_buffer->ptr; + GLsizei count = index_buffer->count; + GLenum type = index_buffer->type; + GLuint size = element_size(type) * count; + struct gl_buffer_object *bufferobj; + GLuint offset; + GLuint i; + + get_space(brw, size, &bufferobj, &offset); + + *vbo_return = bufferobj; + *offset_return = offset; + + if (min_index == 0) { + /* Straight upload + */ + ctx->Driver.BufferSubData( ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + offset, + size, + indices, + bufferobj); + } + else { + void *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + /* NOTE(review): arithmetic on a void pointer is a GNU C extension, not ISO C; the MapBuffer result is also used without a NULL check. */ + map += offset; + /* The switch has no default; an unsupported type has already hit the assert in element_size() above. */ + switch (type) { + case GL_UNSIGNED_INT: { + GLuint *ui_map = (GLuint *)map; + const GLuint *ui_indices = (const GLuint *)indices; + + for (i = 0; i < count; i++) + ui_map[i] = ui_indices[i] - min_index; + break; + } + case GL_UNSIGNED_SHORT: { + GLushort *us_map = (GLushort *)map; + const GLushort *us_indices = (const GLushort *)indices; + + for (i = 0; i < count; i++) + us_map[i] = us_indices[i] - min_index; + break; + } + case GL_UNSIGNED_BYTE: { + GLubyte *ub_map = (GLubyte *)map; + const GLubyte *ub_indices = (const GLubyte *)indices; + + for (i = 0; i < count; i++) + ub_map[i] = ub_indices[i] - min_index; + break; + } + } + + ctx->Driver.UnmapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + bufferobj); + + } +} + /* Emit the CMD_INDEX_BUFFER packet for an indexed draw. If the index buffer object has Name == 0 the indices are first copied into upload-buffer space by rebase_indices_to_vbo_indices(); otherwise index_buffer->ptr is used as a byte offset into index_buffer->obj. */ + + +void brw_upload_indices( struct brw_context *brw, + const struct brw_draw_index_buffer *index_buffer) +{ + struct 
intel_context *intel = &brw->intel; + GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; + struct gl_buffer_object *bufferobj = index_buffer->obj; + GLuint offset = (GLuint)index_buffer->ptr; + + /* A buffer object with Name == 0 has NOT been turned into a proper VBO yet: copy (and rebase) its indices into upload-buffer space, which replaces bufferobj and offset. Named buffer objects are used as they are. + */ + if (!index_buffer->obj->Name) { + rebase_indices_to_vbo_indices(brw, index_buffer, &bufferobj, &offset ); + } + + /* Emit the indexbuffer packet: header, start address (buffer offset + offset), end address (start + ib_size), and a final zero dword. + */ + { + struct brw_indexbuffer ib; + struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(index_buffer->type); + ib.header.bits.cut_index_enable = 0; + + + BEGIN_BATCH(4, 0); + OUT_BATCH( ib.header.dword ); + OUT_BATCH( bmBufferOffset(intel, buffer) + offset ); + OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size ); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c new file mode 100644 index 00000000000..d1244befd78 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* Select predication through a flag value. A value of 0xff means no predication (BRW_PREDICATE_NONE). Any other value is loaded into the flag register with a MOV, skipped when it equals the cached p->flag_value, and predication is set to BRW_PREDICATE_NORMAL. The MOV is bracketed by push/pop of the instruction state. + * + * How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + /* Each setter from here on writes exactly one field of p->current->header, i.e. of the instruction-state entry currently on top of the push/pop stack. */ +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, GLuint value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, GLuint value ) +{ + 
p->current->header.saturate = value; +} + /* Push: copy the current instruction-state entry into the next stack slot and make that slot current. Asserts that the current entry is not already the last slot (BRW_EU_MAX_INSN_STACK-1). */ +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + /* Pop: step back to the previous stack slot, discarding changes made since the matching push. Asserts that the stack is not at its base. */ +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + /* brw_init_compile: reset a brw_compile to an empty program with a zeroed base instruction state, then apply the defaults below. NOTE(review): p->flag_value is not assigned here, and the 0xff call below never touches it, so the first non-0xff brw_set_predicate_control_flag_value() compares against whatever the caller left in the struct -- confirm callers zero it. */ + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + /* Finish the program: append 8 NOP instructions, store the program size in bytes (including those NOPs) in *sz, and return p->store viewed as an array of GLuint. The returned pointer aliases storage inside *p. */ + +const GLuint *brw_get_program( struct brw_compile *p, + GLuint *sz ) +{ + GLuint i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + *sz = p->nr_insn * sizeof(struct brw_instruction); + return (const GLuint *)p->store; +} + + diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h new file mode 100644 index 00000000000..1afa0f816b8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -0,0 +1,863 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" +#include "shader/program.h" + /* BRW_SWIZZLE4 packs four 2-bit channel selectors into one 8-bit value, first selector in bits 0-1 and fourth in bits 6-7. BRW_GET_SWZ extracts selector number idx (0..3) from such a value. */ +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + /* Named swizzles; NOOP and XYZW are the same identity mapping. */ +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + /* Size of one register in bytes (32); byte_offset() uses it to convert between a byte offset and a (nr, subnr) pair. */ + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. 
Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + * + * The first group of bitfields adds up to 32 bits; dw1 is a second 32-bit word holding either the align16 swizzle/writemask/indirect bits or an immediate value (f, d or ud). + */ +struct brw_reg +{ + GLuint type:4; + GLuint file:2; + GLuint nr:8; + GLuint subnr:5; /* :1 in align16 */ + GLuint negate:1; /* source only */ + GLuint abs:1; /* source only */ + GLuint vstride:4; /* source only */ + GLuint width:3; /* src only, align1 only */ + GLuint hstride:2; /* src only, align1 only */ + GLuint address_mode:1; /* relative addressing, hopefully! */ + GLuint pad0:1; + + union { + struct { + GLuint swizzle:8; /* src only, align16 only */ + GLuint writemask:4; /* dest only, align16 only */ + GLint indirect_offset:10; /* relative addressing offset */ + GLuint pad1:10; /* two dwords total */ + } bits; + + GLfloat f; + GLint d; + GLuint ud; + } dw1; +}; + /* Address-register reference packed into 32 bits (4+10+18): a sub-register number and a signed offset. NOTE(review): usage is not visible in this part of the header. */ + +struct brw_indirect { + GLuint addr_subnr:4; + GLint addr_offset:10; + GLuint pad:18; +}; + /* Limits for struct brw_compile: depth of the instruction-state stack and capacity of the instruction store. */ + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + /* Per-program assembly state: the emitted instructions (`store`, `nr_insn` of them), a small stack of instruction-state entries with `current` pointing at the active one, and the flag value last loaded by brw_set_predicate_control_flag_value(). */ +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + GLuint nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + GLuint flag_value; +}; + /* Size in bytes of one element of the given BRW_REGISTER_TYPE_*: 4 for UD/D/F, 2 for HF/UW/W, 1 for UB/B, and 0 for anything else. */ + + +static __inline int type_sz( GLuint type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + /* General register constructor. `subnr` is given in elements of `type` and stored in bytes (subnr * type_sz(type)); negate and abs are cleared, addressing is direct and the indirect offset is 0. */ +static __inline struct brw_reg brw_reg( GLuint file, + GLuint nr, + GLuint subnr, + GLuint type, + GLuint vstride, + GLuint width, + GLuint hstride, + GLuint swizzle, + GLuint writemask) +{ + + struct brw_reg reg; + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + 
reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +static __inline struct brw_reg brw_vec16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, + GLuint type ) +{ + reg.type = type; + return reg; +} + +static __inline struct 
brw_reg suboffset( struct brw_reg reg, + GLuint delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, + GLuint delta ) +{ + reg.nr += delta; + return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, + GLuint bytes ) +{ + GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( GLuint type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +static __inline struct brw_reg brw_imm_f( GLfloat f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +static __inline struct brw_reg brw_imm_d( GLint d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +static __inline struct brw_reg brw_imm_ud( GLuint ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +static __inline struct brw_reg brw_imm_uw( GLushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw; + return imm; +} + +static __inline struct brw_reg brw_imm_w( GLshort w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w; + return imm; +} + +/* brw_imm_b 
and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static __inline struct brw_reg brw_imm_vf4( GLuint v0, + GLuint v1, + GLuint v2, + GLuint v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( GLuint nr, + GLuint subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( GLuint nr, + GLuint subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( GLuint nr, + GLuint subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( GLuint nr, + GLuint subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( GLuint nr, + GLuint subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline 
struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static __inline struct brw_reg brw_address_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! */ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static __inline struct brw_reg brw_mask_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static __inline struct brw_reg brw_message_reg( GLuint nr ) +{ + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline GLuint cvt( GLuint val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, + GLuint vstride, + GLuint width, + GLuint hstride ) +{ + + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( 
struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, + GLuint x, + GLuint y, + GLuint z, + GLuint w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, + GLuint x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, + GLint offset ) 
+{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + + + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, GLuint value ); +void brw_set_saturate( struct brw_compile *p, GLuint value ); +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_set_compression_control( struct brw_compile *p, GLboolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); + +void brw_init_compile( struct brw_compile *p ); +const GLuint *brw_get_program( struct brw_compile 
*p, GLuint *sz ); + + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + GLuint scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint 
msg_reg_nr, + GLuint scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + GLuint execute_size); + +void brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c new file mode 100644 index 00000000000..2dff1ad2244 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "mtypes.h" +#include "brw_eu.h" +#include "imports.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + _mesa_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? 
"-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + _mesa_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else { + _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c new file mode 100644 index 00000000000..6425c91450c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -0,0 +1,1003 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + 
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + 
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +static void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + 
insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + GLuint msg_length, + GLuint response_length, + GLuint function, + GLuint integer_type, + GLboolean low_precision, + GLboolean saturate, + GLuint dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? 
*/ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_write_message( struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void brw_set_sampler_message( struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot) +{ + brw_set_src1(insn, brw_imm_d(0)); + + 
insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + GLuint opcode ) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditonalmod) { + p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +static struct brw_instruction *brw_alu2(struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. 
+ */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. 
If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ *
+ * brw_IF() emits the IF with a zero jump distance and returns it so
+ * that brw_ELSE() / brw_ENDIF() can patch the distance in later.  The
+ * IF itself is predicated (BRW_PREDICATE_NORMAL); the default predicate
+ * for following instructions is reset to BRW_PREDICATE_NONE.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF);
+
+   /* Override the defaults for this instruction:
+    */
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.execution_size = execute_size;
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return insn;
+}
+
+
+/* Emit the ELSE matching 'if_insn' and patch 'if_insn' so that its jump
+ * count is the distance (in instructions) from the IF to this ELSE.
+ * The ELSE inherits the IF's execution size.  Returns the ELSE so that
+ * brw_ENDIF() can patch it in turn.
+ */
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+                                 struct brw_instruction *if_insn)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE);
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = if_insn->header.execution_size;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+
+   /* Patch the if instruction to point at this instruction.
+    */
+   assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+   if_insn->bits3.if_else.jump_count = insn - if_insn;
+   if_insn->bits3.if_else.pop_count = 1;
+   if_insn->bits3.if_else.pad0 = 0;
+
+   return insn;
+}
+
+/* Emit the ENDIF closing the block opened by 'patch_insn', which must
+ * be the still-unpatched IF (no else branch) or ELSE of that block.
+ *
+ * An unpatched IF is rewritten into an IFF.  Either way the patched
+ * instruction's jump count is set to land one instruction past the
+ * ENDIF, and the ENDIF itself pops one entry (pop_count = 1).
+ */
+void brw_ENDIF(struct brw_compile *p,
+               struct brw_instruction *patch_insn)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = patch_insn->header.execution_size;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+
+   /* The instruction being patched must not have been patched before. */
+   assert(patch_insn->bits3.if_else.jump_count == 0);
+
+   /* Patch the if or else instructions to point at this or the next
+    * instruction respectively.
+    */
+   if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+      /* Automagically turn it into an IFF:
+       */
+      patch_insn->header.opcode = BRW_OPCODE_IFF;
+      patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+      patch_insn->bits3.if_else.pop_count = 0;
+      patch_insn->bits3.if_else.pad0 = 0;
+
+   }
+   else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+      patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+      patch_insn->bits3.if_else.pop_count = 1;
+      patch_insn->bits3.if_else.pad0 = 0;
+   }
+   else {
+      assert(0);
+   }
+
+   /* Also pop item off the stack in the endif instruction:
+    */
+   insn->bits3.if_else.jump_count = 0;
+   insn->bits3.if_else.pop_count = 1;
+   insn->bits3.if_else.pad0 = 0;
+}
+
+/* DO/WHILE loop:
+ *
+ * brw_DO() emits the loop head and returns it; pass the result to
+ * brw_WHILE() to close the loop.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+   /* Override the defaults for this instruction:
+    */
+   brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = execute_size;
+/*    insn->header.mask_control = BRW_MASK_ENABLE; */
+
+   return insn;
+}
+
+
+
+/* Emit the WHILE closing the loop opened by 'do_insn'.  The jump count
+ * is the (negative) instruction distance back to the DO, and the WHILE
+ * inherits the DO's execution size.  The default predicate for the
+ * following instructions is reset to BRW_PREDICATE_NONE.
+ */
+void brw_WHILE(struct brw_compile *p,
+               struct brw_instruction *do_insn)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE);
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = do_insn->header.execution_size;
+
+   assert(do_insn->header.opcode == BRW_OPCODE_DO);
+   insn->bits3.if_else.jump_count = do_insn - insn;
+   insn->bits3.if_else.pop_count = 0;
+   insn->bits3.if_else.pad0 = 0;
+
+/*    insn->header.mask_control = BRW_MASK_ENABLE; */
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+}
+
+
+/* FORWARD JUMPS:
+ *
+ * Make the previously emitted JMPI 'jmp_insn' land on the next
+ * instruction to be emitted.  The distance is stored in the JMPI's
+ * immediate operand (bits3.ud), so src1 of the JMPI must already be an
+ * immediate.
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn)
+{
+   struct brw_instruction *landing = &p->store[p->nr_insn];
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   /* This must be a comparison: an assignment here would silently
+    * rewrite the operand file in debug builds and check nothing.
+    */
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register.  It might be simpler
+ * just to use the flag reg for most WM tasks?
+ *
+ * Emits CMP with conditional modifier 'conditional'.  When 'dest' is
+ * register 0 of the architecture register file, subsequent instructions
+ * are predicated on the result by default.
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             GLuint conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+   insn->header.destreg__conditonalmod = conditional;
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src0);
+   brw_set_src1(insn, src1);
+
+/*    guess_execution_size(insn, src0); */
+
+
+   /* Make it so that future instructions will use the computed flag
+    * value until brw_set_predicate_control_flag_value() is called
+    * again.
+    */
+   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       dest.nr == 0) {
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+      p->flag_value = 0xff;
+   }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Emit a single SEND carrying a math message for 'function'.
+ *
+ * The message is two registers long for BRW_MATH_FUNCTION_POW and one
+ * otherwise; the response is two registers long for
+ * BRW_MATH_FUNCTION_SINCOS and one otherwise.  'msg_reg_nr' is stored
+ * in the instruction's destreg__conditonalmod field.
+ */
+void brw_math( struct brw_compile *p,
+               struct brw_reg dest,
+               GLuint function,
+               GLuint saturate,
+               GLuint msg_reg_nr,
+               struct brw_reg src,
+               GLuint data_type,
+               GLuint precision )
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+   /* Example code doesn't set predicate_control for send
+    * instructions.
+    */
+   insn->header.predicate_control = 0;
+   insn->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src);
+   brw_set_math_message(insn,
+                        msg_length, response_length,
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        data_type);
+}
+
+/* Apply 'function' using two SEND instructions.
+ *
+ * The first uses msg_reg_nr and writes 'dest'; the second is flagged
+ * BRW_COMPRESSION_2NDHALF, uses msg_reg_nr+1 and writes
+ * offset(dest,1).  Both are emitted with the same 'src' operand and
+ * with BRW_MATH_DATA_VECTOR.  The instruction defaults are saved and
+ * restored around the pair.
+ */
+void brw_math_16( struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint function,
+                  GLuint saturate,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src,
+                  GLuint precision )
+{
+   struct brw_instruction *insn;
+   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+   /* First instruction:
+    */
+   brw_push_insn_state(p);
+   brw_set_predicate_control_flag_value(p, 0xff);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src);
+   brw_set_math_message(insn,
+                        msg_length, response_length,
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        BRW_MATH_DATA_VECTOR);
+
+   /* Second instruction:
+    */
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+   insn->header.destreg__conditonalmod = msg_reg_nr+1;
+
+   brw_set_dest(insn, offset(dest,1));
+   brw_set_src0(insn, src);
+   brw_set_math_message(insn,
+                        msg_length, response_length,
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        BRW_MATH_DATA_VECTOR);
+
+   brw_pop_insn_state(p);
+}
+
+
+
+
+/* Emit an OWORD block write through the dataport (binding table index
+ * 255, message length 3, no response).
+ *
+ * 'scratch_offset' is first MOVed into brw_vec1_grf(0, 2) with the
+ * execution mask disabled, so this helper overwrites that register.
+ * NOTE(review): presumably this is the scratch-space spill path and
+ * the MOV fills in the block offset of the message header -- confirm
+ * against the dataport message layout.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+                      struct brw_reg src,
+                      GLuint msg_reg_nr,
+                      GLuint scratch_offset )
+{
+   {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+      brw_MOV(p,
+              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+              brw_imm_d(scratch_offset));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      GLuint msg_length = 3;
+      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+
+      brw_set_dp_write_message(insn,
+                               255, /* bti */
+                               BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+                               BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+                               msg_length,
+                               0, /* pixel scoreboard */
+                               0, /* response_length */
+                               0); /* eot */
+   }
+
+}
+
+
+/* Emit an OWORD block read through the dataport (binding table index
+ * 255, message length 1, response length 2) into 'dest'.
+ *
+ * As in brw_dp_WRITE_16(), 'scratch_offset' is first MOVed into
+ * brw_vec1_grf(0, 2) with the execution mask disabled.  The SEND uses
+ * brw_vec8_grf(0, 0) retyped as UW for src0.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+                     struct brw_reg dest,
+                     GLuint msg_reg_nr,
+                     GLuint scratch_offset )
+{
+   {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      brw_MOV(p,
+              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+              brw_imm_d(scratch_offset));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);	/* UW? */
+      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+      brw_set_dp_read_message(insn,
+                              255, /* bti */
+                              3,  /* msg_control */
+                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                              1,  /* target cache */
+                              1,  /* msg_length */
+                              2,  /* response_length */
+                              0); /* eot */
+   }
+}
+
+
+/* Emit a render-target write SEND.  The message control is fixed to
+ * BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE and the pixel
+ * scoreboard argument is always 1; everything else comes from the
+ * caller.
+ */
+void brw_fb_WRITE(struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src0,
+                  GLuint binding_table_index,
+                  GLuint msg_length,
+                  GLuint response_length,
+                  GLboolean eot)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   insn->header.predicate_control = 0; /* XXX */
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src0);
+   brw_set_dp_write_message(insn,
+                            binding_table_index,
+                            BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+                            BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+                            msg_length,
+                            1,	/* pixel scoreboard */
+                            response_length,
+                            eot);
+}
+
+
+
+/* Emit a sampler SEND.
+ *
+ * Nothing is emitted when 'writemask' is zero.  For a partial
+ * writemask one of two things happens:
+ *
+ *  - the enabled channels form one contiguous run: a message header is
+ *    built in message register 'msg_reg_nr' with the disabled channels
+ *    encoded at bit 12 of element 2, 'dest' is advanced by two
+ *    registers per skipped leading channel, and the response length
+ *    shrinks to two registers per enabled channel;
+ *
+ *  - otherwise the full message is sent and a self-MOV of the last
+ *    response register is emitted afterwards (see the workaround
+ *    comment below).
+ */
+void brw_SAMPLE(struct brw_compile *p,
+                struct brw_reg dest,
+                GLuint msg_reg_nr,
+                struct brw_reg src0,
+                GLuint binding_table_index,
+                GLuint sampler,
+                GLuint writemask,
+                GLuint msg_type,
+                GLuint response_length,
+                GLuint msg_length,
+                GLboolean eot)
+{
+   GLboolean need_stall = 0;
+
+   if(writemask == 0) {
+/*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+      return;
+   }
+
+   /* Hardware doesn't do destination dependency checking on send
+    * instructions properly.  Add a workaround which generates the
+    * dependency by other means.  In practice it seems like this bug
+    * only crops up for texture samples, and only where registers are
+    * written by the send and then written again later without being
+    * read in between.  Luckily for us, we already track that
+    * information and use it to modify the writemask for the
+    * instruction, so that is a guide for whether a workaround is
+    * needed.
+    */
+   if (writemask != WRITEMASK_XYZW) {
+      GLuint dst_offset = 0;
+      GLuint i, newmask = 0, len = 0;
+
+      /* Skip leading disabled channels, two registers each. */
+      for (i = 0; i < 4; i++) {
+         if (writemask & (1<<i))
+            break;
+         dst_offset += 2;
+      }
+      /* Collect the first contiguous run of enabled channels. */
+      for (; i < 4; i++) {
+         if (!(writemask & (1<<i)))
+            break;
+         newmask |= 1<<i;
+         len++;
+      }
+
+      if (newmask != writemask) {
+         /* Enabled channels are not contiguous. */
+         need_stall = 1;
+/*          _mesa_printf("need stall %x %x\n", newmask , writemask); */
+      }
+      else {
+         struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+         newmask = ~newmask & WRITEMASK_XYZW;
+
+         brw_push_insn_state(p);
+
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+         brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+         brw_MOV(p, m1, brw_vec8_grf(0,0));
+         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+         brw_pop_insn_state(p);
+
+         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+         dest = offset(dest, dst_offset);
+         response_length = len * 2;
+      }
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src0);
+      brw_set_sampler_message(insn,
+                              binding_table_index,
+                              sampler,
+                              msg_type,
+                              response_length,
+                              msg_length,
+                              eot);
+   }
+
+   if (need_stall)
+   {
+      struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
+       */
+      brw_push_insn_state(p);
+      /* NOTE(review): GL_FALSE is passed where every other call site
+       * uses a BRW_COMPRESSION_* value; presumably it stands for
+       * BRW_COMPRESSION_NONE -- confirm against brw_defines.h.
+       */
+      brw_set_compression_control(p, GL_FALSE);
+      brw_MOV(p, reg, reg);
+      brw_pop_insn_state(p);
+   }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style.  Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ *
+ * Emits a URB write SEND; 'msg_length' must be below 16.  Note that
+ * the 'offset' parameter hides the offset() register helper inside
+ * this function.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   GLuint msg_reg_nr,
+                   struct brw_reg src0,
+                   GLboolean allocate,
+                   GLboolean used,
+                   GLuint msg_length,
+                   GLuint response_length,
+                   GLboolean eot,
+                   GLboolean writes_complete,
+                   GLuint offset,
+                   GLuint swizzle)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < 16);
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src0);
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_urb_message(insn,
+                       allocate,
+                       used,
+                       msg_length,
+                       response_length,
+                       eot,
+                       writes_complete,
+                       offset,
+                       swizzle);
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_eu_util.c b/src/mesa/drivers/dri/i965/brw_eu_util.c
new file mode 100644
index 00000000000..5405cf17a4e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <[email protected]>
+  */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/* Emit a math SEND computing BRW_MATH_FUNCTION_INV of 'src' into
+ * 'dst', using message register 0, no saturation, vector data and full
+ * precision.
+ *
+ * brw_math() takes (..., src, data_type, precision); the data type is
+ * therefore passed before the precision.
+ */
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+   brw_math( p,
+             dst,
+             BRW_MATH_FUNCTION_INV,
+             BRW_MATH_SATURATE_NONE,
+             0,
+             src,
+             BRW_MATH_DATA_VECTOR,
+             BRW_MATH_PRECISION_FULL );
+}
+
+
+
+/* Copy 'count' consecutive 32-byte units from 'src' to 'dst', each as
+ * two MOVs of vec4 width (byte offsets +0 and +16 within the unit).
+ * NOTE(review): 32 bytes is presumably one GRF register -- confirm.
+ */
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint i;
+
+   dst = vec4(dst);
+   src = vec4(src);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;
+      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
+      brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+   }
+}
+
+
+/* Copy 'count' consecutive 32-byte units from 'src' to 'dst', one
+ * vec8-wide MOV per unit.
+ */
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint i;
+
+   dst = vec8(dst);
+   src = vec8(src);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;
+      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
+   }
+}
+
+
+/* Copy 'count' consecutive 32-byte units between two indirectly
+ * addressed locations, two deref_4f() MOVs per unit (byte offsets +0
+ * and +16).
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   GLuint count)
+{
+   GLuint i;
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;
+      brw_MOV(p, deref_4f(dst_ptr, delta),    deref_4f(src_ptr, delta));
+      brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+   }
+}
+
+
+/* Copy 'count' consecutive 32-byte units from the indirectly addressed
+ * location 'ptr' into the directly addressed register 'dst', two vec4
+ * MOVs per unit.
+ */
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            GLuint count)
+{
+   GLuint i;
+
+   dst = vec4(dst);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;
+      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta));
+      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+   }
+}
+
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_exec.c b/src/mesa/drivers/dri/i965/brw_exec.c
new file mode 100644
index 00000000000..fc06c3c3617
--- /dev/null
+++
b/src/mesa/drivers/dri/i965/brw_exec.c @@ -0,0 +1,125 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Keith Whitwell <[email protected]>
+ */
+
+
+#include "api_arrayelt.h"
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "dlist.h"
+#include "vtxfmt.h"
+
+#include "brw_exec.h"
+
+
+/* Create the immediate-mode execution context for 'ctx'.
+ *
+ * Allocates the brw_exec_context, allocates ctx->swtnl_im if nobody
+ * has done so yet, creates the arrayelt helper if needed, initialises
+ * the vtx and array sub-modules and installs brw_exec_FlushVertices as
+ * the driver flush hook.
+ *
+ * The function has no way to report failure: it returns early, with
+ * nothing installed, if either allocation fails.
+ */
+void brw_exec_init( GLcontext *ctx )
+{
+   struct brw_exec_context *exec = CALLOC_STRUCT(brw_exec_context);
+
+   /* Do not dereference a failed allocation. */
+   if (exec == NULL)
+      return;
+
+   if (ctx->swtnl_im == NULL) {
+      ctx->swtnl_im = CALLOC_STRUCT(brw_exec_save);
+      if (ctx->swtnl_im == NULL) {
+         FREE(exec);
+         return;
+      }
+   }
+
+   exec->ctx = ctx;
+   IMM_CONTEXT(ctx)->exec = exec;
+
+   /* Initialize the arrayelt helper
+    *
+    * NOTE(review): on failure this returns with 'exec' installed but
+    * its vtx/array state still zeroed -- confirm that callers cope.
+    */
+   if (!ctx->aelt_context &&
+       !_ae_create_context( ctx ))
+      return;
+
+   brw_exec_vtx_init( exec );
+   brw_exec_array_init( exec );
+
+   ctx->Driver.NeedFlush = 0;
+   ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
+   ctx->Driver.FlushVertices = brw_exec_FlushVertices;
+
+   exec->eval.recalculate_maps = 1;
+}
+
+
+/* Tear down what brw_exec_init() created.
+ *
+ * The arrayelt helper is destroyed first.  The exec context, if
+ * present, has its vtx and array sub-modules destroyed before being
+ * freed.  The shared container in ctx->swtnl_im is released once both
+ * its 'exec' and 'save' members are NULL.
+ */
+void brw_exec_destroy( GLcontext *ctx )
+{
+   struct brw_exec_context *exec;
+
+   if (ctx->aelt_context) {
+      _ae_destroy_context( ctx );
+      ctx->aelt_context = NULL;
+   }
+
+   /* Nothing else to release if brw_exec_init() never got as far as
+    * allocating the shared container.
+    * NOTE(review): assumes IMM_CONTEXT(ctx) is derived from
+    * ctx->swtnl_im, as the alloc/free pairing in this file suggests.
+    */
+   if (ctx->swtnl_im == NULL)
+      return;
+
+   exec = IMM_CONTEXT(ctx)->exec;
+
+   /* Only hand 'exec' to the sub-module destructors when it exists. */
+   if (exec) {
+      brw_exec_vtx_destroy( exec );
+      brw_exec_array_destroy( exec );
+
+      FREE(exec);
+      IMM_CONTEXT(ctx)->exec = NULL;
+   }
+
+   if (IMM_CONTEXT(ctx)->exec == NULL &&
+       IMM_CONTEXT(ctx)->save == NULL) {
+      FREE(IMM_CONTEXT(ctx));
+      ctx->swtnl_im = NULL;
+   }
+}
+
+/* Really want to install these callbacks to a central facility to be
+ * invoked according to the state flags.
That will have to wait for a
+ * mesa rework:
+ *
+ * Marks the evaluator maps for recalculation when program or evaluator
+ * state changed, then forwards the notification to the arrayelt
+ * helper.
+ */
+void brw_exec_invalidate_state( GLcontext *ctx, GLuint new_state )
+{
+   struct brw_exec_context *ec = IMM_CONTEXT(ctx)->exec;
+   const GLuint eval_dirty = _NEW_PROGRAM|_NEW_EVAL;
+
+   if ((new_state & eval_dirty) != 0) {
+      ec->eval.recalculate_maps = 1;
+   }
+
+   _ae_invalidate_state(ctx, new_state);
+}
+
+
+/* Re-arm the exec module: reinstall the flush hook, request an update
+ * of the current values, hook the vertex format into the dispatch
+ * tables and treat all state as dirty.
+ */
+void brw_exec_wakeup( GLcontext *ctx )
+{
+   struct brw_exec_context *ec = IMM_CONTEXT(ctx)->exec;
+
+   ctx->Driver.FlushVertices = brw_exec_FlushVertices;
+   ctx->Driver.NeedFlush = ctx->Driver.NeedFlush | FLUSH_UPDATE_CURRENT;
+
+   /* Hook our functions into exec and compile dispatch tables.
+    */
+   _mesa_install_exec_vtxfmt( ctx, &ec->vtxfmt );
+
+   /* Assume we haven't been getting state updates either:
+    */
+   brw_exec_invalidate_state( ctx, ~0 );
+}
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_exec.h b/src/mesa/drivers/dri/i965/brw_exec.h
new file mode 100644
index 00000000000..f07b4485874
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_exec.h
@@ -0,0 +1,150 @@
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <[email protected]>
+ *
+ */
+
+#ifndef __BRW_EXEC_H__
+#define __BRW_EXEC_H__
+
+#include "mtypes.h"
+#include "brw_attrib.h"
+#include "brw_draw.h"
+
+
+/* Capacity of the per-buffer primitive list (vtx.prim[]). */
+#define BRW_MAX_PRIM 64
+
+/* Weird implementation stuff:
+ */
+#define BRW_VERT_BUFFER_SIZE (1024*16)	/* dwords == 64k */
+#define BRW_MAX_ATTR_CODEGEN 16
+#define ERROR_ATTRIB 16
+
+
+
+
+/* One-dimensional evaluator map together with a size value 'sz'.
+ * NOTE(review): 'sz' is presumably the attribute size the map
+ * produces -- it is compared against vtx.attrsz[] by the eval code.
+ */
+struct brw_exec_eval1_map {
+   struct gl_1d_map *map;
+   GLuint sz;
+};
+
+/* Two-dimensional counterpart of brw_exec_eval1_map. */
+struct brw_exec_eval2_map {
+   struct gl_2d_map *map;
+   GLuint sz;
+};
+
+
+
+/* Vertices carried over from one vertex buffer to the next when the
+ * buffer wraps; 'nr' is the number of vertices held in 'buffer'.
+ */
+struct brw_exec_copied_vtx {
+   GLfloat buffer[BRW_ATTRIB_MAX * 4 * BRW_MAX_COPIED_VERTS];
+   GLuint nr;
+};
+
+
+typedef void (*brw_attrfv_func)( const GLfloat * );
+
+
+/* Per-context state of the immediate-mode ("exec") vertex path. */
+struct brw_exec_context
+{
+   GLcontext *ctx;		/* owning GL context */
+   GLvertexformat vtxfmt;	/* installed by brw_exec_wakeup() */
+
+   /* Vertex accumulation state. */
+   struct {
+      struct gl_buffer_object *bufferobj;
+      GLubyte *buffer_map;
+
+      GLuint vertex_size;	/* sum of attrsz[], in GLfloats */
+
+      struct brw_draw_prim prim[BRW_MAX_PRIM];
+      GLuint prim_count;
+
+      GLfloat *vbptr;		     /* cursor, points into buffer */
+      GLfloat vertex[BRW_ATTRIB_MAX*4]; /* current vertex */
+
+      GLfloat *current[BRW_ATTRIB_MAX]; /* points into ctx->Current, ctx->Light.Material */
+      GLfloat CurrentFloatEdgeFlag;
+
+      GLuint vert_count;	/* vertices stored in the buffer so far */
+      GLuint max_vert;		/* buffer capacity, in vertices */
+      struct brw_exec_copied_vtx copied;
+
+      GLubyte attrsz[BRW_ATTRIB_MAX];	 /* size of each attribute in the vertex; 0 if absent */
+      GLubyte active_sz[BRW_ATTRIB_MAX]; /* size last requested for each attribute */
+
+      GLfloat *attrptr[BRW_ATTRIB_MAX];	 /* points into vertex[]; NULL if absent */
+      struct gl_client_array arrays[BRW_ATTRIB_MAX];
+      const struct gl_client_array *inputs[BRW_ATTRIB_MAX];
+   } vtx;
+
+
+   /* Evaluator state; maps are rebuilt when recalculate_maps is set. */
+   struct {
+      GLboolean recalculate_maps;
+      struct brw_exec_eval1_map map1[BRW_ATTRIB_MAX];
+      struct brw_exec_eval2_map map2[BRW_ATTRIB_MAX];
+   } eval;
+
+   /* Vertex-array drawing state. */
+   struct {
+      const struct gl_client_array *inputs[BRW_ATTRIB_MAX];
+
+      struct gl_buffer_object *index_obj;
+   } array;
+};
+
+
+
+/* External API:
+ */
+void brw_exec_init( GLcontext *ctx );
+void brw_exec_destroy( GLcontext *ctx );
+void brw_exec_invalidate_state( GLcontext *ctx, GLuint new_state );
+void brw_exec_FlushVertices( GLcontext *ctx, GLuint flags );
+void brw_exec_wakeup( GLcontext *ctx );
+
+
+/* Internal functions:
+ */
+void brw_exec_array_init( struct brw_exec_context *exec );
+void brw_exec_array_destroy( struct brw_exec_context *exec );
+
+
+void brw_exec_vtx_init( struct brw_exec_context *exec );
+void brw_exec_vtx_destroy( struct brw_exec_context *exec );
+void brw_exec_vtx_flush( struct brw_exec_context *exec );
+void brw_exec_vtx_wrap( struct brw_exec_context *exec );
+
+void brw_exec_eval_update( struct brw_exec_context *exec );
+
+void brw_exec_do_EvalCoord2f( struct brw_exec_context *exec,
+			      GLfloat u, GLfloat v );
+
+void brw_exec_do_EvalCoord1f( struct brw_exec_context *exec,
+			      GLfloat u);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_exec_api.c b/src/mesa/drivers/dri/i965/brw_exec_api.c
new file mode 100644
index 00000000000..ca012dbcd8a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_exec_api.c
@@ -0,0 +1,716 @@
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "context.h" +#include "macros.h" +#include "vtxfmt.h" +#include "dlist.h" +#include "state.h" +#include "light.h" +#include "api_arrayelt.h" +#include "api_noop.h" +#include "dispatch.h" + +#include "brw_exec.h" + +static void reset_attrfv( struct brw_exec_context *exec ); + + +/* Close off the last primitive, execute the buffer, restart the + * primitive. 
+ */
+static void brw_exec_wrap_buffers( struct brw_exec_context *exec )
+{
+   if (exec->vtx.prim_count == 0) {
+      /* Nothing recorded: just rewind the buffer. */
+      exec->vtx.copied.nr = 0;
+      exec->vtx.vert_count = 0;
+      exec->vtx.vbptr = (GLfloat *)exec->vtx.buffer_map;
+   }
+   else {
+      GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
+      GLuint last_count;
+
+      /* NOTE(review): GL_POLYGON+1 is presumably the same value as
+       * PRIM_OUTSIDE_BEGIN_END, i.e. "inside a begin/end pair" --
+       * confirm.
+       */
+      if (exec->ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) {
+	 GLint i = exec->vtx.prim_count - 1;
+	 assert(i >= 0);
+	 /* Close the open primitive at the current vertex count. */
+	 exec->vtx.prim[i].count = (exec->vtx.vert_count -
+				    exec->vtx.prim[i].start);
+      }
+
+      last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
+
+      /* Execute the buffer and save copied vertices.
+       */
+      if (exec->vtx.vert_count)
+	 brw_exec_vtx_flush( exec );
+      else {
+	 exec->vtx.prim_count = 0;
+	 exec->vtx.copied.nr = 0;
+      }
+
+      /* Emit a glBegin to start the new list.
+       */
+      assert(exec->vtx.prim_count == 0);
+
+      if (exec->ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) {
+	 exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
+	 exec->vtx.prim[0].start = 0;
+	 exec->vtx.prim[0].count = 0;
+	 exec->vtx.prim_count++;
+
+	 /* If every vertex of the old primitive was carried over, the
+	  * restarted primitive inherits its 'begin' flag.
+	  */
+	 if (exec->vtx.copied.nr == last_count)
+	    exec->vtx.prim[0].begin = last_begin;
+      }
+   }
+}
+
+
+/* Deal with buffer wrapping where provoked by the vertex buffer
+ * filling up, as opposed to upgrade_vertex().
+ */
+void brw_exec_vtx_wrap( struct brw_exec_context *exec )
+{
+   GLfloat *data = exec->vtx.copied.buffer;
+   GLuint i;
+
+   /* Run pipeline on current vertices, copy wrapped vertices
+    * to exec->vtx.copied.
+    */
+   brw_exec_wrap_buffers( exec );
+
+   /* Copy stored vertices to start of new list.
+    */
+   assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
+
+   for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
+      _mesa_memcpy( exec->vtx.vbptr, data,
+		    exec->vtx.vertex_size * sizeof(GLfloat));
+      exec->vtx.vbptr += exec->vtx.vertex_size;
+      data += exec->vtx.vertex_size;
+      exec->vtx.vert_count++;
+   }
+
+   exec->vtx.copied.nr = 0;
+}
+
+
+/*
+ * Copy the active vertex's values to the ctx->Current fields.
+ */
+static void brw_exec_copy_to_current( struct brw_exec_context *exec )
+{
+   GLcontext *ctx = exec->ctx;
+   GLuint i;
+
+   /* Every attribute after the position. */
+   for (i = BRW_ATTRIB_POS+1 ; i < BRW_ATTRIB_MAX ; i++) {
+      if (exec->vtx.attrsz[i]) {
+         /* Note: the exec->vtx.current[i] pointers point into the
+          * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
+          */
+	 COPY_CLEAN_4V(exec->vtx.current[i],
+		       exec->vtx.attrsz[i],
+		       exec->vtx.attrptr[i]);
+
+	 /* This triggers rather too much recalculation of Mesa state
+	  * that doesn't get used (eg light positions).
+	  */
+	 if (i >= BRW_ATTRIB_MAT_FRONT_AMBIENT &&
+	     i <= BRW_ATTRIB_MAT_BACK_INDEXES)
+	    ctx->NewState |= _NEW_LIGHT;
+      }
+   }
+
+   /* color index is special (it's not a float[4] so COPY_CLEAN_4V above
+    * will trash adjacent memory!)
+    */
+   if (exec->vtx.attrsz[BRW_ATTRIB_INDEX]) {
+      ctx->Current.Index = exec->vtx.attrptr[BRW_ATTRIB_INDEX][0];
+   }
+
+   /* Edgeflag requires additional treatment:
+    */
+   if (exec->vtx.attrsz[BRW_ATTRIB_EDGEFLAG]) {
+      ctx->Current.EdgeFlag = (exec->vtx.CurrentFloatEdgeFlag == 1.0);
+   }
+
+   /* Colormaterial -- this kindof sucks.
+    */
+   if (ctx->Light.ColorMaterialEnabled &&
+       exec->vtx.attrsz[BRW_ATTRIB_COLOR0]) {
+      _mesa_update_color_material(ctx,
+				  ctx->Current.Attrib[BRW_ATTRIB_COLOR0]);
+   }
+
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
+
+
+/* Reload the vertex under construction from the current values (the
+ * inverse of brw_exec_copy_to_current()) and flag that the current
+ * values will need updating again.
+ */
+static void brw_exec_copy_from_current( struct brw_exec_context *exec )
+{
+   GLcontext *ctx = exec->ctx;
+   GLint i;
+
+   /* Edgeflag requires additional treatment:
+    */
+   exec->vtx.CurrentFloatEdgeFlag =
+      (GLfloat)ctx->Current.EdgeFlag;
+
+   /* Copy exactly attrsz[i] components; the cases deliberately fall
+    * through from the highest component down to the first.
+    */
+   for (i = BRW_ATTRIB_POS+1 ; i < BRW_ATTRIB_MAX ; i++)
+      switch (exec->vtx.attrsz[i]) {
+      case 4: exec->vtx.attrptr[i][3] = exec->vtx.current[i][3];
+      case 3: exec->vtx.attrptr[i][2] = exec->vtx.current[i][2];
+      case 2: exec->vtx.attrptr[i][1] = exec->vtx.current[i][1];
+      case 1: exec->vtx.attrptr[i][0] = exec->vtx.current[i][0];
+	 break;
+      }
+
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
+}
+
+
+/* Flush existing data, set new attrib size, replay copied vertices.
+ */
+static void brw_exec_wrap_upgrade_vertex( struct brw_exec_context *exec,
+					  GLuint attr,
+					  GLuint newsz )
+{
+   GLcontext *ctx = exec->ctx;
+   GLint lastcount = exec->vtx.vert_count;
+   GLfloat *tmp;
+   GLuint oldsz;
+   GLuint i;
+
+   /* Run pipeline on current vertices, copy wrapped vertices
+    * to exec->vtx.copied.
+    */
+   brw_exec_wrap_buffers( exec );
+
+
+   /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
+    * when the attribute already exists in the vertex and is having
+    * its size increased.
+    */
+   brw_exec_copy_to_current( exec );
+
+
+   /* Heuristic: Attempt to isolate attributes received outside
+    * begin/end so that they don't bloat the vertices.
+    */
+   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
+       exec->vtx.attrsz[attr] == 0 &&
+       lastcount > 8 &&
+       exec->vtx.vertex_size) {
+      reset_attrfv( exec );
+   }
+
+   /* Fix up sizes:
+    */
+   oldsz = exec->vtx.attrsz[attr];
+   exec->vtx.attrsz[attr] = newsz;
+
+   exec->vtx.vertex_size += newsz - oldsz;
+   exec->vtx.max_vert = BRW_VERT_BUFFER_SIZE / exec->vtx.vertex_size;
+   exec->vtx.vert_count = 0;
+   exec->vtx.vbptr = (GLfloat *)exec->vtx.buffer_map;
+
+
+   /* Recalculate all the attrptr[] values
+    */
+   for (i = 0, tmp = exec->vtx.vertex ; i < BRW_ATTRIB_MAX ; i++) {
+      if (exec->vtx.attrsz[i]) {
+	 exec->vtx.attrptr[i] = tmp;
+	 tmp += exec->vtx.attrsz[i];
+      }
+      else
+	 exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
+   }
+
+   /* Copy from current to repopulate the vertex with correct values.
+    */
+   brw_exec_copy_from_current( exec );
+
+   /* Replay stored vertices to translate them
+    * to new format here.
+    *
+    * -- No need to replay - just copy piecewise
+    */
+   if (exec->vtx.copied.nr)
+   {
+      GLfloat *data = exec->vtx.copied.buffer;
+      GLfloat *dest = exec->vtx.vbptr;
+      GLuint j;
+
+      assert(exec->vtx.vbptr == (GLfloat *)exec->vtx.buffer_map);
+
+      for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
+	 for (j = 0 ; j < BRW_ATTRIB_MAX ; j++) {
+	    if (exec->vtx.attrsz[j]) {
+	       if (j == attr) {
+		  if (oldsz) {
+		     /* Attribute existed in the old layout: widen the
+		      * stored value to the new size.
+		      */
+		     COPY_CLEAN_4V( dest, oldsz, data );
+		     data += oldsz;
+		     dest += newsz;
+		  } else {
+		     /* Attribute is new: the copied vertex has no data
+		      * for it, so take the current value.
+		      */
+		     COPY_SZ_4V( dest, newsz, exec->vtx.current[j] );
+		     dest += newsz;
+		  }
+	       }
+	       else {
+		  GLuint sz = exec->vtx.attrsz[j];
+		  COPY_SZ_4V( dest, sz, data );
+		  dest += sz;
+		  data += sz;
+	       }
+	    }
+	 }
+      }
+
+      exec->vtx.vbptr = dest;
+      exec->vtx.vert_count += exec->vtx.copied.nr;
+      exec->vtx.copied.nr = 0;
+   }
+}
+
+
+/* Bring attribute 'attr' to size 'sz' before it is written: grow the
+ * vertex format if 'sz' exceeds the stored size, or pad the unused
+ * components when the attribute shrinks.  Also records 'sz' as the
+ * active size and raises the matching NeedFlush flag.
+ */
+static void brw_exec_fixup_vertex( GLcontext *ctx,
+				   GLuint attr, GLuint sz )
+{
+   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec;
+   int i;
+
+   if (sz > exec->vtx.attrsz[attr]) {
+      /* New size is larger.  Need to flush existing vertices and get
+       * an enlarged vertex format.
+       */
+      brw_exec_wrap_upgrade_vertex( exec, attr, sz );
+   }
+   else if (sz < exec->vtx.active_sz[attr]) {
+      static const GLfloat id[4] = { 0, 0, 0, 1 };
+
+      /* New size is smaller - just need to fill in some
+       * zeros.  Don't need to flush or wrap.
+       */
+      for (i = sz ; i <= exec->vtx.attrsz[attr] ; i++)
+	 exec->vtx.attrptr[attr][i-1] = id[i-1];
+   }
+
+   exec->vtx.active_sz[attr] = sz;
+
+   /* Does setting NeedFlush belong here?  Necessitates resetting
+    * vtxfmt on each flush (otherwise flags won't get reset
+    * afterwards).
+    */
+   if (attr == 0)
+      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   else
+      exec->ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
+}
+
+
+
+
+/* Store N components (V0..V3) of attribute A into the vertex under
+ * construction.  Writing attribute 0 additionally emits the vertex:
+ * it is copied to the buffer cursor, the cursor and vertex count
+ * advance, and the buffer is wrapped once max_vert is reached.
+ * Expects a GLcontext pointer named 'ctx' in scope at the use site.
+ */
+#define ATTR( A, N, V0, V1, V2, V3 )				\
+do {								\
+   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec;	\
+								\
+   if (exec->vtx.active_sz[A] != N)				\
+      brw_exec_fixup_vertex(ctx, A, N);			\
+								\
+   {								\
+      GLfloat *dest = exec->vtx.attrptr[A];			\
+      if (N>0) dest[0] = V0;					\
+      if (N>1) dest[1] = V1;					\
+      if (N>2) dest[2] = V2;					\
+      if (N>3) dest[3] = V3;					\
+   }								\
+								\
+   if ((A) == 0) {						\
+      GLuint i;							\
+								\
+      for (i = 0; i < exec->vtx.vertex_size; i++)		\
+	 exec->vtx.vbptr[i] = exec->vtx.vertex[i];		\
+								\
+      exec->vtx.vbptr += exec->vtx.vertex_size;			\
+      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;	\
+								\
+      if (++exec->vtx.vert_count >= exec->vtx.max_vert)		\
+	 brw_exec_vtx_wrap( exec );				\
+   }								\
+} while (0)
+
+
+/* Hooks consumed by the brw_attrib_tmp.h template included below. */
+#define ERROR() _mesa_error( ctx, GL_INVALID_ENUM, __FUNCTION__ )
+#define TAG(x) brw_##x
+
+#include "brw_attrib_tmp.h"
+
+
+
+
+
+/* Eval
+ */
+static void GLAPIENTRY brw_exec_EvalCoord1f( GLfloat u )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec;
+
+   {
+      GLint i;
+      if (exec->eval.recalculate_maps)
+	 brw_exec_eval_update( exec );
+
+      for (i = 0 ; i <= BRW_ATTRIB_INDEX ; i++) {
+	 if (exec->eval.map1[i].map)
+	    if (exec->vtx.attrsz[i] != exec->eval.map1[i].sz)
+	       brw_exec_fixup_vertex(
ctx, i, exec->eval.map1[i].sz ); + } + } + + + _mesa_memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, + exec->vtx.vertex_size * sizeof(GLfloat)); + + brw_exec_do_EvalCoord1f( exec, u ); + + _mesa_memcpy( exec->vtx.vertex, exec->vtx.copied.buffer, + exec->vtx.vertex_size * sizeof(GLfloat)); +} + +static void GLAPIENTRY brw_exec_EvalCoord2f( GLfloat u, GLfloat v ) +{ + GET_CURRENT_CONTEXT( ctx ); + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + + { + GLint i; + if (exec->eval.recalculate_maps) + brw_exec_eval_update( exec ); + + for (i = 0 ; i <= BRW_ATTRIB_INDEX ; i++) { + if (exec->eval.map2[i].map) + if (exec->vtx.active_sz[i] != exec->eval.map2[i].sz) + brw_exec_fixup_vertex( ctx, i, exec->eval.map2[i].sz ); + } + + if (ctx->Eval.AutoNormal) + if (exec->vtx.active_sz[BRW_ATTRIB_NORMAL] != 3) + brw_exec_fixup_vertex( ctx, BRW_ATTRIB_NORMAL, 3 ); + } + + _mesa_memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, + exec->vtx.vertex_size * sizeof(GLfloat)); + + brw_exec_do_EvalCoord2f( exec, u, v ); + + _mesa_memcpy( exec->vtx.vertex, exec->vtx.copied.buffer, + exec->vtx.vertex_size * sizeof(GLfloat)); +} + +static void GLAPIENTRY brw_exec_EvalCoord1fv( const GLfloat *u ) +{ + brw_exec_EvalCoord1f( u[0] ); +} + +static void GLAPIENTRY brw_exec_EvalCoord2fv( const GLfloat *u ) +{ + brw_exec_EvalCoord2f( u[0], u[1] ); +} + +static void GLAPIENTRY brw_exec_EvalPoint1( GLint i ) +{ + GET_CURRENT_CONTEXT( ctx ); + GLfloat du = ((ctx->Eval.MapGrid1u2 - ctx->Eval.MapGrid1u1) / + (GLfloat) ctx->Eval.MapGrid1un); + GLfloat u = i * du + ctx->Eval.MapGrid1u1; + + brw_exec_EvalCoord1f( u ); +} + + +static void GLAPIENTRY brw_exec_EvalPoint2( GLint i, GLint j ) +{ + GET_CURRENT_CONTEXT( ctx ); + GLfloat du = ((ctx->Eval.MapGrid2u2 - ctx->Eval.MapGrid2u1) / + (GLfloat) ctx->Eval.MapGrid2un); + GLfloat dv = ((ctx->Eval.MapGrid2v2 - ctx->Eval.MapGrid2v1) / + (GLfloat) ctx->Eval.MapGrid2vn); + GLfloat u = i * du + ctx->Eval.MapGrid2u1; + GLfloat v = j * dv + 
ctx->Eval.MapGrid2v1; + + brw_exec_EvalCoord2f( u, v ); +} + + +/* Build a list of primitives on the fly. Keep + * ctx->Driver.CurrentExecPrimitive uptodate as well. + */ +static void GLAPIENTRY brw_exec_Begin( GLenum mode ) +{ + GET_CURRENT_CONTEXT( ctx ); + + if (ctx->Driver.CurrentExecPrimitive == GL_POLYGON+1) { + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + int i; + + if (ctx->NewState) { + _mesa_update_state( ctx ); + + if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || + (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBegin (invalid vertex/fragment program)"); + return; + } + + CALL_Begin(ctx->Exec, (mode)); + return; + } + + /* Heuristic: attempt to isolate attributes occuring outside + * begin/end pairs. + */ + if (exec->vtx.vertex_size && !exec->vtx.attrsz[0]) + brw_exec_FlushVertices( ctx, ~0 ); + + i = exec->vtx.prim_count++; + exec->vtx.prim[i].mode = mode; + exec->vtx.prim[i].begin = 1; + exec->vtx.prim[i].end = 0; + exec->vtx.prim[i].indexed = 0; + exec->vtx.prim[i].weak = 0; + exec->vtx.prim[i].pad = 0; + exec->vtx.prim[i].start = exec->vtx.vert_count; + exec->vtx.prim[i].count = 0; + + ctx->Driver.CurrentExecPrimitive = mode; + } + else + _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" ); + +} + +static void GLAPIENTRY brw_exec_End( void ) +{ + GET_CURRENT_CONTEXT( ctx ); + + if (ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) { + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + int idx = exec->vtx.vert_count; + int i = exec->vtx.prim_count - 1; + + exec->vtx.prim[i].end = 1; + exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start; + + ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1; + + if (exec->vtx.prim_count == BRW_MAX_PRIM) + brw_exec_vtx_flush( exec ); + } + else + _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" ); +} + + +static void brw_exec_vtxfmt_init( struct brw_exec_context *exec ) +{ + GLvertexformat *vfmt = &exec->vtxfmt; + + 
vfmt->ArrayElement = _ae_loopback_array_elt; /* generic helper */ + vfmt->Begin = brw_exec_Begin; + vfmt->CallList = _mesa_CallList; + vfmt->CallLists = _mesa_CallLists; + vfmt->End = brw_exec_End; + vfmt->EvalCoord1f = brw_exec_EvalCoord1f; + vfmt->EvalCoord1fv = brw_exec_EvalCoord1fv; + vfmt->EvalCoord2f = brw_exec_EvalCoord2f; + vfmt->EvalCoord2fv = brw_exec_EvalCoord2fv; + vfmt->EvalPoint1 = brw_exec_EvalPoint1; + vfmt->EvalPoint2 = brw_exec_EvalPoint2; + + vfmt->Rectf = _mesa_noop_Rectf; + vfmt->EvalMesh1 = _mesa_noop_EvalMesh1; + vfmt->EvalMesh2 = _mesa_noop_EvalMesh2; + + + /* from attrib_tmp.h: + */ + vfmt->Color3f = brw_Color3f; + vfmt->Color3fv = brw_Color3fv; + vfmt->Color4f = brw_Color4f; + vfmt->Color4fv = brw_Color4fv; + vfmt->FogCoordfEXT = brw_FogCoordfEXT; + vfmt->FogCoordfvEXT = brw_FogCoordfvEXT; + vfmt->MultiTexCoord1fARB = brw_MultiTexCoord1f; + vfmt->MultiTexCoord1fvARB = brw_MultiTexCoord1fv; + vfmt->MultiTexCoord2fARB = brw_MultiTexCoord2f; + vfmt->MultiTexCoord2fvARB = brw_MultiTexCoord2fv; + vfmt->MultiTexCoord3fARB = brw_MultiTexCoord3f; + vfmt->MultiTexCoord3fvARB = brw_MultiTexCoord3fv; + vfmt->MultiTexCoord4fARB = brw_MultiTexCoord4f; + vfmt->MultiTexCoord4fvARB = brw_MultiTexCoord4fv; + vfmt->Normal3f = brw_Normal3f; + vfmt->Normal3fv = brw_Normal3fv; + vfmt->SecondaryColor3fEXT = brw_SecondaryColor3fEXT; + vfmt->SecondaryColor3fvEXT = brw_SecondaryColor3fvEXT; + vfmt->TexCoord1f = brw_TexCoord1f; + vfmt->TexCoord1fv = brw_TexCoord1fv; + vfmt->TexCoord2f = brw_TexCoord2f; + vfmt->TexCoord2fv = brw_TexCoord2fv; + vfmt->TexCoord3f = brw_TexCoord3f; + vfmt->TexCoord3fv = brw_TexCoord3fv; + vfmt->TexCoord4f = brw_TexCoord4f; + vfmt->TexCoord4fv = brw_TexCoord4fv; + vfmt->Vertex2f = brw_Vertex2f; + vfmt->Vertex2fv = brw_Vertex2fv; + vfmt->Vertex3f = brw_Vertex3f; + vfmt->Vertex3fv = brw_Vertex3fv; + vfmt->Vertex4f = brw_Vertex4f; + vfmt->Vertex4fv = brw_Vertex4fv; + + vfmt->VertexAttrib1fARB = brw_VertexAttrib1fARB; + 
vfmt->VertexAttrib1fvARB = brw_VertexAttrib1fvARB; + vfmt->VertexAttrib2fARB = brw_VertexAttrib2fARB; + vfmt->VertexAttrib2fvARB = brw_VertexAttrib2fvARB; + vfmt->VertexAttrib3fARB = brw_VertexAttrib3fARB; + vfmt->VertexAttrib3fvARB = brw_VertexAttrib3fvARB; + vfmt->VertexAttrib4fARB = brw_VertexAttrib4fARB; + vfmt->VertexAttrib4fvARB = brw_VertexAttrib4fvARB; + + vfmt->VertexAttrib1fNV = brw_VertexAttrib1fNV; + vfmt->VertexAttrib1fvNV = brw_VertexAttrib1fvNV; + vfmt->VertexAttrib2fNV = brw_VertexAttrib2fNV; + vfmt->VertexAttrib2fvNV = brw_VertexAttrib2fvNV; + vfmt->VertexAttrib3fNV = brw_VertexAttrib3fNV; + vfmt->VertexAttrib3fvNV = brw_VertexAttrib3fvNV; + vfmt->VertexAttrib4fNV = brw_VertexAttrib4fNV; + vfmt->VertexAttrib4fvNV = brw_VertexAttrib4fvNV; + + vfmt->Materialfv = brw_Materialfv; + + vfmt->EdgeFlag = brw_EdgeFlag; + vfmt->Indexf = brw_Indexf; + vfmt->Indexfv = brw_Indexfv; + +} + + +static void brw_exec_current_init( struct brw_exec_context *exec ) +{ + GLcontext *ctx = exec->ctx; + GLint i; + + /* setup the pointers for the typical 16 vertex attributes */ + for (i = 0; i < BRW_ATTRIB_FIRST_MATERIAL; i++) + exec->vtx.current[i] = ctx->Current.Attrib[i]; + + /* setup pointers for the 12 material attributes */ + for (i = 0; i < MAT_ATTRIB_MAX; i++) + exec->vtx.current[BRW_ATTRIB_FIRST_MATERIAL + i] = + ctx->Light.Material.Attrib[i]; + + exec->vtx.current[BRW_ATTRIB_INDEX] = &ctx->Current.Index; + exec->vtx.current[BRW_ATTRIB_EDGEFLAG] = &exec->vtx.CurrentFloatEdgeFlag; +} + +void brw_exec_vtx_init( struct brw_exec_context *exec ) +{ + GLcontext *ctx = exec->ctx; + GLuint i; + + /* Allocate a buffer object. Will just reuse this object + * continuously. + */ + exec->vtx.bufferobj = ctx->Array.NullBufferObj; + exec->vtx.buffer_map = ALIGN_MALLOC(BRW_VERT_BUFFER_SIZE * sizeof(GLfloat), 64); + + brw_exec_current_init( exec ); + brw_exec_vtxfmt_init( exec ); + + /* Hook our functions into the dispatch table. 
+ */ + _mesa_install_exec_vtxfmt( exec->ctx, &exec->vtxfmt ); + + for (i = 0 ; i < BRW_ATTRIB_MAX ; i++) { + exec->vtx.attrsz[i] = 0; + exec->vtx.active_sz[i] = 0; + exec->vtx.inputs[i] = &exec->vtx.arrays[i]; + } + + exec->vtx.vertex_size = 0; +} + + +void brw_exec_vtx_destroy( struct brw_exec_context *exec ) +{ + if (exec->vtx.buffer_map) { + ALIGN_FREE(exec->vtx.buffer_map); + exec->vtx.buffer_map = NULL; + } +} + + +void brw_exec_FlushVertices( GLcontext *ctx, GLuint flags ) +{ + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + + if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) + return; + + if (exec->vtx.vert_count) { + brw_exec_vtx_flush( exec ); + } + + if (exec->vtx.vertex_size) { + brw_exec_copy_to_current( exec ); + reset_attrfv( exec ); + } + + exec->ctx->Driver.NeedFlush = 0; +} + + +static void reset_attrfv( struct brw_exec_context *exec ) +{ + GLuint i; + + for (i = 0 ; i < BRW_ATTRIB_MAX ; i++) { + exec->vtx.attrsz[i] = 0; + exec->vtx.active_sz[i] = 0; + } + + exec->vtx.vertex_size = 0; +} + diff --git a/src/mesa/drivers/dri/i965/brw_exec_array.c b/src/mesa/drivers/dri/i965/brw_exec_array.c new file mode 100644 index 00000000000..ca19a198374 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_exec_array.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "context.h" +#include "state.h" +#include "api_validate.h" +#include "api_noop.h" +#include "dispatch.h" + +#include "brw_attrib.h" +#include "brw_draw.h" +#include "brw_exec.h" +#include "brw_fallback.h" + +static GLuint get_max_index( GLuint count, GLuint type, + const GLvoid *indices ) +{ + GLint i; + + /* Compute max element. This is only needed for upload of non-VBO, + * non-constant data elements. + * + * XXX: Postpone this calculation until it is known that it is + * needed. Otherwise could scan this pointlessly in the all-vbo + * case. 
+ */ + switch(type) { + case GL_UNSIGNED_INT: { + const GLuint *ui_indices = (const GLuint *)indices; + GLuint max_ui = 0; + for (i = 0; i < count; i++) + if (ui_indices[i] > max_ui) + max_ui = ui_indices[i]; + return max_ui; + } + case GL_UNSIGNED_SHORT: { + const GLushort *us_indices = (const GLushort *)indices; + GLuint max_us = 0; + for (i = 0; i < count; i++) + if (us_indices[i] > max_us) + max_us = us_indices[i]; + return max_us; + } + case GL_UNSIGNED_BYTE: { + const GLubyte *ub_indices = (const GLubyte *)indices; + GLuint max_ub = 0; + for (i = 0; i < count; i++) + if (ub_indices[i] > max_ub) + max_ub = ub_indices[i]; + return max_ub; + } + default: + return 0; + } +} + + + + +/*********************************************************************** + * API functions. + */ + +static void GLAPIENTRY +brw_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) +{ + GET_CURRENT_CONTEXT(ctx); + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + struct brw_draw_prim prim[1]; + GLboolean ok; + + if (!_mesa_validate_DrawArrays( ctx, mode, start, count )) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + + if (exec->array.inputs[0]->BufferObj->Name) { + /* Use vertex attribute as a hint to tell us if we expect all + * arrays to be in VBO's and if so, don't worry about avoiding + * the upload of elements < start. + */ + prim[0].mode = mode; + prim[0].start = start; + prim[0].count = count; + prim[0].indexed = 0; + + ok = brw_draw_prims( ctx, exec->array.inputs, prim, 1, NULL, 0, start + count, 0 ); + } + else { + /* If not using VBO's, we don't want to upload any more elements + * than necessary from the arrays as they will not be valid next + * time the application tries to draw with them. 
+ */ + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count; + prim[0].indexed = 0; + + ok = brw_draw_prims( ctx, exec->array.inputs, prim, 1, NULL, start, start + count, 0 ); + } + + if (!ok) { + brw_fallback(ctx); + CALL_DrawArrays(ctx->Exec, ( mode, start, count )); + brw_unfallback(ctx); + } +} + + + +static void GLAPIENTRY +brw_exec_DrawRangeElements(GLenum mode, + GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; + struct brw_draw_index_buffer ib; + struct brw_draw_prim prim[1]; + + if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, type, indices )) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + ib.count = count; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = indices; + + if (ctx->Array.ElementArrayBufferObj->Name) { + /* Use the fact that indices are in a VBO as a hint that the + * program has put all the arrays in VBO's and we don't have to + * worry about performance implications of start > 0. + * + * XXX: consider passing start as min_index to draw_prims instead. 
+ */ + ib.rebase = 0; + } + else { + ib.rebase = start; + } + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count; + prim[0].indexed = 1; + + if (!brw_draw_prims( ctx, exec->array.inputs, prim, 1, &ib, ib.rebase, end+1, 0 )) { + brw_fallback(ctx); + CALL_DrawRangeElements(ctx->Exec, (mode, start, end, count, type, indices)); + brw_unfallback(ctx); + } +} + + +static void GLAPIENTRY +brw_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + GLuint max_index; + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices )) + return; + + if (ctx->Array.ElementArrayBufferObj->Name) { + const GLvoid *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_READ_ARB, + ctx->Array.ElementArrayBufferObj); + + max_index = get_max_index(count, type, ADD_POINTERS(map, indices)); + + ctx->Driver.UnmapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + ctx->Array.ElementArrayBufferObj); + } + else { + max_index = get_max_index(count, type, indices); + } + + brw_exec_DrawRangeElements(mode, 0, max_index, count, type, indices); +} + + +/*********************************************************************** + * Initialization + */ + + +static void init_arrays( GLcontext *ctx, + const struct gl_client_array *arrays[] ) +{ + struct gl_array_object *obj = ctx->Array.ArrayObj; + GLuint i; + + memset(arrays, 0, sizeof(*arrays) * BRW_ATTRIB_MAX); + + arrays[BRW_ATTRIB_POS] = &obj->Vertex; + arrays[BRW_ATTRIB_NORMAL] = &obj->Normal; + arrays[BRW_ATTRIB_COLOR0] = &obj->Color; + arrays[BRW_ATTRIB_COLOR1] = &obj->SecondaryColor; + arrays[BRW_ATTRIB_FOG] = &obj->FogCoord; + + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) + arrays[BRW_ATTRIB_TEX0 + i] = &obj->TexCoord[i]; + + arrays[BRW_ATTRIB_INDEX] = &obj->Index; + arrays[BRW_ATTRIB_EDGEFLAG] = &obj->EdgeFlag; + + for (i = BRW_ATTRIB_GENERIC0; i <= 
BRW_ATTRIB_GENERIC15; i++) + arrays[i] = &obj->VertexAttrib[i - BRW_ATTRIB_GENERIC0]; +} + + + + +void brw_exec_array_init( struct brw_exec_context *exec ) +{ + GLcontext *ctx = exec->ctx; + + init_arrays(ctx, exec->array.inputs); + +#if 1 + exec->vtxfmt.DrawArrays = brw_exec_DrawArrays; + exec->vtxfmt.DrawElements = brw_exec_DrawElements; + exec->vtxfmt.DrawRangeElements = brw_exec_DrawRangeElements; +#else + exec->vtxfmt.DrawArrays = _mesa_noop_DrawArrays; + exec->vtxfmt.DrawElements = _mesa_noop_DrawElements; + exec->vtxfmt.DrawRangeElements = _mesa_noop_DrawRangeElements; +#endif + + exec->array.index_obj = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); +} + + +void brw_exec_array_destroy( struct brw_exec_context *exec ) +{ + GLcontext *ctx = exec->ctx; + + ctx->Driver.DeleteBuffer(ctx, exec->array.index_obj); +} diff --git a/src/mesa/drivers/dri/i965/brw_exec_draw.c b/src/mesa/drivers/dri/i965/brw_exec_draw.c new file mode 100644 index 00000000000..62bda9845b9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_exec_draw.c @@ -0,0 +1,227 @@ +/* + * Mesa 3-D graphics library + * Version: 5.1 + * + * Copyright (C) 1999-2003 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "context.h" +#include "enums.h" +#include "state.h" +#include "macros.h" + +#include "brw_exec.h" +#include "brw_draw.h" +#include "brw_fallback.h" + + +static void brw_exec_debug_verts( struct brw_exec_context *exec ) +{ + GLuint count = exec->vtx.vert_count; + GLuint i; + + _mesa_printf("%s: %u vertices %d primitives, %d vertsize\n", + __FUNCTION__, + count, + exec->vtx.prim_count, + exec->vtx.vertex_size); + + for (i = 0 ; i < exec->vtx.prim_count ; i++) { + struct brw_draw_prim *prim = &exec->vtx.prim[i]; + _mesa_printf(" prim %d: %s%s %d..%d %s %s\n", + i, + _mesa_lookup_enum_by_nr(prim->mode), + prim->weak ? " (weak)" : "", + prim->start, + prim->start + prim->count, + prim->begin ? "BEGIN" : "(wrap)", + prim->end ? "END" : "(wrap)"); + } +} + + +/* + * NOTE: Need to have calculated primitives by this point -- do it on the fly. + * NOTE: Old 'parity' issue is gone. 
+ */ +static GLuint brw_copy_vertices( struct brw_exec_context *exec ) +{ + GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; + GLuint ovf, i; + GLuint sz = exec->vtx.vertex_size; + GLfloat *dst = exec->vtx.copied.buffer; + GLfloat *src = ((GLfloat *)exec->vtx.buffer_map + + exec->vtx.prim[exec->vtx.prim_count-1].start * + exec->vtx.vertex_size); + + + switch( exec->ctx->Driver.CurrentExecPrimitive ) + { + case GL_POINTS: + return 0; + case GL_LINES: + ovf = nr&1; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_TRIANGLES: + ovf = nr%3; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_QUADS: + ovf = nr&3; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_LINE_STRIP: + if (nr == 0) + return 0; + else { + _mesa_memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 1; + } + case GL_LINE_LOOP: + case GL_TRIANGLE_FAN: + case GL_POLYGON: + if (nr == 0) + return 0; + else if (nr == 1) { + _mesa_memcpy( dst, src+0, sz * sizeof(GLfloat) ); + return 1; + } else { + _mesa_memcpy( dst, src+0, sz * sizeof(GLfloat) ); + _mesa_memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 2; + } + case GL_TRIANGLE_STRIP: + case GL_QUAD_STRIP: + switch (nr) { + case 0: ovf = 0; break; + case 1: ovf = 1; break; + default: ovf = 2 + (nr&1); break; + } + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_POLYGON+1: + return 0; + default: + assert(0); + return 0; + } +} + + +/* TODO: populate these as the vertex is defined: + */ +static void brw_exec_bind_arrays( struct brw_exec_context *exec ) +{ + struct gl_client_array *arrays = exec->vtx.arrays; + GLuint count = exec->vtx.vert_count; + GLubyte *data = exec->vtx.buffer_map; + GLuint attr; + + memset(arrays, 0, BRW_ATTRIB_MAX * 
sizeof(arrays[0])); + + /* Make all active attributes (including edgeflag) available as + * arrays of floats. + */ + for (attr = 0; attr < BRW_ATTRIB_MAX ; attr++) { + if (exec->vtx.attrsz[attr]) { + arrays[attr].Ptr = (void *)data; + arrays[attr].Size = exec->vtx.attrsz[attr]; + arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Type = GL_FLOAT; + arrays[attr].Enabled = 1; + arrays[attr].BufferObj = exec->vtx.bufferobj; /* NullBufferObj */ + arrays[attr]._MaxElement = count; /* ??? */ + + data += exec->vtx.attrsz[attr] * sizeof(GLfloat); + } + } +} + + +static +void brw_exec_loopback_vertex_list( GLcontext *ctx, + struct brw_exec_context *exec ) +{ + const GLfloat *buffer = (const GLfloat *)exec->vtx.buffer_map; + + brw_fallback(ctx); + + brw_loopback_vertex_list( ctx, + buffer, + exec->vtx.attrsz, + exec->vtx.prim, + exec->vtx.prim_count, + 0, /* XXX - copied.nr */ + exec->vtx.vertex_size); + + + brw_unfallback(ctx); +} + + +/** + * Execute the buffer and save copied verts. 
+ */ +void brw_exec_vtx_flush( struct brw_exec_context *exec ) +{ + if (0) + brw_exec_debug_verts( exec ); + + + if (exec->vtx.prim_count && + exec->vtx.vert_count) { + + exec->vtx.copied.nr = brw_copy_vertices( exec ); + + if (exec->vtx.copied.nr != exec->vtx.vert_count) { + GLcontext *ctx = exec->ctx; + + brw_exec_bind_arrays( exec ); + + + if (!brw_draw_prims( ctx, + exec->vtx.inputs, + exec->vtx.prim, + exec->vtx.prim_count, + NULL, + 0, + exec->vtx.vert_count, + 0 )) { + /* Fallback path: + */ + brw_exec_loopback_vertex_list(ctx, exec); + } + } + } + + exec->vtx.prim_count = 0; + exec->vtx.vert_count = 0; + exec->vtx.vbptr = (GLfloat *)exec->vtx.buffer_map; +} diff --git a/src/mesa/drivers/dri/i965/brw_exec_eval.c b/src/mesa/drivers/dri/i965/brw_exec_eval.c new file mode 100644 index 00000000000..9dbeb1b58e4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_exec_eval.c @@ -0,0 +1,255 @@ +/* + * Mesa 3-D graphics library + * Version: 6.1 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "api_eval.h" +#include "context.h" +#include "macros.h" +#include "math/m_eval.h" +#include "brw_exec.h" +#include "dispatch.h" + + +static void clear_active_eval1( struct brw_exec_context *exec, GLuint attr ) +{ + exec->eval.map1[attr].map = NULL; +} + +static void clear_active_eval2( struct brw_exec_context *exec, GLuint attr ) +{ + exec->eval.map2[attr].map = NULL; +} + +static void set_active_eval1( struct brw_exec_context *exec, GLuint attr, GLuint dim, + struct gl_1d_map *map ) +{ + if (!exec->eval.map1[attr].map) { + exec->eval.map1[attr].map = map; + exec->eval.map1[attr].sz = dim; + } +} + +static void set_active_eval2( struct brw_exec_context *exec, GLuint attr, GLuint dim, + struct gl_2d_map *map ) +{ + if (!exec->eval.map2[attr].map) { + exec->eval.map2[attr].map = map; + exec->eval.map2[attr].sz = dim; + } +} + +void brw_exec_eval_update( struct brw_exec_context *exec ) +{ + GLcontext *ctx = exec->ctx; + GLuint attr; + + /* Vertex program maps have priority over conventional attribs */ + + for (attr = 0; attr < BRW_ATTRIB_FIRST_MATERIAL; attr++) { + clear_active_eval1( exec, attr ); + clear_active_eval2( exec, attr ); + } + + /* _NEW_PROGRAM */ + if (ctx->VertexProgram._Enabled) { + for (attr = 0; attr < BRW_ATTRIB_FIRST_MATERIAL; attr++) { + /* _NEW_EVAL */ + if (ctx->Eval.Map1Attrib[attr]) + set_active_eval1( exec, attr, 4, &ctx->EvalMap.Map1Attrib[attr] ); + + if (ctx->Eval.Map2Attrib[attr]) + set_active_eval2( exec, attr, 4, &ctx->EvalMap.Map2Attrib[attr] ); + } + } + + if (ctx->Eval.Map1Color4) + set_active_eval1( exec, BRW_ATTRIB_COLOR0, 4, &ctx->EvalMap.Map1Color4 ); + + if (ctx->Eval.Map2Color4) + 
set_active_eval2( exec, BRW_ATTRIB_COLOR0, 4, &ctx->EvalMap.Map2Color4 ); + + if (ctx->Eval.Map1TextureCoord4) + set_active_eval1( exec, BRW_ATTRIB_TEX0, 4, &ctx->EvalMap.Map1Texture4 ); + else if (ctx->Eval.Map1TextureCoord3) + set_active_eval1( exec, BRW_ATTRIB_TEX0, 3, &ctx->EvalMap.Map1Texture3 ); + else if (ctx->Eval.Map1TextureCoord2) + set_active_eval1( exec, BRW_ATTRIB_TEX0, 2, &ctx->EvalMap.Map1Texture2 ); + else if (ctx->Eval.Map1TextureCoord1) + set_active_eval1( exec, BRW_ATTRIB_TEX0, 1, &ctx->EvalMap.Map1Texture1 ); + + if (ctx->Eval.Map2TextureCoord4) + set_active_eval2( exec, BRW_ATTRIB_TEX0, 4, &ctx->EvalMap.Map2Texture4 ); + else if (ctx->Eval.Map2TextureCoord3) + set_active_eval2( exec, BRW_ATTRIB_TEX0, 3, &ctx->EvalMap.Map2Texture3 ); + else if (ctx->Eval.Map2TextureCoord2) + set_active_eval2( exec, BRW_ATTRIB_TEX0, 2, &ctx->EvalMap.Map2Texture2 ); + else if (ctx->Eval.Map2TextureCoord1) + set_active_eval2( exec, BRW_ATTRIB_TEX0, 1, &ctx->EvalMap.Map2Texture1 ); + + if (ctx->Eval.Map1Normal) + set_active_eval1( exec, BRW_ATTRIB_NORMAL, 3, &ctx->EvalMap.Map1Normal ); + + if (ctx->Eval.Map2Normal) + set_active_eval2( exec, BRW_ATTRIB_NORMAL, 3, &ctx->EvalMap.Map2Normal ); + + if (ctx->Eval.Map1Vertex4) + set_active_eval1( exec, BRW_ATTRIB_POS, 4, &ctx->EvalMap.Map1Vertex4 ); + else if (ctx->Eval.Map1Vertex3) + set_active_eval1( exec, BRW_ATTRIB_POS, 3, &ctx->EvalMap.Map1Vertex3 ); + + if (ctx->Eval.Map2Vertex4) + set_active_eval2( exec, BRW_ATTRIB_POS, 4, &ctx->EvalMap.Map2Vertex4 ); + else if (ctx->Eval.Map2Vertex3) + set_active_eval2( exec, BRW_ATTRIB_POS, 3, &ctx->EvalMap.Map2Vertex3 ); + + exec->eval.recalculate_maps = 0; +} + + + +void brw_exec_do_EvalCoord1f(struct brw_exec_context *exec, GLfloat u) +{ + GLuint attr; + + for (attr = 1; attr <= BRW_ATTRIB_INDEX; attr++) { + struct gl_1d_map *map = exec->eval.map1[attr].map; + if (map) { + GLfloat uu = (u - map->u1) * map->du; + GLfloat data[4]; + + ASSIGN_4V(data, 0, 0, 0, 1); + + 
_math_horner_bezier_curve(map->Points, data, uu, + exec->eval.map1[attr].sz, + map->Order); + + COPY_SZ_4V( exec->vtx.attrptr[attr], + exec->vtx.attrsz[attr], + data ); + } + } + + /** Vertex -- EvalCoord1f is a noop if this map not enabled: + **/ + if (exec->eval.map1[0].map) { + struct gl_1d_map *map = exec->eval.map1[0].map; + GLfloat uu = (u - map->u1) * map->du; + GLfloat vertex[4]; + + ASSIGN_4V(vertex, 0, 0, 0, 1); + + _math_horner_bezier_curve(map->Points, vertex, uu, + exec->eval.map1[0].sz, + map->Order); + + if (exec->eval.map1[0].sz == 4) + CALL_Vertex4fv(GET_DISPATCH(), ( vertex )); + else + CALL_Vertex3fv(GET_DISPATCH(), ( vertex )); + } +} + + + +void brw_exec_do_EvalCoord2f( struct brw_exec_context *exec, + GLfloat u, GLfloat v ) +{ + GLuint attr; + + for (attr = 1; attr <= BRW_ATTRIB_INDEX; attr++) { + struct gl_2d_map *map = exec->eval.map2[attr].map; + if (map) { + GLfloat uu = (u - map->u1) * map->du; + GLfloat vv = (v - map->v1) * map->dv; + GLfloat data[4]; + + ASSIGN_4V(data, 0, 0, 0, 1); + + _math_horner_bezier_surf(map->Points, + data, + uu, vv, + exec->eval.map2[attr].sz, + map->Uorder, map->Vorder); + + COPY_SZ_4V( exec->vtx.attrptr[attr], + exec->vtx.attrsz[attr], + data ); + } + } + + /** Vertex -- EvalCoord2f is a noop if this map not enabled: + **/ + if (exec->eval.map2[0].map) { + struct gl_2d_map *map = exec->eval.map2[0].map; + GLfloat uu = (u - map->u1) * map->du; + GLfloat vv = (v - map->v1) * map->dv; + GLfloat vertex[4]; + + ASSIGN_4V(vertex, 0, 0, 0, 1); + + if (exec->ctx->Eval.AutoNormal) { + GLfloat normal[4]; + GLfloat du[4], dv[4]; + + _math_de_casteljau_surf(map->Points, vertex, du, dv, uu, vv, + exec->eval.map2[0].sz, + map->Uorder, map->Vorder); + + if (exec->eval.map2[0].sz == 4) { + du[0] = du[0]*vertex[3] - du[3]*vertex[0]; + du[1] = du[1]*vertex[3] - du[3]*vertex[1]; + du[2] = du[2]*vertex[3] - du[3]*vertex[2]; + + dv[0] = dv[0]*vertex[3] - dv[3]*vertex[0]; + dv[1] = dv[1]*vertex[3] - dv[3]*vertex[1]; + dv[2] = 
dv[2]*vertex[3] - dv[3]*vertex[2]; + } + + + CROSS3(normal, du, dv); + NORMALIZE_3FV(normal); + normal[3] = 1.0; + + COPY_SZ_4V( exec->vtx.attrptr[BRW_ATTRIB_NORMAL], + exec->vtx.attrsz[BRW_ATTRIB_NORMAL], + normal ); + + } + else { + _math_horner_bezier_surf(map->Points, vertex, uu, vv, + exec->eval.map2[0].sz, + map->Uorder, map->Vorder); + } + + if (exec->vtx.attrsz[0] == 4) + CALL_Vertex4fv(GET_DISPATCH(), ( vertex )); + else + CALL_Vertex3fv(GET_DISPATCH(), ( vertex )); + } +} + + diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c new file mode 100644 index 00000000000..4d84ba88137 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -0,0 +1,468 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "swrast_setup/swrast_setup.h" +#include "swrast/swrast.h" +#include "tnl/tnl.h" +#include "context.h" +#include "brw_context.h" +#include "brw_exec.h" +#include "brw_save.h" +#include "brw_fallback.h" + +#include "glheader.h" +#include "enums.h" +#include "glapi.h" +#include "imports.h" +#include "macros.h" +#include "mtypes.h" +#include "dispatch.h" + + +typedef void (*attr_func)( GLcontext *ctx, GLint target, const GLfloat * ); + + +/* Wrapper functions in case glVertexAttrib*fvNV doesn't exist */ +static void VertexAttrib1fvNV(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib1fvNV(ctx->Exec, (target, v)); +} + +static void VertexAttrib2fvNV(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib2fvNV(ctx->Exec, (target, v)); +} + +static void VertexAttrib3fvNV(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib3fvNV(ctx->Exec, (target, v)); +} + +static void VertexAttrib4fvNV(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib4fvNV(ctx->Exec, (target, v)); +} + +static attr_func vert_attrfunc[4] = { + VertexAttrib1fvNV, + VertexAttrib2fvNV, + VertexAttrib3fvNV, + VertexAttrib4fvNV +}; + +#if 0 +static void VertexAttrib1fvARB(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib1fvARB(ctx->Exec, (target, v)); +} + +static void VertexAttrib2fvARB(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib2fvARB(ctx->Exec, (target, v)); +} + +static void VertexAttrib3fvARB(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib3fvARB(ctx->Exec, (target, v)); +} + +static void 
VertexAttrib4fvARB(GLcontext *ctx, GLint target, const GLfloat *v) +{ + CALL_VertexAttrib4fvARB(ctx->Exec, (target, v)); +} + + +static attr_func vert_attrfunc_arb[4] = { + VertexAttrib1fvARB, + VertexAttrib2fvARB, + VertexAttrib3fvARB, + VertexAttrib4fvARB +}; +#endif + + + + + + +static void mat_attr1fv( GLcontext *ctx, GLint target, const GLfloat *v ) +{ + switch (target) { + case BRW_ATTRIB_MAT_FRONT_SHININESS: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_SHININESS, v )); + break; + case BRW_ATTRIB_MAT_BACK_SHININESS: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_SHININESS, v )); + break; + } +} + + +static void mat_attr3fv( GLcontext *ctx, GLint target, const GLfloat *v ) +{ + switch (target) { + case BRW_ATTRIB_MAT_FRONT_INDEXES: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_COLOR_INDEXES, v )); + break; + case BRW_ATTRIB_MAT_BACK_INDEXES: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_COLOR_INDEXES, v )); + break; + } +} + + +static void mat_attr4fv( GLcontext *ctx, GLint target, const GLfloat *v ) +{ + switch (target) { + case BRW_ATTRIB_MAT_FRONT_EMISSION: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_EMISSION, v )); + break; + case BRW_ATTRIB_MAT_BACK_EMISSION: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_EMISSION, v )); + break; + case BRW_ATTRIB_MAT_FRONT_AMBIENT: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_AMBIENT, v )); + break; + case BRW_ATTRIB_MAT_BACK_AMBIENT: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_AMBIENT, v )); + break; + case BRW_ATTRIB_MAT_FRONT_DIFFUSE: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_DIFFUSE, v )); + break; + case BRW_ATTRIB_MAT_BACK_DIFFUSE: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_DIFFUSE, v )); + break; + case BRW_ATTRIB_MAT_FRONT_SPECULAR: + CALL_Materialfv(ctx->Exec, ( GL_FRONT, GL_SPECULAR, v )); + break; + case BRW_ATTRIB_MAT_BACK_SPECULAR: + CALL_Materialfv(ctx->Exec, ( GL_BACK, GL_SPECULAR, v )); + break; + } +} + + +static attr_func mat_attrfunc[4] = { + mat_attr1fv, + NULL, + mat_attr3fv, + mat_attr4fv +}; + + +static void 
index_attr1fv(GLcontext *ctx, GLint target, const GLfloat *v) +{ + (void) target; + CALL_Indexf(ctx->Exec, (v[0])); +} + +static void edgeflag_attr1fv(GLcontext *ctx, GLint target, const GLfloat *v) +{ + (void) target; + CALL_EdgeFlag(ctx->Exec, ((GLboolean)(v[0] == 1.0))); +} + +struct loopback_attr { + GLint target; + GLint sz; + attr_func func; +}; + +/* Don't emit ends and begins on wrapped primitives. Don't replay + * wrapped vertices. If we get here, it's probably because the the + * precalculated wrapping is wrong. + */ +static void loopback_prim( GLcontext *ctx, + const GLfloat *buffer, + const struct brw_draw_prim *prim, + GLuint wrap_count, + GLuint vertex_size, + const struct loopback_attr *la, GLuint nr ) +{ + GLint start = prim->start; + GLint end = start + prim->count; + const GLfloat *data; + GLint j; + GLuint k; + + if (0) + _mesa_printf("loopback prim %s(%s,%s) verts %d..%d\n", + _mesa_lookup_enum_by_nr(prim->mode), + prim->begin ? "begin" : "..", + prim->end ? "end" : "..", + start, + end); + + if (prim->begin) { + CALL_Begin(GET_DISPATCH(), ( prim->mode )); + } + else { + assert(start == 0); + start += wrap_count; + } + + data = buffer + start * vertex_size; + + for (j = start ; j < end ; j++) { + const GLfloat *tmp = data + la[0].sz; + + for (k = 1 ; k < nr ; k++) { + la[k].func( ctx, la[k].target, tmp ); + tmp += la[k].sz; + } + + /* Fire the vertex + */ + la[0].func( ctx, BRW_ATTRIB_POS, data ); + data = tmp; + } + + if (prim->end) { + CALL_End(GET_DISPATCH(), ()); + } +} + +/* Primitives generated by DrawArrays/DrawElements/Rectf may be + * caught here. If there is no primitive in progress, execute them + * normally, otherwise need to track and discard the generated + * primitives. + */ +static void loopback_weak_prim( GLcontext *ctx, + const struct brw_draw_prim *prim ) +{ + /* Use the prim_weak flag to ensure that if this primitive + * wraps, we don't mistake future vertex_lists for part of the + * surrounding primitive. 
+ * + * While this flag is set, we are simply disposing of data + * generated by an operation now known to be a noop. + */ + if (prim->begin) + ctx->Driver.CurrentExecPrimitive |= BRW_SAVE_PRIM_WEAK; + if (prim->end) + ctx->Driver.CurrentExecPrimitive &= ~BRW_SAVE_PRIM_WEAK; +} + + +void brw_loopback_vertex_list( GLcontext *ctx, + const GLfloat *buffer, + const GLubyte *attrsz, + const struct brw_draw_prim *prim, + GLuint prim_count, + GLuint wrap_count, + GLuint vertex_size) +{ + struct loopback_attr la[BRW_ATTRIB_MAX]; + GLuint i, nr = 0; + + for (i = 0 ; i <= BRW_ATTRIB_TEX7 ; i++) { + if (attrsz[i]) { + la[nr].target = i; + la[nr].sz = attrsz[i]; + la[nr].func = vert_attrfunc[attrsz[i]-1]; + nr++; + } + } + + for (i = BRW_ATTRIB_MAT_FRONT_AMBIENT ; + i <= BRW_ATTRIB_MAT_BACK_INDEXES ; + i++) { + if (attrsz[i]) { + la[nr].target = i; + la[nr].sz = attrsz[i]; + la[nr].func = mat_attrfunc[attrsz[i]-1]; + nr++; + } + } + + if (attrsz[BRW_ATTRIB_EDGEFLAG]) { + la[nr].target = BRW_ATTRIB_EDGEFLAG; + la[nr].sz = attrsz[BRW_ATTRIB_EDGEFLAG]; + la[nr].func = edgeflag_attr1fv; + nr++; + } + + if (attrsz[BRW_ATTRIB_INDEX]) { + la[nr].target = BRW_ATTRIB_INDEX; + la[nr].sz = attrsz[BRW_ATTRIB_INDEX]; + la[nr].func = index_attr1fv; + nr++; + } + + /* XXX ARB vertex attribs */ + + for (i = 0 ; i < prim_count ; i++) { + if ((prim[i].mode & BRW_SAVE_PRIM_WEAK) && + (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END)) + { + loopback_weak_prim( ctx, &prim[i] ); + } + else + { + loopback_prim( ctx, buffer, &prim[i], wrap_count, vertex_size, la, nr ); + } + } +} + + + + + + + + +static GLboolean do_check_fallback(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + + /* BRW_NEW_METAOPS + */ + if (brw->metaops.active) + return GL_FALSE; + + if (brw->intel.no_rast) + return GL_TRUE; + + /* _NEW_BUFFERS + */ + if (ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT && + ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_BACK_LEFT) + 
return GL_TRUE; + + /* _NEW_RENDERMODE + * + * XXX: need to save/restore RenderMode in metaops state, or + * somehow move to a new attribs pointer: + */ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + /* _NEW_TEXTURE: + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + if (texUnit->_ReallyEnabled) { + struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); + struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; + if (texImage->Border) + return GL_TRUE; + } + } + + /* _NEW_STENCIL + */ + if (brw->attribs.Stencil->Enabled && + !brw->intel.hw_stencil) { + return GL_TRUE; + } + + + return GL_FALSE; +} + +static void check_fallback(struct brw_context *brw) +{ + brw->intel.Fallback = do_check_fallback(brw); +} + +const struct brw_tracked_state brw_check_fallback = { + .dirty = { + .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, + .brw = BRW_NEW_METAOPS, + .cache = 0 + }, + .update = check_fallback +}; + + + + +/* If there is a fallback, fallback to software rasterization and + * transformation together. There is never a requirement to have + * software t&l but hardware rasterization. + * + * Further, all fallbacks are based on GL state, not on eg. primitive + * or vertex data. 
+ */ + +static void do_fallback( struct brw_context *brw, + GLboolean fallback ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* flush: + */ + ctx->Driver.Flush( ctx ); + + if (fallback) { + _swsetup_Wakeup( ctx ); + _tnl_wakeup_exec( ctx ); + + /* Need this because tnl_wakeup_exec does too much: + */ + brw_save_wakeup(ctx); + brw_save_fallback(ctx, GL_TRUE); + } + else { + /* Flush vertices and copy-to-current: + */ + FLUSH_CURRENT(ctx, 0); + + _swrast_flush( ctx ); + + brw_exec_wakeup(ctx); + + /* Need this because tnl_wakeup_exec does too much: + */ + brw_save_wakeup(ctx); + brw_save_fallback(ctx, GL_FALSE); + } +} + + +void brw_fallback( GLcontext *ctx ) +{ + struct brw_context *brw = brw_context(ctx); + do_fallback(brw, 1); +} + + +void brw_unfallback( GLcontext *ctx ) +{ + struct brw_context *brw = brw_context(ctx); + do_fallback(brw, 0); +} + +/* Not used: + */ +void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ) +{ +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_fallback.h b/src/mesa/drivers/dri/i965/brw_fallback.h new file mode 100644 index 00000000000..81a2d344b81 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fallback.h @@ -0,0 +1,47 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
#ifndef BRW_FALLBACK_H
#define BRW_FALLBACK_H

#include "mtypes.h"		/* for GLcontext... */

/* Only used through pointers here, so forward declarations suffice. */
struct brw_context;
struct brw_draw_prim;

/* Enter / leave the software fallback path (see brw_fallback.c). */
void brw_fallback( GLcontext *ctx );
void brw_unfallback( GLcontext *ctx );

/* Replay a stored vertex list through the immediate-mode dispatch table.
 * buffer holds vertex_size floats per vertex and attrsz gives the
 * component count of each attribute (0 = not present).
 */
void brw_loopback_vertex_list( GLcontext *ctx,
			       const GLfloat *buffer,
			       const GLubyte *attrsz,
			       const struct brw_draw_prim *prim,
			       GLuint prim_count,
			       GLuint wrap_count,
			       GLuint vertex_size);

#endif
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + struct brw_gs_compile c; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + + /* Need to locate the two positions present in vertex + header. 
+ * These are currently hardcoded: + */ + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case GL_QUADS: + brw_gs_quads( &c ); + break; + case GL_LINE_LOOP: + brw_gs_lines( &c ); + break; + case GL_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return; + } + break; + case GL_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return; + } + break; + case GL_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return; + } + break; + default: + return; + } + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->gs.prog_data ); +} + + +static GLboolean search_cache( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_GS_PROG], + key, sizeof(*key), + &brw->gs.prog_data, + &brw->gs.prog_gs_offset); +} + + +static const GLenum gs_prim[GL_POLYGON+1] = { + GL_POINTS, + GL_LINES, + GL_LINE_LOOP, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_QUADS, + GL_QUADS, + GL_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->attrs = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? 
*/ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == GL_QUADS || + brw->primitive == GL_QUAD_STRIP || + brw->primitive == GL_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_gs_prog( struct brw_context *brw ) +{ + struct brw_gs_prog_key key; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (brw->gs.prog_active) { + if (!search_cache(brw, &key)) + compile_gs_prog( brw, &key ); + } +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_PRIMITIVE, + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_gs_prog +}; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h new file mode 100644 index 00000000000..f9aa71d9199 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -0,0 +1,74 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef BRW_GS_H
#define BRW_GS_H


#include "brw_context.h"
#include "brw_eu.h"

/* Size of brw_gs_compile::reg.vertex[]. */
#define MAX_GS_VERTS (4)

/* Cache key for a GS program.  The bitfield widths add up to 32. */
struct brw_gs_prog_key {
   GLuint primitive:4;       /* reduced primitive, taken from gs_prim[] */
   GLuint attrs:16;          /* VS outputs_written */
   GLuint hint_gs_always:1;  /* always 0 in populate_key() at present */
   GLuint need_gs_prog:1;    /* whether a GS program is required */
   GLuint pad:10;
};

/* Working state for compiling one GS program. */
struct brw_gs_compile {
   struct brw_compile func;
   struct brw_gs_prog_key key;
   struct brw_gs_prog_data prog_data;

   /* Register assignment, filled in by brw_gs_alloc_regs(). */
   struct {
      struct brw_reg R0;
      struct brw_reg vertex[MAX_GS_VERTS];
   } reg;

   /* 3 different ways of expressing vertex size:
    */
   GLuint nr_attrs;
   GLuint nr_regs;
   GLuint nr_bytes;
};

#define ATTR_SIZE  (4*4)

/* Emitters, one per primitive type (brw_gs_emit.c). */
void brw_gs_quads( struct brw_gs_compile *c );
void brw_gs_tris( struct brw_gs_compile *c );
void brw_gs_lines( struct brw_gs_compile *c );
void brw_gs_points( struct brw_gs_compile *c );

#endif
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "shader/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + GLuint nr_verts ) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + GLboolean last, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLboolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 
0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c new file mode 100644 index 00000000000..5826c01d4f9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -0,0 +1,89 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "macros.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_state gs; + + memset(&gs, 0, sizeof(gs)); + + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16; + gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } + else { + gs.thread0.grf_reg_count = 0; + gs.thread0.kernel_start_pointer = 0; + gs.thread3.urb_entry_read_length = 1; + } + + /* BRW_NEW_URB_FENCE */ + gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (INTEL_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + /* CONSTANT */ + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + + + brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .update = upload_gs_unit +}; diff --git a/src/mesa/drivers/dri/i965/brw_hal.c b/src/mesa/drivers/dri/i965/brw_hal.c new file mode 100644 index 00000000000..3126102749b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_hal.c @@ -0,0 +1,52 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_hal.h" +#include <dlfcn.h> + +static void *brw_hal_lib; +static GLboolean brw_hal_tried; + +void * +brw_hal_find_symbol (char *symbol) +{ + if (!brw_hal_tried) + { + char *brw_hal_name = getenv ("INTEL_HAL"); + + if (!brw_hal_name) + brw_hal_name = "/usr/lib/xorg/modules/drivers/intel_hal.so"; + + brw_hal_lib = dlopen (brw_hal_name, RTLD_LAZY|RTLD_LOCAL); + brw_hal_tried = 1; + } + if (!brw_hal_lib) + return NULL; + return dlsym (brw_hal_lib, symbol); +} diff --git a/src/mesa/drivers/dri/i965/brw_hal.h b/src/mesa/drivers/dri/i965/brw_hal.h new file mode 100644 index 00000000000..cd86e395b59 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_hal.h @@ -0,0 +1,27 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + +void * +brw_hal_find_symbol (char *symbol); diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c new file mode 100644 index 00000000000..18ca7b13412 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_metaops.c @@ -0,0 +1,483 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "glheader.h" +#include "context.h" +#include "macros.h" +#include "enums.h" +#include "dd.h" + +#include "shader/arbprogparse.h" + +#include "intel_screen.h" +#include "intel_batchbuffer.h" +#include "intel_regions.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_attrib.h" +#include "brw_fallback.h" + +#define INIT(brw, STRUCT, ATTRIB) \ +do { \ + brw->attribs.ATTRIB = &ctx->ATTRIB; \ +} while (0) + +#define DUP(brw, STRUCT, ATTRIB) \ +do { \ + brw->metaops.attribs.ATTRIB = MALLOC_STRUCT(STRUCT); \ + memcpy(brw->metaops.attribs.ATTRIB, \ + brw->attribs.ATTRIB, \ + sizeof(struct STRUCT)); \ +} while (0) + + +#define INSTALL(brw, ATTRIB, STATE) \ +do { \ + brw->attribs.ATTRIB = brw->metaops.attribs.ATTRIB; \ + brw->state.dirty.mesa |= STATE; \ +} while (0) + +#define RESTORE(brw, ATTRIB, STATE) \ +do { \ + brw->attribs.ATTRIB = &brw->intel.ctx.ATTRIB; \ + brw->state.dirty.mesa |= STATE; \ +} while (0) + +static void init_attribs( struct brw_context *brw ) +{ + DUP(brw, gl_colorbuffer_attrib, Color); + DUP(brw, gl_depthbuffer_attrib, Depth); + DUP(brw, gl_fog_attrib, Fog); + DUP(brw, gl_hint_attrib, Hint); + DUP(brw, gl_light_attrib, Light); + DUP(brw, gl_line_attrib, Line); + DUP(brw, gl_point_attrib, Point); + DUP(brw, gl_polygon_attrib, Polygon); + DUP(brw, gl_scissor_attrib, Scissor); + DUP(brw, gl_stencil_attrib, Stencil); + DUP(brw, gl_texture_attrib, Texture); + DUP(brw, gl_transform_attrib, Transform); + DUP(brw, gl_viewport_attrib, Viewport); + DUP(brw, gl_vertex_program_state, VertexProgram); + DUP(brw, gl_fragment_program_state, FragmentProgram); +} + +static void install_attribs( struct brw_context *brw ) +{ + INSTALL(brw, Color, _NEW_COLOR); + INSTALL(brw, Depth, _NEW_DEPTH); + INSTALL(brw, Fog, _NEW_FOG); + INSTALL(brw, Hint, _NEW_HINT); + INSTALL(brw, 
Light, _NEW_LIGHT); + INSTALL(brw, Line, _NEW_LINE); + INSTALL(brw, Point, _NEW_POINT); + INSTALL(brw, Polygon, _NEW_POLYGON); + INSTALL(brw, Scissor, _NEW_SCISSOR); + INSTALL(brw, Stencil, _NEW_STENCIL); + INSTALL(brw, Texture, _NEW_TEXTURE); + INSTALL(brw, Transform, _NEW_TRANSFORM); + INSTALL(brw, Viewport, _NEW_VIEWPORT); + INSTALL(brw, VertexProgram, _NEW_PROGRAM); + INSTALL(brw, FragmentProgram, _NEW_PROGRAM); +} + +static void restore_attribs( struct brw_context *brw ) +{ + RESTORE(brw, Color, _NEW_COLOR); + RESTORE(brw, Depth, _NEW_DEPTH); + RESTORE(brw, Fog, _NEW_FOG); + RESTORE(brw, Hint, _NEW_HINT); + RESTORE(brw, Light, _NEW_LIGHT); + RESTORE(brw, Line, _NEW_LINE); + RESTORE(brw, Point, _NEW_POINT); + RESTORE(brw, Polygon, _NEW_POLYGON); + RESTORE(brw, Scissor, _NEW_SCISSOR); + RESTORE(brw, Stencil, _NEW_STENCIL); + RESTORE(brw, Texture, _NEW_TEXTURE); + RESTORE(brw, Transform, _NEW_TRANSFORM); + RESTORE(brw, Viewport, _NEW_VIEWPORT); + RESTORE(brw, VertexProgram, _NEW_PROGRAM); + RESTORE(brw, FragmentProgram, _NEW_PROGRAM); +} + + +static const char *vp_prog = + "!!ARBvp1.0\n" + "MOV result.color, vertex.color;\n" + "MOV result.position, vertex.position;\n" + "END\n"; + +static const char *fp_prog = + "!!ARBfp1.0\n" + "MOV result.color, fragment.color;\n" + "END\n"; + +/* Derived values of importance: + * + * FragmentProgram->_Current + * VertexProgram->_Enabled + * brw->vertex_program + * DrawBuffer->_ColorDrawBufferMask[0] + * + * + * More if drawpixels-through-texture is added. 
+ */ +static void init_metaops_state( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + brw->metaops.vbo = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); + + ctx->Driver.BufferData(ctx, + GL_ARRAY_BUFFER_ARB, + 4096, + NULL, + GL_DYNAMIC_DRAW_ARB, + brw->metaops.vbo); + + brw->metaops.fp = (struct gl_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 ); + + brw->metaops.vp = (struct gl_vertex_program *) + ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1 ); + + _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, + fp_prog, strlen(fp_prog), + brw->metaops.fp); + + _mesa_parse_arb_vertex_program(ctx, GL_VERTEX_PROGRAM_ARB, + vp_prog, strlen(vp_prog), + brw->metaops.vp); + + brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp; + brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE; + + brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; +} + +static void meta_flat_shade( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.Light->ShadeModel = GL_FLAT; + brw->state.dirty.mesa |= _NEW_LIGHT; +} + + +static void meta_no_stencil_write( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.Stencil->Enabled = GL_FALSE; + brw->metaops.attribs.Stencil->WriteMask[0] = GL_FALSE; + brw->state.dirty.mesa |= _NEW_STENCIL; +} + +static void meta_no_depth_write( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.Depth->Test = GL_FALSE; + brw->metaops.attribs.Depth->Mask = GL_FALSE; + brw->state.dirty.mesa |= _NEW_DEPTH; +} + + +static void meta_depth_replace( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE ) + * ctx->Driver.DepthMask( ctx, GL_TRUE ) + */ + brw->metaops.attribs.Depth->Test = GL_TRUE; + 
brw->metaops.attribs.Depth->Mask = GL_TRUE; + brw->state.dirty.mesa |= _NEW_DEPTH; + + /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) + */ + brw->metaops.attribs.Depth->Func = GL_ALWAYS; + + brw->state.dirty.mesa |= _NEW_DEPTH; +} + + +static void meta_stencil_replace( struct intel_context *intel, + GLuint s_mask, + GLuint s_clear) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.Stencil->Enabled = GL_TRUE; + brw->metaops.attribs.Stencil->WriteMask[0] = s_mask; + brw->metaops.attribs.Stencil->ValueMask[0] = 0xff; + brw->metaops.attribs.Stencil->Ref[0] = s_clear; + brw->metaops.attribs.Stencil->Function[0] = GL_ALWAYS; + brw->metaops.attribs.Stencil->FailFunc[0] = GL_REPLACE; + brw->metaops.attribs.Stencil->ZPassFunc[0] = GL_REPLACE; + brw->metaops.attribs.Stencil->ZFailFunc[0] = GL_REPLACE; + brw->state.dirty.mesa |= _NEW_STENCIL; +} + + +static void meta_color_mask( struct intel_context *intel, GLboolean state ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (state) + COPY_4V(brw->metaops.attribs.Color->ColorMask, + brw->intel.ctx.Color.ColorMask); + else + ASSIGN_4V(brw->metaops.attribs.Color->ColorMask, 0, 0, 0, 0); + + brw->state.dirty.mesa |= _NEW_COLOR; +} + +static void meta_no_texture( struct intel_context *intel ) +{ + /* Nothing to do */ +} + + +static void meta_draw_region( struct intel_context *intel, + struct intel_region *draw_region, + struct intel_region *depth_region ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->metaops.saved_draw_region) { + brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_depth_region = brw->state.depth_region; + } + + brw->state.draw_region = draw_region; + brw->state.depth_region = depth_region; + + brw->state.dirty.mesa |= _NEW_BUFFERS; +} + + +static void meta_draw_quad(struct intel_context *intel, + GLfloat x0, GLfloat x1, + GLfloat y0, GLfloat y1, + GLfloat z, + GLubyte red, GLubyte green, + GLubyte blue, GLubyte alpha, + 
GLfloat s0, GLfloat s1, + GLfloat t0, GLfloat t1) +{ + GLcontext *ctx = &intel->ctx; + struct brw_context *brw = brw_context(&intel->ctx); + struct gl_client_array pos_array; + struct gl_client_array color_array; + struct gl_client_array *attribs[BRW_ATTRIB_MAX]; + struct brw_draw_prim prim[1]; + GLfloat pos[4][3]; + GLubyte color[4]; + + ctx->Driver.BufferData(ctx, + GL_ARRAY_BUFFER_ARB, + sizeof(pos) + sizeof(color), + NULL, + GL_DYNAMIC_DRAW_ARB, + brw->metaops.vbo); + + pos[0][0] = x0; + pos[0][1] = y0; + pos[0][2] = z; + + pos[1][0] = x1; + pos[1][1] = y0; + pos[1][2] = z; + + pos[2][0] = x1; + pos[2][1] = y1; + pos[2][2] = z; + + pos[3][0] = x0; + pos[3][1] = y1; + pos[3][2] = z; + + + ctx->Driver.BufferSubData(ctx, + GL_ARRAY_BUFFER_ARB, + 0, + sizeof(pos), + pos, + brw->metaops.vbo); + + color[0] = red; + color[1] = green; + color[2] = blue; + color[3] = alpha; + + ctx->Driver.BufferSubData(ctx, + GL_ARRAY_BUFFER_ARB, + sizeof(pos), + sizeof(color), + color, + brw->metaops.vbo); + + /* Ignoring texture coords. 
+ */ + + memset(attribs, 0, BRW_ATTRIB_MAX * sizeof(*attribs)); + + attribs[BRW_ATTRIB_POS] = &pos_array; + attribs[BRW_ATTRIB_POS]->Ptr = 0; + attribs[BRW_ATTRIB_POS]->Type = GL_FLOAT; + attribs[BRW_ATTRIB_POS]->Enabled = 1; + attribs[BRW_ATTRIB_POS]->Size = 3; + attribs[BRW_ATTRIB_POS]->StrideB = 3 * sizeof(GLfloat); + attribs[BRW_ATTRIB_POS]->Stride = 3 * sizeof(GLfloat); + attribs[BRW_ATTRIB_POS]->_MaxElement = 4; + attribs[BRW_ATTRIB_POS]->Normalized = 0; + attribs[BRW_ATTRIB_POS]->BufferObj = brw->metaops.vbo; + + attribs[BRW_ATTRIB_COLOR0] = &color_array; + attribs[BRW_ATTRIB_COLOR0]->Ptr = (const GLubyte *)sizeof(pos); + attribs[BRW_ATTRIB_COLOR0]->Type = GL_UNSIGNED_BYTE; + attribs[BRW_ATTRIB_COLOR0]->Enabled = 1; + attribs[BRW_ATTRIB_COLOR0]->Size = 4; + attribs[BRW_ATTRIB_COLOR0]->StrideB = 0; + attribs[BRW_ATTRIB_COLOR0]->Stride = 0; + attribs[BRW_ATTRIB_COLOR0]->_MaxElement = 1; + attribs[BRW_ATTRIB_COLOR0]->Normalized = 1; + attribs[BRW_ATTRIB_COLOR0]->BufferObj = brw->metaops.vbo; + + /* Just ignoring texture coordinates for now. + */ + + memset(prim, 0, sizeof(*prim)); + + prim[0].mode = GL_TRIANGLE_FAN; + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].start = 0; + prim[0].count = 4; + + if (!brw_draw_prims(&brw->intel.ctx, + (const struct gl_client_array **)attribs, + prim, 1, + NULL, + 0, + 4, + BRW_DRAW_LOCKED )) + { + /* This should not be possible: + */ + _mesa_printf("brw_draw_prims failed in metaops!\n"); + assert(0); + } +} + + +static void install_meta_state( struct intel_context *intel ) +{ + GLcontext *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + + if (!brw->metaops.vbo) { + init_metaops_state(brw); + } + + install_attribs(brw); + meta_no_texture(&brw->intel); + meta_flat_shade(&brw->intel); + brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0]; + + /* This works without adjusting refcounts. Fix later? 
+ */ + brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_depth_region = brw->state.depth_region; + brw->metaops.active = 1; + + brw->state.dirty.brw |= BRW_NEW_METAOPS; +} + +static void leave_meta_state( struct intel_context *intel ) +{ + GLcontext *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + + restore_attribs(brw); + + ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask; + + brw->state.draw_region = brw->metaops.saved_draw_region; + brw->state.depth_region = brw->metaops.saved_depth_region; + brw->metaops.saved_draw_region = NULL; + brw->metaops.saved_depth_region = NULL; + brw->metaops.active = 0; + + brw->state.dirty.mesa |= _NEW_BUFFERS; + brw->state.dirty.brw |= BRW_NEW_METAOPS; +} + + + +void brw_init_metaops( struct brw_context *brw ) +{ + init_attribs(brw); + + + brw->intel.vtbl.install_meta_state = install_meta_state; + brw->intel.vtbl.leave_meta_state = leave_meta_state; + brw->intel.vtbl.meta_no_depth_write = meta_no_depth_write; + brw->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write; + brw->intel.vtbl.meta_stencil_replace = meta_stencil_replace; + brw->intel.vtbl.meta_depth_replace = meta_depth_replace; + brw->intel.vtbl.meta_color_mask = meta_color_mask; + brw->intel.vtbl.meta_no_texture = meta_no_texture; + brw->intel.vtbl.meta_draw_region = meta_draw_region; + brw->intel.vtbl.meta_draw_quad = meta_draw_quad; + +/* brw->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; */ +/* brw->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; */ +/* brw->intel.vtbl.meta_draw_format = set_draw_format; */ +} + +void brw_destroy_metaops( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + if (brw->metaops.vbo) + ctx->Driver.DeleteBuffer( ctx, brw->metaops.vbo ); + +/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp ); */ +/* ctx->Driver.DeleteProgram( ctx, brw->metaops.vp ); */ +} diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c new file mode 100644 index 00000000000..29296c17e9e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -0,0 +1,532 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "intel_batchbuffer.h" +#include "intel_regions.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = brw->attribs.Color->BlendColor[0]; + bcc.blend_constant_color[1] = brw->attribs.Color->BlendColor[1]; + bcc.blend_constant_color[2] = brw->attribs.Color->BlendColor[2]; + bcc.blend_constant_color[3] = brw->attribs.Color->BlendColor[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0 + }, + .update = upload_blend_constant_color +}; + +/*********************************************************************** + * Drawing rectangle -- Need for AUB file only. + */ + +static void upload_drawing_rect(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + struct brw_drawrect bdr; + int x1, y1; + int x2, y2; + + if (!brw->intel.aub_file) + return; + + /* Basically calculate a single cliprect for the whole window. + * Don't bother iterating over cliprects at the moment. 
+ */ + + x1 = dPriv->x; + y1 = dPriv->y; + x2 = dPriv->x + dPriv->w; + y2 = dPriv->y + dPriv->h; + + if (x1 < 0) x1 = 0; + if (y1 < 0) y1 = 0; + if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; + if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; + + memset(&bdr, 0, sizeof(bdr)); + bdr.header.opcode = CMD_DRAW_RECT; + bdr.header.length = sizeof(bdr)/4 - 2; + bdr.xmin = x1; + bdr.ymin = y1; + bdr.xmax = x2; + bdr.ymax = y2; + bdr.xorg = dPriv->x; + bdr.yorg = dPriv->y; + + BRW_CACHED_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .mesa = _NEW_WINDOW_POS, + .brw = 0, + .cache = 0 + }, + .update = upload_drawing_rect +}; + +/*********************************************************************** + * Binding table pointers + */ + +static void upload_binding_table_pointers(struct brw_context *brw) +{ + struct brw_binding_table_pointers btp; + memset(&btp, 0, sizeof(btp)); + + /* The binding table has been emitted to the SS pool already, so we + * know what its offset is. When the batch buffer is fired, the + * binding table and surface structs will get fixed up to point to + * where the textures actually landed, but that won't change the + * value of the offsets here: + */ + btp.header.opcode = CMD_BINDING_TABLE_PTRS; + btp.header.length = sizeof(btp)/4 - 2; + btp.vs = 0; + btp.gs = 0; + btp.clp = 0; + btp.sf = 0; + btp.wm = brw->wm.bind_ss_offset; + + BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = 0, + .cache = CACHE_NEW_SURF_BIND + }, + .update = upload_binding_table_pointers +}; + + +/*********************************************************************** + * Pipelined state pointers. This is the key state packet from which + * the hardware chases pointers to all the uploaded state in VRAM. 
+ */ + +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ + struct brw_pipelined_state_pointers psp; + memset(&psp, 0, sizeof(psp)); + + psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; + psp.header.length = sizeof(psp)/4 - 2; + + psp.vs.offset = brw->vs.state_gs_offset >> 5; + psp.sf.offset = brw->sf.state_gs_offset >> 5; + psp.wm.offset = brw->wm.state_gs_offset >> 5; + psp.cc.offset = brw->cc.state_gs_offset >> 5; + + /* GS gets turned on and off regularly. Need to re-emit URB fence + * after this occurs. + */ + if (brw->gs.prog_active) { + psp.gs.offset = brw->gs.state_gs_offset >> 5; + psp.gs.enable = 1; + } + + if (!brw->metaops.active) { + psp.clp.offset = brw->clip.state_gs_offset >> 5; + psp.clp.enable = 1; + } + + + if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) + brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_METAOPS, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ + upload_pipelined_state_pointers(brw); + brw_upload_urb_fence(brw); + brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_psp_urb_cbs +}; + + + + +/*********************************************************************** + * Depthbuffer - currently constant, but rotation would change that. 
+ */ + +static void upload_depthbuffer(struct brw_context *brw) +{ + /* 0x79050003 Depth Buffer */ + struct intel_context *intel = &brw->intel; + struct intel_region *region = brw->state.depth_region; + struct brw_depthbuffer bd; + memset(&bd, 0, sizeof(bd)); + + bd.header.bits.opcode = CMD_DEPTH_BUFFER; + bd.header.bits.length = sizeof(bd)/4-2; + bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1; + + switch (region->cpp) { + case 2: + bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (intel->depth_buffer_is_float) + bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT; + else + bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */ + + /* The depthbuffer can only use YMAJOR tiling... This is a bit of + * a shame as it clashes with the 2d blitter which only supports + * XMAJOR tiling... + */ + bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR; + bd.dword1.bits.tiled_surface = intel->depth_region->tiled; + bd.dword1.bits.surface_type = BRW_SURFACE_2D; + + /* BRW_NEW_LOCK */ + bd.dword2_base_addr = bmBufferOffset(intel, region->buffer); + + bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + bd.dword3.bits.lod = 0; + bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? 
*/ + bd.dword3.bits.height = region->height - 1; + + bd.dword4.bits.min_array_element = 0; + bd.dword4.bits.depth = 0; + + BRW_CACHED_BATCH_STRUCT(brw, &bd); +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, + .cache = 0 + }, + .update = upload_depthbuffer +}; + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ + struct brw_polygon_stipple bps; + GLuint i; + + memset(&bps, 0, sizeof(bps)); + bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.length = sizeof(bps)/4-2; + + for (i = 0; i < 32; i++) + bps.stipple[i] = brw->attribs.PolygonStipple[31 - i]; /* invert */ + + BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .mesa = _NEW_POLYGONSTIPPLE, + .brw = 0, + .cache = 0 + }, + .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Polygon stipple offset packet + */ + +static void upload_polygon_stipple_offset(struct brw_context *brw) +{ + __DRIdrawablePrivate *dPriv = brw->intel.driDrawable; + struct brw_polygon_stipple_offset bpso; + + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + + bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + + BRW_CACHED_BATCH_STRUCT(brw, &bpso); +} + +const struct brw_tracked_state brw_polygon_stipple_offset = { + .dirty = { + .mesa = _NEW_WINDOW_POS, + .brw = 0, + .cache = 0 + }, + .update = upload_polygon_stipple_offset +}; + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ + struct brw_line_stipple bls; + GLfloat tmp; + GLint tmpi; + + memset(&bls, 0, 
sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + + bls.bits0.pattern = brw->attribs.Line->StipplePattern; + bls.bits1.repeat_count = brw->attribs.Line->StippleFactor; + + tmp = 1.0 / (GLfloat) brw->attribs.Line->StippleFactor; + tmpi = tmp * (1<<13); + + + bls.bits1.inverse_repeat_count = tmpi; + + BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .mesa = _NEW_LINE, + .brw = 0, + .cache = 0 + }, + .update = upload_line_stipple +}; + + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct brw_context *brw ) +{ + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. 
+ */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + vfs.opcode = CMD_VF_STATISTICS; + if (INTEL_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .update = upload_invarient_state +}; + + +/* State pool addresses: + */ +static void upload_state_base_address( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_state_base_address sba; + + memset(&sba, 0, sizeof(sba)); + + sba.header.opcode = CMD_STATE_BASE_ADDRESS; + sba.header.length = 0x4; + + /* BRW_NEW_LOCK */ + sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5; + sba.bits0.modify_enable = 1; + + /* BRW_NEW_LOCK */ + sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5; + sba.bits1.modify_enable = 1; + + sba.bits2.modify_enable = 1; + sba.bits3.modify_enable = 1; + sba.bits4.modify_enable = 1; + + BRW_CACHED_BATCH_STRUCT(brw, &sba); +} + + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, + .cache = 0 + }, + .update = upload_state_base_address +}; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c new file mode 100644 index 00000000000..1ae065b10dd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -0,0 +1,147 @@ +/* + 
Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_aub.h" +#include "brw_util.h" +#include "program.h" +#include "imports.h" +#include "enums.h" +#include "tnl/tnl.h" + + +static void brwBindProgram( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + struct brw_context *brw = brw_context(ctx); + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + break; + case GL_FRAGMENT_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + break; + } +} + +static struct gl_program *brwNewProgram( GLcontext *ctx, + GLenum target, + GLuint id ) +{ + struct brw_context *brw = brw_context(ctx); + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: { + struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_vertex_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + case GL_FRAGMENT_PROGRAM_ARB: { + struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_fragment_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + default: + return _mesa_new_program(ctx, target, id); + } +} + +static void brwDeleteProgram( GLcontext *ctx, + struct gl_program *prog ) +{ + + _mesa_delete_program( ctx, prog ); +} + + +static GLboolean brwIsProgramNative( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + return GL_TRUE; +} + +static void brwProgramStringNotify( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct brw_context *brw = brw_context(ctx); + struct brw_fragment_program *p = (struct brw_fragment_program *)prog; + struct brw_fragment_program *fp = (struct brw_fragment_program 
*)brw->fragment_program; + if (p == fp) + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + p->id = brw->program_id++; + p->param_state = brw_parameter_list_state_flags(p->program.Base.Parameters); + } + else if (target == GL_VERTEX_PROGRAM_ARB) { + struct brw_context *brw = brw_context(ctx); + struct brw_vertex_program *p = (struct brw_vertex_program *)prog; + struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + if (p == vp) + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + p->id = brw->program_id++; + p->param_state = brw_parameter_list_state_flags(p->program.Base.Parameters); + + /* Also tell tnl about it: + */ + _tnl_program_string(ctx, target, prog); + } +} + +void brwInitFragProgFuncs( struct dd_function_table *functions ) +{ + assert(functions->ProgramStringNotify == _tnl_program_string); + + functions->BindProgram = brwBindProgram; + functions->NewProgram = brwNewProgram; + functions->DeleteProgram = brwDeleteProgram; + functions->IsProgramNative = brwIsProgramNative; + functions->ProgramStringNotify = brwProgramStringNotify; +} + diff --git a/src/mesa/drivers/dri/i965/brw_save.c b/src/mesa/drivers/dri/i965/brw_save.c new file mode 100644 index 00000000000..1af7791c4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_save.c @@ -0,0 +1,126 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "mtypes.h" +#include "api_arrayelt.h" +#include "dlist.h" +#include "vtxfmt.h" +#include "imports.h" + +#include "brw_save.h" + + + +void brw_save_init( GLcontext *ctx ) +{ + struct brw_save_context *save = CALLOC_STRUCT(brw_save_context); + + if (ctx->swtnl_im == NULL) { + ctx->swtnl_im = CALLOC_STRUCT(brw_exec_save); + } + + save->ctx = ctx; + IMM_CONTEXT(ctx)->save = save; + + /* Initialize the arrayelt helper + */ + if (!ctx->aelt_context && + !_ae_create_context( ctx )) + return; + + brw_save_api_init( save ); + brw_save_wakeup(ctx); + + ctx->Driver.CurrentSavePrimitive = PRIM_UNKNOWN; +} + + +void brw_save_destroy( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + if (save) { + FREE(save); + IMM_CONTEXT(ctx)->save = NULL; + } + + if (ctx->aelt_context) { + _ae_destroy_context( ctx ); + ctx->aelt_context = NULL; + } + + if (IMM_CONTEXT(ctx)->exec == NULL && + IMM_CONTEXT(ctx)->save == NULL) { + FREE(IMM_CONTEXT(ctx)); + ctx->swtnl_im = NULL; + } +} + + +void brw_save_invalidate_state( GLcontext *ctx, GLuint new_state ) +{ + _ae_invalidate_state(ctx, new_state); +} + + +/* Note that this can occur during the playback of a display list: + */ +void brw_save_fallback( GLcontext *ctx, GLboolean fallback ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + + if (fallback) + save->replay_flags |= BRW_SAVE_FALLBACK; + else + save->replay_flags &= ~BRW_SAVE_FALLBACK; +} + + +/* I don't see any reason to swap this code out on fallbacks. It + * wouldn't really mean anything to do so anyway as the old lists are + * still around from pre-fallback. Instead, the above code ensures + * that vertices are routed back through immediate mode dispatch on + * fallback. 
+ * + * The below can be moved into init or removed: + */ +void brw_save_wakeup( GLcontext *ctx ) +{ + ctx->Driver.NewList = brw_save_NewList; + ctx->Driver.EndList = brw_save_EndList; + ctx->Driver.SaveFlushVertices = brw_save_SaveFlushVertices; + ctx->Driver.BeginCallList = brw_save_BeginCallList; + ctx->Driver.EndCallList = brw_save_EndCallList; + ctx->Driver.NotifySaveBegin = brw_save_NotifyBegin; + + /* Assume we haven't been getting state updates either: + */ + brw_save_invalidate_state( ctx, ~0 ); +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_save.h b/src/mesa/drivers/dri/i965/brw_save.h new file mode 100644 index 00000000000..41cabe65084 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_save.h @@ -0,0 +1,171 @@ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <[email protected]> + * + */ + +#ifndef BRW_SAVE_H +#define BRW_SAVE_H + +#include "mtypes.h" +#include "brw_attrib.h" +#include "brw_draw.h" + + +struct brw_save_copied_vtx { + GLfloat buffer[BRW_ATTRIB_MAX * 4 * BRW_MAX_COPIED_VERTS]; + GLuint nr; /* number of vertices currently held in buffer */ +}; + + +/* For display lists, this structure holds a run of vertices of the + * same format, and a strictly well-formed set of begin/end pairs, + * starting on the first vertex and ending at the last. Vertex + * copying on buffer breaks is precomputed according to these + * primitives, though there are situations where the copying will need + * correction at execute-time, perhaps by replaying the list as + * immediate mode commands. + * + * On executing this list, the 'current' values may be updated with + * the values of the final vertex, and often no fixup of the start of + * the vertex list is required. + * + * Eval and other commands that don't fit into these vertex lists are + * compiled using the fallback opcode mechanism provided by dlist.c. + */ +struct brw_save_vertex_list { + GLubyte attrsz[BRW_ATTRIB_MAX]; + GLuint vertex_size; /* size of one vertex, in GLfloats */ + + GLuint buffer_offset; /* byte offset of the run inside vertex_store's buffer */ + GLuint count; /* number of vertices in the run */ + GLuint wrap_count; /* number of copied vertices at start */ + GLboolean dangling_attr_ref; /* current attr implicitly referenced + outside the list */ + + struct brw_draw_prim *prim; + GLuint prim_count; + + struct brw_save_vertex_store *vertex_store; + struct brw_save_primitive_store *prim_store; +}; + +/* These buffers should be a reasonable size to support upload to + * hardware. Current brw implementation will re-upload on any + * changes, so don't make too big or apps which dynamically create + * dlists and use only a few times will suffer. + * + * Consider strategy of uploading regions from the VBO on demand in the + * case of dynamic vbos. Then make the dlist code signal that + * likelihood as it occurs.
No reason we couldn't change usage + * internally even though this probably isn't allowed for client VBOs? + */ +#define BRW_SAVE_BUFFER_SIZE (8*1024) /* dwords */ +#define BRW_SAVE_PRIM_SIZE 128 +#define BRW_SAVE_PRIM_WEAK 0x40 + +#define BRW_SAVE_FALLBACK 0x10000000 + +/* Storage to be shared among several vertex_lists. + */ +struct brw_save_vertex_store { + struct gl_buffer_object *bufferobj; + GLfloat *buffer; + GLuint used; + GLuint refcount; +}; + +struct brw_save_primitive_store { + struct brw_draw_prim buffer[BRW_SAVE_PRIM_SIZE]; + GLuint used; + GLuint refcount; +}; + + +struct brw_save_context { + GLcontext *ctx; + GLvertexformat vtxfmt; + struct gl_client_array arrays[BRW_ATTRIB_MAX]; + const struct gl_client_array *inputs[BRW_ATTRIB_MAX]; + + GLubyte attrsz[BRW_ATTRIB_MAX]; + GLubyte active_sz[BRW_ATTRIB_MAX]; + GLuint vertex_size; + + GLfloat *buffer; + GLuint count; + GLuint wrap_count; + GLuint replay_flags; + + struct brw_draw_prim *prim; + GLuint prim_count, prim_max; + + struct brw_save_vertex_store *vertex_store; + struct brw_save_primitive_store *prim_store; + + GLfloat *vbptr; /* cursor, points into buffer */ + GLfloat vertex[BRW_ATTRIB_MAX*4]; /* current values */ + GLfloat *attrptr[BRW_ATTRIB_MAX]; + GLuint vert_count; + GLuint max_vert; + GLboolean dangling_attr_ref; + GLboolean have_materials; + + GLuint opcode_vertex_list; + + struct brw_save_copied_vtx copied; + + GLfloat CurrentFloatEdgeFlag; + + GLfloat *current[BRW_ATTRIB_MAX]; /* points into ctx->ListState */ + GLubyte *currentsz[BRW_ATTRIB_MAX]; +}; + + +void brw_save_init( GLcontext *ctx ); +void brw_save_destroy( GLcontext *ctx ); +void brw_save_wakeup( GLcontext *ctx ); +void brw_save_invalidate_state( GLcontext *ctx, GLuint new_state ); +void brw_save_fallback( GLcontext *ctx, GLboolean fallback ); + +/* Callbacks: + */ +void brw_save_EndList( GLcontext *ctx ); +void brw_save_NewList( GLcontext *ctx, GLuint list, GLenum mode ); +void brw_save_EndCallList( GLcontext *ctx ); +void 
brw_save_BeginCallList( GLcontext *ctx, struct mesa_display_list *list ); +void brw_save_SaveFlushVertices( GLcontext *ctx ); +GLboolean brw_save_NotifyBegin( GLcontext *ctx, GLenum mode ); + +void brw_save_playback_vertex_list( GLcontext *ctx, void *data ); + +void brw_save_api_init( struct brw_save_context *save ); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_save_api.c b/src/mesa/drivers/dri/i965/brw_save_api.c new file mode 100644 index 00000000000..9c0e4af48a5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_save_api.c @@ -0,0 +1,1162 @@ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +/* Display list compiler attempts to store lists of vertices with the + * same vertex layout. Additionally it attempts to minimize the need + * for execute-time fixup of these vertex lists, allowing them to be + * cached on hardware. + * + * There are still some circumstances where this can be thwarted, for + * example by building a list that consists of one very long primitive + * (eg Begin(Triangles), 1000 vertices, End), and calling that list + * from inside a different begin/end object (Begin(Lines), CallList, + * End). + * + * In that case the code will have to replay the list as individual + * commands through the Exec dispatch table, or fix up the copied + * vertices at execute-time. + * + * The other case where fixup is required is when a vertex attribute + * is introduced in the middle of a primitive. Eg: + * Begin(Lines) + * TexCoord1f() Vertex2f() + * TexCoord1f() Color3f() Vertex2f() + * End() + * + * If the current value of Color isn't known at compile-time, this + * primitive will require fixup. + * + * + * The list compiler currently doesn't attempt to compile lists + * containing EvalCoord or EvalPoint commands. On encountering one of + * these, compilation falls back to opcodes. + * + * This could be improved to fallback only when a mix of EvalCoord and + * Vertex commands are issued within a single primitive. + */ + + +#include "glheader.h" +#include "context.h" +#include "dlist.h" +#include "enums.h" +#include "macros.h" +#include "api_validate.h" +#include "api_arrayelt.h" +#include "vtxfmt.h" +#include "dispatch.h" + +#include "brw_save.h" +#include "brw_fallback.h" + + + + +/* + * NOTE: Old 'parity' issue is gone, but copying can still be + * wrong-footed on replay. 
+ */ +static GLuint _save_copy_vertices( GLcontext *ctx, + const struct brw_save_vertex_list *node, + const GLfloat *src_buffer) +{ + struct brw_save_context *save = IMM_CONTEXT( ctx )->save; + const struct brw_draw_prim *prim = &node->prim[node->prim_count-1]; + GLuint nr = prim->count; + GLuint sz = save->vertex_size; + const GLfloat *src = src_buffer + prim->start * sz; + GLfloat *dst = save->copied.buffer; + GLuint ovf, i; + + if (prim->end) + return 0; + + switch( prim->mode ) + { + case GL_POINTS: + return 0; + case GL_LINES: + ovf = nr&1; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz*sizeof(GLfloat) ); + return i; + case GL_TRIANGLES: + ovf = nr%3; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz*sizeof(GLfloat) ); + return i; + case GL_QUADS: + ovf = nr&3; + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz*sizeof(GLfloat) ); + return i; + case GL_LINE_STRIP: + if (nr == 0) + return 0; + else { + _mesa_memcpy( dst, src+(nr-1)*sz, sz*sizeof(GLfloat) ); + return 1; + } + case GL_LINE_LOOP: + case GL_TRIANGLE_FAN: + case GL_POLYGON: + if (nr == 0) + return 0; + else if (nr == 1) { + _mesa_memcpy( dst, src+0, sz*sizeof(GLfloat) ); + return 1; + } else { + _mesa_memcpy( dst, src+0, sz*sizeof(GLfloat) ); + _mesa_memcpy( dst+sz, src+(nr-1)*sz, sz*sizeof(GLfloat) ); + return 2; + } + case GL_TRIANGLE_STRIP: + case GL_QUAD_STRIP: + switch (nr) { + case 0: ovf = 0; break; + case 1: ovf = 1; break; + default: ovf = 2 + (nr&1); break; + } + for (i = 0 ; i < ovf ; i++) + _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz*sizeof(GLfloat) ); + return i; + default: + assert(0); + return 0; + } +} + + +static struct brw_save_vertex_store *alloc_vertex_store( GLcontext *ctx ) +{ + struct brw_save_vertex_store *vertex_store = CALLOC_STRUCT(brw_save_vertex_store); + + /* obj->Name needs to be non-zero, but won't ever be examined more + * closely than that. 
In particular these buffers won't be entered + * into the hash and can never be confused with ones visible to the + * user. Perhaps there could be a special number for internal + * buffers: + */ + vertex_store->bufferobj = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); + + ctx->Driver.BufferData( ctx, + GL_ARRAY_BUFFER_ARB, + BRW_SAVE_BUFFER_SIZE * sizeof(GLfloat), + NULL, + GL_STATIC_DRAW_ARB, + vertex_store->bufferobj); + + vertex_store->buffer = NULL; + vertex_store->used = 0; + vertex_store->refcount = 1; + + return vertex_store; +} + +static void free_vertex_store( GLcontext *ctx, struct brw_save_vertex_store *vertex_store ) +{ + assert(!vertex_store->buffer); + + if (vertex_store->bufferobj) + ctx->Driver.DeleteBuffer( ctx, vertex_store->bufferobj ); + + FREE( vertex_store ); +} + +static GLfloat *map_vertex_store( GLcontext *ctx, struct brw_save_vertex_store *vertex_store ) +{ + assert(vertex_store->bufferobj); + assert(!vertex_store->buffer); + vertex_store->buffer = (GLfloat *)ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER_ARB, /* not used */ + GL_STATIC_DRAW_ARB, /* not used */ + vertex_store->bufferobj); + + assert(vertex_store->buffer); + return vertex_store->buffer + vertex_store->used; +} + +static void unmap_vertex_store( GLcontext *ctx, struct brw_save_vertex_store *vertex_store ) +{ + ctx->Driver.UnmapBuffer( ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj ); + vertex_store->buffer = NULL; +} + + +static struct brw_save_primitive_store *alloc_prim_store( GLcontext *ctx ) +{ + struct brw_save_primitive_store *store = CALLOC_STRUCT(brw_save_primitive_store); + (void) ctx; + store->used = 0; + store->refcount = 1; + return store; +} + +static void _save_reset_counters( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + + save->prim = save->prim_store->buffer + save->prim_store->used; + save->buffer = (save->vertex_store->buffer + + save->vertex_store->used); + + assert(save->buffer == save->vbptr); + + if 
(save->vertex_size) + save->max_vert = ((BRW_SAVE_BUFFER_SIZE - save->vertex_store->used) / + save->vertex_size); + else + save->max_vert = 0; + + save->vert_count = 0; + save->prim_count = 0; + save->prim_max = BRW_SAVE_PRIM_SIZE - save->prim_store->used; + save->dangling_attr_ref = 0; +} + + +/* Insert the active immediate struct onto the display list currently + * being built. + */ +static void _save_compile_vertex_list( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + struct brw_save_vertex_list *node; + + /* Allocate space for this structure in the display list currently + * being compiled. + */ + node = (struct brw_save_vertex_list *) + _mesa_alloc_instruction(ctx, save->opcode_vertex_list, sizeof(*node)); + + if (!node) + return; + + /* Duplicate our template, increment refcounts to the storage structs: + */ + _mesa_memcpy(node->attrsz, save->attrsz, sizeof(node->attrsz)); + node->vertex_size = save->vertex_size; + node->buffer_offset = (save->buffer - save->vertex_store->buffer) * sizeof(GLfloat); + node->count = save->vert_count; + node->wrap_count = save->copied.nr; + node->dangling_attr_ref = save->dangling_attr_ref; + node->prim = save->prim; + node->prim_count = save->prim_count; + node->vertex_store = save->vertex_store; + node->prim_store = save->prim_store; + + node->vertex_store->refcount++; + node->prim_store->refcount++; + + assert(node->attrsz[BRW_ATTRIB_POS] != 0 || + node->count == 0); + + if (save->dangling_attr_ref) + ctx->ListState.CurrentList->flags |= MESA_DLIST_DANGLING_REFS; + + save->vertex_store->used += save->vertex_size * node->count; + save->prim_store->used += node->prim_count; + + + /* Copy duplicated vertices + */ + save->copied.nr = _save_copy_vertices( ctx, node, save->buffer ); + + + /* Deal with GL_COMPILE_AND_EXECUTE: + */ + if (ctx->ExecuteFlag) { + struct _glapi_table *dispatch = GET_DISPATCH(); + + _glapi_set_dispatch(ctx->Exec); + + brw_loopback_vertex_list( ctx, + (const GLfloat *)((const 
char *)save->vertex_store->buffer + + node->buffer_offset), + node->attrsz, + node->prim, + node->prim_count, + node->wrap_count, + node->vertex_size); + + _glapi_set_dispatch(dispatch); + } + + + /* Decide whether the storage structs are full, or can be used for + * the next vertex lists as well. + */ + if (save->vertex_store->used > + BRW_SAVE_BUFFER_SIZE - 16 * (save->vertex_size + 4)) { + + /* Unmap old store: + */ + unmap_vertex_store( ctx, save->vertex_store ); + + /* Release old reference: + */ + save->vertex_store->refcount--; + assert(save->vertex_store->refcount != 0); + save->vertex_store = NULL; + + /* Allocate and map new store: + */ + save->vertex_store = alloc_vertex_store( ctx ); + save->vbptr = map_vertex_store( ctx, save->vertex_store ); + } + + if (save->prim_store->used > BRW_SAVE_PRIM_SIZE - 6) { + save->prim_store->refcount--; + assert(save->prim_store->refcount != 0); + save->prim_store = alloc_prim_store( ctx ); + } + + /* Reset our structures for the next run of vertices: + */ + _save_reset_counters( ctx ); +} + + +/* TODO -- If no new vertices have been stored, don't bother saving + * it. + */ +static void _save_wrap_buffers( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLint i = save->prim_count - 1; + GLenum mode; + GLboolean weak; + + assert(i < (GLint) save->prim_max); + assert(i >= 0); + + /* Close off in-progress primitive. + */ + save->prim[i].count = (save->vert_count - + save->prim[i].start); + mode = save->prim[i].mode; + weak = save->prim[i].weak; + + /* store the copied vertices, and allocate a new list. 
+ */ + _save_compile_vertex_list( ctx ); + + /* Restart interrupted primitive + */ + save->prim[0].mode = mode; + save->prim[0].weak = weak; + save->prim[0].begin = 0; + save->prim[0].end = 0; + save->prim[0].pad = 0; + save->prim[0].start = 0; + save->prim[0].count = 0; + save->prim_count = 1; +} + + + +/* Called only when buffers are wrapped as the result of filling the + * vertex_store struct. + */ +static void _save_wrap_filled_vertex( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLfloat *data = save->copied.buffer; + GLuint i; + + /* Emit a glEnd to close off the last vertex list. + */ + _save_wrap_buffers( ctx ); + + /* Copy stored stored vertices to start of new list. + */ + assert(save->max_vert - save->vert_count > save->copied.nr); + + for (i = 0 ; i < save->copied.nr ; i++) { + _mesa_memcpy( save->vbptr, data, save->vertex_size * sizeof(GLfloat)); + data += save->vertex_size; + save->vbptr += save->vertex_size; + save->vert_count++; + } +} + + +static void _save_copy_to_current( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLuint i; + + for (i = BRW_ATTRIB_POS+1 ; i <= BRW_ATTRIB_INDEX ; i++) { + if (save->attrsz[i]) { + save->currentsz[i][0] = save->attrsz[i]; + COPY_CLEAN_4V(save->current[i], + save->attrsz[i], + save->attrptr[i]); + } + } + + /* Edgeflag requires special treatment: + * + * TODO: change edgeflag to GLfloat in Mesa. 
+ */ + if (save->attrsz[BRW_ATTRIB_EDGEFLAG]) { + ctx->ListState.ActiveEdgeFlag = 1; + save->CurrentFloatEdgeFlag = + save->attrptr[BRW_ATTRIB_EDGEFLAG][0]; + ctx->ListState.CurrentEdgeFlag = + (save->CurrentFloatEdgeFlag == 1.0); + } +} + + +static void _save_copy_from_current( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLint i; + + for (i = BRW_ATTRIB_POS+1 ; i <= BRW_ATTRIB_INDEX ; i++) + switch (save->attrsz[i]) { + case 4: save->attrptr[i][3] = save->current[i][3]; + case 3: save->attrptr[i][2] = save->current[i][2]; + case 2: save->attrptr[i][1] = save->current[i][1]; + case 1: save->attrptr[i][0] = save->current[i][0]; + case 0: break; + } + + /* Edgeflag requires special treatment: + */ + if (save->attrsz[BRW_ATTRIB_EDGEFLAG]) { + save->CurrentFloatEdgeFlag = (GLfloat)ctx->ListState.CurrentEdgeFlag; + save->attrptr[BRW_ATTRIB_EDGEFLAG][0] = save->CurrentFloatEdgeFlag; + } +} + + + + +/* Flush existing data, set new attrib size, replay copied vertices. + */ +static void _save_upgrade_vertex( GLcontext *ctx, + GLuint attr, + GLuint newsz ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLuint oldsz; + GLuint i; + GLfloat *tmp; + + /* Store the current run of vertices, and emit a GL_END. Emit a + * BEGIN in the new buffer. + */ + if (save->vert_count) + _save_wrap_buffers( ctx ); + else + assert( save->copied.nr == 0 ); + + /* Do a COPY_TO_CURRENT to ensure back-copying works for the case + * when the attribute already exists in the vertex and is having + * its size increased. 
+ */ + _save_copy_to_current( ctx ); + + /* Fix up sizes: + */ + oldsz = save->attrsz[attr]; + save->attrsz[attr] = newsz; + + save->vertex_size += newsz - oldsz; + save->max_vert = ((BRW_SAVE_BUFFER_SIZE - save->vertex_store->used) / + save->vertex_size); + save->vert_count = 0; + + /* Recalculate all the attrptr[] values: + */ + for (i = 0, tmp = save->vertex ; i < BRW_ATTRIB_MAX ; i++) { + if (save->attrsz[i]) { + save->attrptr[i] = tmp; + tmp += save->attrsz[i]; + } + else + save->attrptr[i] = NULL; /* will not be dereferenced. */ + } + + /* Copy from current to repopulate the vertex with correct values. + */ + _save_copy_from_current( ctx ); + + /* Replay stored vertices to translate them to new format here. + * + * If there are copied vertices and the new (upgraded) attribute + * has not been defined before, this list is somewhat degenerate, + * and will need fixup at runtime. + */ + if (save->copied.nr) + { + GLfloat *data = save->copied.buffer; + GLfloat *dest = save->buffer; + GLuint j; + + /* Need to note this and fix up at runtime (or loopback): + */ + if (save->currentsz[attr][0] == 0) { + assert(oldsz == 0); + save->dangling_attr_ref = GL_TRUE; + } + + for (i = 0 ; i < save->copied.nr ; i++) { + for (j = 0 ; j < BRW_ATTRIB_MAX ; j++) { + if (save->attrsz[j]) { + if (j == attr) { + if (oldsz) { + COPY_CLEAN_4V( dest, oldsz, data ); + data += oldsz; + dest += newsz; + } + else { + COPY_SZ_4V( dest, newsz, save->current[attr] ); + dest += newsz; + } + } + else { + GLint sz = save->attrsz[j]; + COPY_SZ_4V( dest, sz, data ); + data += sz; + dest += sz; + } + } + } + } + + save->vbptr = dest; + save->vert_count += save->copied.nr; + } +} + +static void save_fixup_vertex( GLcontext *ctx, GLuint attr, GLuint sz ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + + if (sz > save->attrsz[attr]) { + /* New size is larger. Need to flush existing vertices and get + * an enlarged vertex format. 
+ */ + _save_upgrade_vertex( ctx, attr, sz ); + } + else if (sz < save->active_sz[attr]) { + static GLfloat id[4] = { 0, 0, 0, 1 }; + GLuint i; + + /* New size is equal or smaller - just need to fill in some + * zeros. + */ + for (i = sz ; i <= save->attrsz[attr] ; i++) + save->attrptr[attr][i-1] = id[i-1]; + } + + save->active_sz[attr] = sz; +} + +static void _save_reset_vertex( GLcontext *ctx ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLuint i; + + for (i = 0 ; i < BRW_ATTRIB_MAX ; i++) { + save->attrsz[i] = 0; + save->active_sz[i] = 0; + } + + save->vertex_size = 0; +} + + + +#define ERROR() _mesa_compile_error( ctx, GL_INVALID_ENUM, __FUNCTION__ ); + + +/* Only one size for each attribute may be active at once. Eg. if + * Color3f is installed/active, then Color4f may not be, even if the + * vertex actually contains 4 color coordinates. This is because the + * 3f version won't otherwise set color[3] to 1.0 -- this is the job + * of the chooser function when switching between Color4f and Color3f. + */ +#define ATTR( A, N, V0, V1, V2, V3 ) \ +do { \ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; \ + \ + if (save->active_sz[A] != N) \ + save_fixup_vertex(ctx, A, N); \ + \ + { \ + GLfloat *dest = save->attrptr[A]; \ + if (N>0) dest[0] = V0; \ + if (N>1) dest[1] = V1; \ + if (N>2) dest[2] = V2; \ + if (N>3) dest[3] = V3; \ + } \ + \ + if ((A) == 0) { \ + GLuint i; \ + \ + for (i = 0; i < save->vertex_size; i++) \ + save->vbptr[i] = save->vertex[i]; \ + \ + save->vbptr += save->vertex_size; \ + \ + if (++save->vert_count >= save->max_vert) \ + _save_wrap_filled_vertex( ctx ); \ + } \ +} while (0) + +#define TAG(x) _save_##x + +#include "brw_attrib_tmp.h" + + + + +/* Cope with EvalCoord/CallList called within a begin/end object: + * -- Flush current buffer + * -- Fallback to opcodes for the rest of the begin/end object. 
+ */ +#define DO_FALLBACK(ctx) \ +do { \ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; \ + \ + if (save->vert_count || save->prim_count) \ + _save_compile_vertex_list( ctx ); \ + \ + _save_copy_to_current( ctx ); \ + _save_reset_vertex( ctx ); \ + _save_reset_counters( ctx ); \ + _mesa_install_save_vtxfmt( ctx, &ctx->ListState.ListVtxfmt ); \ + ctx->Driver.SaveNeedFlush = 0; \ +} while (0) + +static void GLAPIENTRY _save_EvalCoord1f( GLfloat u ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalCoord1f( u ); +} + +static void GLAPIENTRY _save_EvalCoord1fv( const GLfloat *v ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalCoord1fv( v ); +} + +static void GLAPIENTRY _save_EvalCoord2f( GLfloat u, GLfloat v ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalCoord2f( u, v ); +} + +static void GLAPIENTRY _save_EvalCoord2fv( const GLfloat *v ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalCoord2fv( v ); +} + +static void GLAPIENTRY _save_EvalPoint1( GLint i ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalPoint1( i ); +} + +static void GLAPIENTRY _save_EvalPoint2( GLint i, GLint j ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->EvalPoint2( i, j ); +} + +static void GLAPIENTRY _save_CallList( GLuint l ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->CallList( l ); +} + +static void GLAPIENTRY _save_CallLists( GLsizei n, GLenum type, const GLvoid *v ) +{ + GET_CURRENT_CONTEXT(ctx); + DO_FALLBACK(ctx); + ctx->Save->CallLists( n, type, v ); +} + + + + +/* This begin is hooked into ... Updating of + * ctx->Driver.CurrentSavePrimitive is already taken care of. 
+ */ +GLboolean brw_save_NotifyBegin( GLcontext *ctx, GLenum mode ) +{ + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + + GLuint i = save->prim_count++; + + assert(i < save->prim_max); + save->prim[i].mode = mode & ~BRW_SAVE_PRIM_WEAK; + save->prim[i].begin = 1; + save->prim[i].end = 0; + save->prim[i].weak = (mode & BRW_SAVE_PRIM_WEAK) ? 1 : 0; + save->prim[i].pad = 0; + save->prim[i].start = save->vert_count; + save->prim[i].count = 0; + + _mesa_install_save_vtxfmt( ctx, &save->vtxfmt ); + ctx->Driver.SaveNeedFlush = 1; + return GL_TRUE; +} + + + +static void GLAPIENTRY _save_End( void ) +{ + GET_CURRENT_CONTEXT( ctx ); + struct brw_save_context *save = IMM_CONTEXT(ctx)->save; + GLint i = save->prim_count - 1; + + ctx->Driver.CurrentSavePrimitive = PRIM_OUTSIDE_BEGIN_END; + save->prim[i].end = 1; + save->prim[i].count = (save->vert_count - + save->prim[i].start); + + if (i == (GLint) save->prim_max - 1) { + _save_compile_vertex_list( ctx ); + assert(save->copied.nr == 0); + } + + /* Swap out this vertex format while outside begin/end. Any color, + * etc. received between here and the next begin will be compiled + * as opcodes. + */ + _mesa_install_save_vtxfmt( ctx, &ctx->ListState.ListVtxfmt ); +} + + +/* These are all errors as this vtxfmt is only installed inside + * begin/end pairs. 
+ */ +static void GLAPIENTRY _save_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + (void) mode; (void) count; (void) type; (void) indices; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glDrawElements" ); +} + + +static void GLAPIENTRY _save_DrawRangeElements(GLenum mode, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + (void) mode; (void) start; (void) end; (void) count; (void) type; (void) indices; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glDrawRangeElements" ); +} + +static void GLAPIENTRY _save_DrawArrays(GLenum mode, GLint start, GLsizei count) +{ + GET_CURRENT_CONTEXT(ctx); + (void) mode; (void) start; (void) count; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glDrawArrays" ); +} + +static void GLAPIENTRY _save_Rectf( GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2 ) +{ + GET_CURRENT_CONTEXT(ctx); + (void) x1; (void) y1; (void) x2; (void) y2; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glRectf" ); +} + +static void GLAPIENTRY _save_EvalMesh1( GLenum mode, GLint i1, GLint i2 ) +{ + GET_CURRENT_CONTEXT(ctx); + (void) mode; (void) i1; (void) i2; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glEvalMesh1" ); +} + +static void GLAPIENTRY _save_EvalMesh2( GLenum mode, GLint i1, GLint i2, + GLint j1, GLint j2 ) +{ + GET_CURRENT_CONTEXT(ctx); + (void) mode; (void) i1; (void) i2; (void) j1; (void) j2; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glEvalMesh2" ); +} + +static void GLAPIENTRY _save_Begin( GLenum mode ) +{ + GET_CURRENT_CONTEXT( ctx ); + (void) mode; + _mesa_compile_error( ctx, GL_INVALID_OPERATION, "Recursive glBegin" ); +} + + +/* Unlike the functions above, these are to be hooked into the vtxfmt + * maintained in ctx->ListState, active when the list is known or + * suspected to be outside any begin/end primitive. 
+ */
+static void GLAPIENTRY _save_OBE_Rectf( GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2 )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   brw_save_NotifyBegin( ctx, GL_QUADS | BRW_SAVE_PRIM_WEAK );
+   CALL_Vertex2f(GET_DISPATCH(), ( x1, y1 ));
+   CALL_Vertex2f(GET_DISPATCH(), ( x2, y1 ));
+   CALL_Vertex2f(GET_DISPATCH(), ( x2, y2 ));
+   CALL_Vertex2f(GET_DISPATCH(), ( x1, y2 ));
+   CALL_End(GET_DISPATCH(), ());
+}
+
+
+/* Compile a glDrawArrays call issued outside begin/end.  Once
+ * _mesa_validate_DrawArrays accepts the call, a primitive of the given
+ * mode is opened with the BRW_SAVE_PRIM_WEAK flag and elements
+ * start .. start+count-1 are replayed one by one through the
+ * ArrayElement dispatch entry, followed by End.
+ */
+static void GLAPIENTRY _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLint i;
+
+   if (!_mesa_validate_DrawArrays( ctx, mode, start, count ))
+      return;
+
+   brw_save_NotifyBegin( ctx, mode | BRW_SAVE_PRIM_WEAK );
+   for (i = 0; i < count; i++)
+      CALL_ArrayElement(GET_DISPATCH(), (start + i));
+   CALL_End(GET_DISPATCH(), ());
+}
+
+/* Could do better by copying the arrays and element list intact and
+ * then emitting an indexed prim at runtime.
+ *
+ * As written, the index list is walked here and each index is replayed
+ * through the ArrayElement dispatch entry; 'indices' is read as
+ * GLubyte, GLushort or GLuint according to 'type'.
+ */
+static void GLAPIENTRY _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
+                                              const GLvoid *indices)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLint i;
+
+   if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices ))
+      return;
+
+   brw_save_NotifyBegin( ctx, mode | BRW_SAVE_PRIM_WEAK );
+
+   switch (type) {
+   case GL_UNSIGNED_BYTE:
+      for (i = 0 ; i < count ; i++)
+         CALL_ArrayElement(GET_DISPATCH(), ( ((GLubyte *)indices)[i] ));
+      break;
+   case GL_UNSIGNED_SHORT:
+      for (i = 0 ; i < count ; i++)
+         CALL_ArrayElement(GET_DISPATCH(), ( ((GLushort *)indices)[i] ));
+      break;
+   case GL_UNSIGNED_INT:
+      for (i = 0 ; i < count ; i++)
+         CALL_ArrayElement(GET_DISPATCH(), ( ((GLuint *)indices)[i] ));
+      break;
+   default:
+      /* NOTE(review): presumably unreachable once validation above has
+       * passed -- confirm against _mesa_validate_DrawElements.  The
+       * primitive opened above is still closed by the End below.
+       */
+      _mesa_error( ctx, GL_INVALID_ENUM, "glDrawElements(type)" );
+      break;
+   }
+
+   CALL_End(GET_DISPATCH(), ());
+}
+
+/* Range variant: start/end are only passed to the validator here; the
+ * actual compilation is delegated to _save_OBE_DrawElements.
+ */
+static void GLAPIENTRY _save_OBE_DrawRangeElements(GLenum mode,
+                                                   GLuint start, GLuint end,
+                                                   GLsizei count, GLenum type,
+                                                   const GLvoid *indices)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (_mesa_validate_DrawRangeElements( ctx, mode,
+                                         start, end,
+                                         count, type, indices ))
+      _save_OBE_DrawElements( mode, count, type, indices );
+}
+
+
+
+
+
+/* Fill in save->vtxfmt with the _save_* entry points used while a
+ * display list is being compiled.
+ */
+static void _save_vtxfmt_init( GLcontext *ctx )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+   GLvertexformat *vfmt = &save->vtxfmt;
+
+   vfmt->ArrayElement = _ae_loopback_array_elt; /* generic helper */
+   vfmt->Begin = _save_Begin;
+   vfmt->Color3f = _save_Color3f;
+   vfmt->Color3fv = _save_Color3fv;
+   vfmt->Color4f = _save_Color4f;
+   vfmt->Color4fv = _save_Color4fv;
+   vfmt->EdgeFlag = _save_EdgeFlag;
+   vfmt->End = _save_End;
+   vfmt->FogCoordfEXT = _save_FogCoordfEXT;
+   vfmt->FogCoordfvEXT = _save_FogCoordfvEXT;
+   vfmt->Indexf = _save_Indexf;
+   vfmt->Indexfv = _save_Indexfv;
+   vfmt->Materialfv = _save_Materialfv;
+   vfmt->MultiTexCoord1fARB = _save_MultiTexCoord1f;
+   vfmt->MultiTexCoord1fvARB = _save_MultiTexCoord1fv;
+   vfmt->MultiTexCoord2fARB = _save_MultiTexCoord2f;
+   vfmt->MultiTexCoord2fvARB = _save_MultiTexCoord2fv;
+   vfmt->MultiTexCoord3fARB = _save_MultiTexCoord3f;
+   vfmt->MultiTexCoord3fvARB = _save_MultiTexCoord3fv;
+   vfmt->MultiTexCoord4fARB = _save_MultiTexCoord4f;
+   vfmt->MultiTexCoord4fvARB = _save_MultiTexCoord4fv;
+   vfmt->Normal3f = _save_Normal3f;
+   vfmt->Normal3fv = _save_Normal3fv;
+   vfmt->SecondaryColor3fEXT = _save_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = _save_SecondaryColor3fvEXT;
+   vfmt->TexCoord1f = _save_TexCoord1f;
+   vfmt->TexCoord1fv = _save_TexCoord1fv;
+   vfmt->TexCoord2f = _save_TexCoord2f;
+   vfmt->TexCoord2fv = _save_TexCoord2fv;
+   vfmt->TexCoord3f = _save_TexCoord3f;
+   vfmt->TexCoord3fv = _save_TexCoord3fv;
+   vfmt->TexCoord4f = _save_TexCoord4f;
+   vfmt->TexCoord4fv = _save_TexCoord4fv;
+   vfmt->Vertex2f = _save_Vertex2f;
+   vfmt->Vertex2fv = _save_Vertex2fv;
+   vfmt->Vertex3f = _save_Vertex3f;
+   vfmt->Vertex3fv = _save_Vertex3fv;
+   vfmt->Vertex4f = _save_Vertex4f;
+   vfmt->Vertex4fv = _save_Vertex4fv;
+   vfmt->VertexAttrib1fARB = _save_VertexAttrib1fARB;
+   vfmt->VertexAttrib1fvARB = _save_VertexAttrib1fvARB;
+   vfmt->VertexAttrib2fARB = _save_VertexAttrib2fARB;
+   vfmt->VertexAttrib2fvARB = _save_VertexAttrib2fvARB;
+   vfmt->VertexAttrib3fARB = _save_VertexAttrib3fARB;
+   vfmt->VertexAttrib3fvARB = _save_VertexAttrib3fvARB;
+   vfmt->VertexAttrib4fARB = _save_VertexAttrib4fARB;
+   vfmt->VertexAttrib4fvARB = _save_VertexAttrib4fvARB;
+
+   vfmt->VertexAttrib1fNV = _save_VertexAttrib1fNV;
+   vfmt->VertexAttrib1fvNV = _save_VertexAttrib1fvNV;
+   vfmt->VertexAttrib2fNV = _save_VertexAttrib2fNV;
+   vfmt->VertexAttrib2fvNV = _save_VertexAttrib2fvNV;
+   vfmt->VertexAttrib3fNV = _save_VertexAttrib3fNV;
+   vfmt->VertexAttrib3fvNV = _save_VertexAttrib3fvNV;
+   vfmt->VertexAttrib4fNV = _save_VertexAttrib4fNV;
+   vfmt->VertexAttrib4fvNV = _save_VertexAttrib4fvNV;
+
+   /* This will all require us to fallback to saving the list as opcodes:
+    */
+   vfmt->CallList = _save_CallList; /* inside begin/end */
+   vfmt->CallLists = _save_CallLists; /* inside begin/end */
+   vfmt->EvalCoord1f = _save_EvalCoord1f;
+   vfmt->EvalCoord1fv = _save_EvalCoord1fv;
+   vfmt->EvalCoord2f = _save_EvalCoord2f;
+   vfmt->EvalCoord2fv = _save_EvalCoord2fv;
+   vfmt->EvalPoint1 = _save_EvalPoint1;
+   vfmt->EvalPoint2 = _save_EvalPoint2;
+
+   /* These are all errors as we at least know we are in some sort of
+    * begin/end pair:
+    */
+   vfmt->EvalMesh1 = _save_EvalMesh1;
+   vfmt->EvalMesh2 = _save_EvalMesh2;
+   /* NOTE(review): Begin was already assigned the same handler near the
+    * top of this function; this second assignment is redundant.
+    */
+   vfmt->Begin = _save_Begin;
+   vfmt->Rectf = _save_Rectf;
+   vfmt->DrawArrays = _save_DrawArrays;
+   vfmt->DrawElements = _save_DrawElements;
+   vfmt->DrawRangeElements = _save_DrawRangeElements;
+
+}
+
+
+/* Flush vertices accumulated by the list compiler: compile them into a
+ * vertex-list node if there are any, copy to current, and reset the
+ * accumulation state.  Returns early while a primitive is open.
+ */
+void brw_save_SaveFlushVertices( GLcontext *ctx )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+
+   /* Noop when we are actually active:
+    */
+   if (ctx->Driver.CurrentSavePrimitive == PRIM_INSIDE_UNKNOWN_PRIM ||
+       ctx->Driver.CurrentSavePrimitive <= GL_POLYGON)
+      return;
+
+   if (save->vert_count ||
+       save->prim_count)
+      _save_compile_vertex_list( ctx );
+
+   _save_copy_to_current( ctx );
+   _save_reset_vertex( ctx );
+   _save_reset_counters( ctx );
+   ctx->Driver.SaveNeedFlush = 0;
+}
+
+/* Start of list compilation: allocate the primitive and vertex stores
+ * if they do not exist yet, map the vertex store for writing and reset
+ * the accumulation state.  'list' and 'mode' are unused.
+ */
+void brw_save_NewList( GLcontext *ctx, GLuint list, GLenum mode )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+
+   (void) list; (void) mode;
+
+   if (!save->prim_store)
+      save->prim_store = alloc_prim_store( ctx );
+
+   if (!save->vertex_store)
+      save->vertex_store = alloc_vertex_store( ctx );
+
+   save->vbptr = map_vertex_store( ctx, save->vertex_store );
+
+   _save_reset_vertex( ctx );
+   _save_reset_counters( ctx );
+   ctx->Driver.SaveNeedFlush = 0;
+}
+
+/* End of list compilation: unmap the vertex store mapped in
+ * brw_save_NewList.
+ */
+void brw_save_EndList( GLcontext *ctx )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+   unmap_vertex_store( ctx, save->vertex_store );
+
+   assert(save->vertex_size == 0);
+}
+
+/* Accumulate the called list's flags into replay_flags for the duration
+ * of the call.
+ */
+void brw_save_BeginCallList( GLcontext *ctx, struct mesa_display_list *dlist )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+   save->replay_flags |= dlist->flags;
+}
+
+void brw_save_EndCallList( GLcontext *ctx )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+
+   if (ctx->ListState.CallDepth == 1) {
+      /* This is correct: want to keep only the BRW_SAVE_FALLBACK
+       * flag, if it is set:
+       */
+      save->replay_flags &= BRW_SAVE_FALLBACK;
+   }
+}
+
+
+/* Destroy callback for the vertex-list opcode: drop this node's
+ * reference on the vertex store and the primitive store, freeing each
+ * one when its refcount reaches zero.
+ */
+static void brw_destroy_vertex_list( GLcontext *ctx, void *data )
+{
+   struct brw_save_vertex_list *node = (struct brw_save_vertex_list *)data;
+   (void) ctx;
+
+   if ( --node->vertex_store->refcount == 0 )
+      free_vertex_store( ctx, node->vertex_store );
+
+   if ( --node->prim_store->refcount == 0 )
+      FREE( node->prim_store );
+}
+
+
+/* Print callback for the vertex-list opcode: dump the node's counts and
+ * one line per primitive via _mesa_debug.
+ */
+static void brw_print_vertex_list( GLcontext *ctx, void *data )
+{
+   struct brw_save_vertex_list *node = (struct brw_save_vertex_list *)data;
+   GLuint i;
+   (void) ctx;
+
+   _mesa_debug(NULL, "BRW-VERTEX-LIST, %u vertices %d primitives, %d vertsize\n",
+               node->count,
+               node->prim_count,
+               node->vertex_size);
+
+   for (i = 0 ; i < node->prim_count ; i++) {
+      struct brw_draw_prim *prim = &node->prim[i];
+      _mesa_debug(NULL, " prim %d: %s%s %d..%d %s %s\n",
+                  i,
+                  _mesa_lookup_enum_by_nr(prim->mode),
+                  prim->weak ? " (weak)" : "",
+                  prim->start,
+                  prim->start + prim->count,
+                  (prim->begin) ? "BEGIN" : "(wrap)",
+                  (prim->end) ? "END" : "(wrap)");
+   }
+}
+
+
+/* Point save->current[] / save->currentsz[] at the per-attribute
+ * storage kept in ctx->ListState.  The edge flag value is the
+ * exception: it is kept in save->CurrentFloatEdgeFlag.
+ */
+static void _save_current_init( GLcontext *ctx )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+   GLint i;
+
+   for (i = 0; i < BRW_ATTRIB_FIRST_MATERIAL; i++) {
+      save->currentsz[i] = &ctx->ListState.ActiveAttribSize[i];
+      save->current[i] = ctx->ListState.CurrentAttrib[i];
+   }
+
+   for (i = BRW_ATTRIB_FIRST_MATERIAL; i < BRW_ATTRIB_INDEX; i++) {
+      const GLuint j = i - BRW_ATTRIB_FIRST_MATERIAL;
+      ASSERT(j < MAT_ATTRIB_MAX);
+      save->currentsz[i] = &ctx->ListState.ActiveMaterialSize[j];
+      save->current[i] = ctx->ListState.CurrentMaterial[j];
+   }
+
+   save->currentsz[BRW_ATTRIB_INDEX] = &ctx->ListState.ActiveIndex;
+   save->current[BRW_ATTRIB_INDEX] = &ctx->ListState.CurrentIndex;
+
+   save->currentsz[BRW_ATTRIB_EDGEFLAG] = &ctx->ListState.ActiveEdgeFlag;
+   save->current[BRW_ATTRIB_EDGEFLAG] = &save->CurrentFloatEdgeFlag;
+}
+
+/**
+ * Initialize the display list compiler
+ */
+void brw_save_api_init( struct brw_save_context *save )
+{
+   GLcontext *ctx = save->ctx;
+   GLuint i;
+
+   /* Register the vertex-list opcode with its playback, destroy and
+    * print callbacks.
+    */
+   save->opcode_vertex_list =
+      _mesa_alloc_opcode( ctx,
+                          sizeof(struct brw_save_vertex_list),
+                          brw_save_playback_vertex_list,
+                          brw_destroy_vertex_list,
+                          brw_print_vertex_list );
+
+   ctx->Driver.NotifySaveBegin = brw_save_NotifyBegin;
+
+   _save_vtxfmt_init( ctx );
+   _save_current_init( ctx );
+
+   for (i = 0; i < BRW_ATTRIB_MAX; i++)
+      save->inputs[i] = &save->arrays[i];
+
+   /* Hook our array functions into the outside-begin-end vtxfmt in
+    * ctx->ListState.
+    */
+   ctx->ListState.ListVtxfmt.Rectf = _save_OBE_Rectf;
+   ctx->ListState.ListVtxfmt.DrawArrays = _save_OBE_DrawArrays;
+   ctx->ListState.ListVtxfmt.DrawElements = _save_OBE_DrawElements;
+   ctx->ListState.ListVtxfmt.DrawRangeElements = _save_OBE_DrawRangeElements;
+   _mesa_install_save_vtxfmt( ctx, &ctx->ListState.ListVtxfmt );
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_save_draw.c b/src/mesa/drivers/dri/i965/brw_save_draw.c
new file mode 100644
index 00000000000..84f74d3f6cf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_save_draw.c
@@ -0,0 +1,209 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 6.1
+ *
+ * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Author:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "imports.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "light.h"
+#include "state.h"
+
+#include "brw_save.h"
+#include "brw_draw.h"
+#include "brw_fallback.h"
+
+
+/* After a list node has been played back, update the context's current
+ * attribute values from the node's last vertex, and update
+ * ctx->Driver.CurrentExecPrimitive from its last primitive.
+ *
+ * The vertex is fetched from the node's buffer object.  Vertices are
+ * laid out as described by brw_bind_vertex_list() below: attributes are
+ * packed as GLfloats in attribute order, node->buffer_offset is a byte
+ * offset and node->vertex_size is a count of GLfloats.
+ */
+static void _playback_copy_to_current( GLcontext *ctx,
+                                       const struct brw_save_vertex_list *node )
+{
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+   GLfloat vertex[BRW_ATTRIB_MAX * 4], *data = vertex;
+   GLuint i, offset;
+
+   /* Byte offset of the last vertex.  vertex_size counts GLfloats, so it
+    * must be scaled to bytes before being added to buffer_offset.
+    */
+   if (node->count)
+      offset = (node->buffer_offset +
+                (node->count-1) * node->vertex_size * sizeof(GLfloat));
+   else
+      offset = node->buffer_offset;
+
+   /* The size argument is in bytes as well: fetch one whole vertex. */
+   ctx->Driver.GetBufferSubData( ctx, 0, offset,
+                                 node->vertex_size * sizeof(GLfloat),
+                                 data, node->vertex_store->bufferobj );
+
+   /* Position has no "current" value and is not copied, but its
+    * components still lead the vertex: step over them so that 'data'
+    * lines up with the first attribute handled by the loop below.
+    */
+   for (i = 0; i <= BRW_ATTRIB_POS; i++)
+      data += node->attrsz[i];
+
+   for (i = BRW_ATTRIB_POS+1 ; i <= BRW_ATTRIB_INDEX ; i++) {
+      if (node->attrsz[i]) {
+         COPY_CLEAN_4V(save->current[i], node->attrsz[i], data);
+         data += node->attrsz[i];
+
+         if (i >= BRW_ATTRIB_MAT_FRONT_AMBIENT &&
+             i <= BRW_ATTRIB_MAT_BACK_INDEXES)
+            ctx->NewState |= _NEW_LIGHT;
+      }
+   }
+
+   /* Edgeflag requires special treatment:
+    */
+   if (node->attrsz[BRW_ATTRIB_EDGEFLAG]) {
+      ctx->Current.EdgeFlag = (data[0] == 1.0);
+   }
+
+
+#if 1
+   /* Colormaterial -- this kind of sucks.
+    */
+   if (ctx->Light.ColorMaterialEnabled) {
+      _mesa_update_color_material(ctx, ctx->Current.Attrib[BRW_ATTRIB_COLOR0]);
+   }
+#endif
+
+   /* CurrentExecPrimitive
+    */
+   if (node->prim_count) {
+      const struct brw_draw_prim *prim = &node->prim[node->prim_count - 1];
+      if (prim->end)
+         ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
+      else
+         ctx->Driver.CurrentExecPrimitive = prim->mode;
+   }
+}
+
+
+
+/* Treat the vertex storage as a VBO, define vertex arrays pointing
+ * into it:
+ *
+ * Every attribute with a non-zero size gets a GL_FLOAT array whose Ptr
+ * is the running byte offset inside the buffer object, with a stride of
+ * one whole vertex (vertex_size GLfloats).
+ */
+static void brw_bind_vertex_list( struct brw_save_context *save,
+                                  const struct brw_save_vertex_list *node )
+{
+   struct gl_client_array *arrays = save->arrays;
+   GLuint data = node->buffer_offset;
+   GLuint attr;
+
+   memset(arrays, 0, BRW_ATTRIB_MAX * sizeof(arrays[0]));
+
+   for (attr = 0; attr <= BRW_ATTRIB_INDEX; attr++) {
+      if (node->attrsz[attr]) {
+         /* An offset, not a real address: the array is sourced from
+          * BufferObj below.
+          */
+         arrays[attr].Ptr = (const GLubyte *)data;
+         arrays[attr].Size = node->attrsz[attr];
+         arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat);
+         arrays[attr].Stride = node->vertex_size * sizeof(GLfloat);
+         arrays[attr].Type = GL_FLOAT;
+         arrays[attr].Enabled = 1;
+         arrays[attr].BufferObj = node->vertex_store->bufferobj;
+         arrays[attr]._MaxElement = node->count; /* ??? */
+
+         assert(arrays[attr].BufferObj->Name);
+
+         data += node->attrsz[attr] * sizeof(GLfloat);
+      }
+   }
+}
+
+/* Replay a list node through brw_loopback_vertex_list() instead of
+ * drawing it in place: the node's buffer object is mapped, the vertex
+ * data starting at buffer_offset is handed to the loopback helper, and
+ * the buffer is unmapped again.
+ *
+ * NOTE(review): the MapBuffer result is used without a NULL check.
+ */
+static void brw_save_loopback_vertex_list( GLcontext *ctx,
+                                           const struct brw_save_vertex_list *list )
+{
+   const char *buffer = ctx->Driver.MapBuffer(ctx,
+                                              GL_ARRAY_BUFFER_ARB,
+                                              GL_DYNAMIC_READ_ARB, /* ? */
+                                              list->vertex_store->bufferobj);
+
+   brw_loopback_vertex_list( ctx,
+                             (const GLfloat *)(buffer + list->buffer_offset),
+                             list->attrsz,
+                             list->prim,
+                             list->prim_count,
+                             list->wrap_count,
+                             list->vertex_size);
+
+   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
+                           list->vertex_store->bufferobj);
+}
+
+
+/**
+ * Execute the buffer and save copied verts.
+ */
+void brw_save_playback_vertex_list( GLcontext *ctx, void *data )
+{
+   const struct brw_save_vertex_list *node = (const struct brw_save_vertex_list *) data;
+   struct brw_save_context *save = IMM_CONTEXT(ctx)->save;
+
+   FLUSH_CURRENT(ctx, 0);
+
+   /* Nodes without primitives or vertices skip straight to the
+    * copy-to-current step at the bottom.
+    */
+   if (node->prim_count > 0 && node->count > 0) {
+
+      if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END &&
+          node->prim[0].begin) {
+
+         /* Degenerate case: list is called inside begin/end pair and
+          * includes operations such as glBegin or glDrawArrays.
+          */
+         if (0)
+            _mesa_printf("displaylist recursive begin");
+
+         brw_save_loopback_vertex_list( ctx, node );
+         return;
+      }
+      else if (save->replay_flags) {
+         /* Various degenerate cases: translate into immediate mode
+          * calls rather than trying to execute in place.
+          */
+         brw_save_loopback_vertex_list( ctx, node );
+         return;
+      }
+
+      if (ctx->NewState)
+         _mesa_update_state( ctx );
+
+      /* A program that is enabled by the application but whose derived
+       * _Enabled flag is clear is reported as an error and nothing is
+       * drawn.
+       */
+      if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) ||
+          (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glBegin (invalid vertex/fragment program)");
+         return;
+      }
+
+      brw_bind_vertex_list( save, node );
+
+      /* Draw directly from the node's buffer object; if brw_draw_prims
+       * reports failure, replay the node through the loopback path
+       * between brw_fallback() and brw_unfallback() instead.
+       */
+      if (!brw_draw_prims( save->ctx,
+                           save->inputs,
+                           node->prim,
+                           node->prim_count,
+                           NULL,
+                           0, /* Node is a VBO, so this is ok */
+                           node->count,
+                           0 )) {
+         brw_fallback(ctx);
+         brw_save_loopback_vertex_list( ctx, node );
+         brw_unfallback(ctx);
+         return;
+      }
+   }
+
+   /* Copy to current?
+    */
+   _playback_copy_to_current( ctx, node );
+}
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
new file mode 100644
index 00000000000..d5175399d6c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -0,0 +1,188 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  * Keith Whitwell <[email protected]>
+  */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+/* Mask with one bit set for every fragment attribute below
+ * FRAG_ATTRIB_MAX.
+ */
+#define DO_SETUP_BITS ((1<<FRAG_ATTRIB_MAX)-1)
+
+/* Build the SF program described by 'key' and upload it to the
+ * BRW_SF_PROG cache, storing the resulting offset in
+ * brw->sf.prog_gs_offset.
+ */
+static void compile_sf_prog( struct brw_context *brw,
+                             struct brw_sf_prog_key *key )
+{
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint i, idx;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(&c.func);
+
+   c.key = *key;
+   /* Two attributes are packed per register, hence the (n+1)/2
+    * rounding below.
+    */
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attr_regs = (c.nr_attrs+1)/2;
+   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Construct map from attribute number to position in the vertex.
+    */
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++)
+      if (c.key.attrs & (1<<i)) {
+         c.attr_to_idx[i] = idx;
+         c.idx_to_attr[idx] = i;
+         idx++;
+      }
+
+   /* Which primitive? Or all three?
+    */
+   switch (key->primitive) {
+   case SF_TRIANGLES:
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c );
+      break;
+   case SF_LINES:
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c );
+      break;
+   case SF_POINTS:
+      c.nr_verts = 1;
+      brw_emit_point_setup( &c );
+      break;
+   case SF_UNFILLED_TRIS:
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
+                                              &c.key,
+                                              sizeof(c.key),
+                                              program,
+                                              program_size,
+                                              &c.prog_data,
+                                              &brw->sf.prog_data );
+}
+
+
+/* Look 'key' up in the BRW_SF_PROG cache, passing brw->sf.prog_data and
+ * brw->sf.prog_gs_offset as the outputs of the lookup.  The key is
+ * passed together with its size.
+ */
+static GLboolean search_cache( struct brw_context *brw,
+                               struct brw_sf_prog_key *key )
+{
+   return brw_search_cache(&brw->cache[BRW_SF_PROG],
+                           key, sizeof(*key),
+                           &brw->sf.prog_data,
+                           &brw->sf.prog_gs_offset);
+}
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_sf_prog( struct brw_context *brw )
+{
+   struct brw_sf_prog_key key;
+
+   /* Zero the whole key, padding bits included, before filling it in;
+    * the key is handed to the cache together with its size.
+    */
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written;
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   /* There is no default case: for any other reduced_primitive value
+    * key.primitive keeps the zero left by the memset above, which is
+    * SF_POINTS in brw_sf.h.
+    */
+   switch (brw->intel.reduced_primitive) {
+   case GL_TRIANGLES:
+      /* NOTE: We just use the edgeflag attribute as an indicator that
+       * unfilled triangles are active. We don't actually do the
+       * edgeflag testing here, it is already done in the clip
+       * program.
+       */
+      if (key.attrs & (1<<VERT_RESULT_EDGE))
+         key.primitive = SF_UNFILLED_TRIS;
+      else
+         key.primitive = SF_TRIANGLES;
+      break;
+   case GL_LINES:
+      key.primitive = SF_LINES;
+      break;
+   case GL_POINTS:
+      key.primitive = SF_POINTS;
+      break;
+   }
+
+
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT);
+   key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+   /* _NEW_POLYGON */
+   if (key.do_twoside_color)
+      key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+
+
+   /* Only compile when no cached program matches this key. */
+   if (!search_cache(brw, &key))
+      compile_sf_prog( brw, &key );
+}
+
+
+/* Tracked-state atom: upload_sf_prog is the update hook, and the dirty
+ * flags below are the state dependencies noted inside it.
+ */
+const struct brw_tracked_state brw_sf_prog = {
+   .dirty = {
+      .mesa = (_NEW_LIGHT|_NEW_POLYGON),
+      .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .update = upload_sf_prog
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
new file mode 100644
index 00000000000..fb72b84ba8a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  * Keith Whitwell <[email protected]>
+  */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "program.h"
+
+
+/* Values for brw_sf_prog_key.primitive (a 2-bit field). */
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+/* Key identifying one SF program variant.  The bitfields add up to 32
+ * bits (2+1+1+16+1+11); 'pad' is the explicit filler.
+ */
+struct brw_sf_prog_key {
+   GLuint primitive:2;          /* one of the SF_* values above */
+   GLuint do_twoside_color:1;
+   GLuint do_flat_shading:1;
+   GLuint attrs:16;             /* bitmask of attributes, tested as (1<<attr) */
+   GLuint frontface_ccw:1;
+   GLuint pad:11;
+};
+
+
+/* Working state for compiling a single SF program. */
+struct brw_sf_compile {
+   struct brw_compile func;
+   struct brw_sf_prog_key key;
+   struct brw_sf_prog_data prog_data;
+
+   struct brw_reg pv;
+   struct brw_reg det;
+   struct brw_reg dx0;
+   struct brw_reg dx2;
+   struct brw_reg dy0;
+   struct brw_reg dy2;
+
+   /* z and 1/w passed in separately:
+    */
+   struct brw_reg z[3];
+   struct brw_reg inv_w[3];
+
+   /* The vertices:
+    */
+   struct brw_reg vert[3];
+
+   /* Temporaries, allocated after last vertex reg.
+    */
+   struct brw_reg inv_det;
+   struct brw_reg a1_sub_a0;
+   struct brw_reg a2_sub_a0;
+   struct brw_reg tmp;
+
+   struct brw_reg m1Cx;
+   struct brw_reg m2Cy;
+   struct brw_reg m3C0;
+
+   GLuint nr_verts;
+   GLuint nr_attrs;
+   GLuint nr_attr_regs;
+   GLuint nr_setup_attrs;
+   GLuint nr_setup_regs;
+
+   /* Attribute number <-> packed position in the vertex, in both
+    * directions.
+    */
+   GLubyte attr_to_idx[VERT_RESULT_MAX];
+   GLubyte idx_to_attr[VERT_RESULT_MAX];
+};
+
+
+/* Per-primitive setup emitters, one for each SF_* primitive kind. */
+void brw_emit_tri_setup( struct brw_sf_compile *c );
+void brw_emit_line_setup( struct brw_sf_compile *c );
+void brw_emit_point_setup( struct brw_sf_compile *c );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
new file mode 100644
index 00000000000..cbaf018c44a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -0,0 +1,609 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  * Keith Whitwell <[email protected]>
+  */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+/* Return the 4-wide register region holding attribute 'attr' of the
+ * vertex that starts at register 'vert'.  Attributes are packed two per
+ * register: the packed index / 2 selects the register and the packed
+ * index % 2 selects which half of it (sub-register 0 or 4).
+ */
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+                                    struct brw_reg vert,
+                                    GLuint attr)
+{
+   GLuint off = c->attr_to_idx[attr] / 2;
+   GLuint sub = c->attr_to_idx[attr] % 2;
+
+   return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+/* Non-zero when bit 'attr' is set in the program key's attribute mask. */
+static GLboolean have_attr(struct brw_sf_compile *c,
+                           GLuint attr)
+{
+   return (c->key.attrs & (1<<attr)) ? 1 : 0;
+}
+
+
+
+/***********************************************************************
+ * Twoside lighting
+ */
+
+/* For one vertex, emit a MOV overwriting each front colour (COL0, COL1)
+ * with the matching back-face colour (BFC0, BFC1); a pair is only
+ * handled when both of its attributes are present.
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+                      struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   for (i = 0; i < 2; i++) {
+      if (have_attr(c, VERT_RESULT_COL0+i) &&
+          have_attr(c, VERT_RESULT_BFC0+i))
+         brw_MOV(p,
+                 get_vert_attr(c, vert, VERT_RESULT_COL0+i),
+                 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+   }
+}
+
+
+/* Emit the two-sided colour selection: compare c->det against zero
+ * (the comparison direction follows key.frontface_ccw) and, inside the
+ * resulting IF, apply copy_bfc to every vertex of the primitive.
+ */
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   /* XXX: What happens if BFC isn't present? This could only happen
+    * for user-supplied vertex programs, as t_vp_build.c always does
+    * the right thing.
+    */
+   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+      return;
+
+   /* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
+    * to get all channels active inside the IF. In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_4);
+   {
+      /* The cases fall through on purpose: nr_verts == 3 handles
+       * vert[2], vert[1] and vert[0]; 2 handles vert[1] and vert[0].
+       */
+      switch (c->nr_verts) {
+      case 3: copy_bfc(c, c->vert[2]); /* fallthrough */
+      case 2: copy_bfc(c, c->vert[1]); /* fallthrough */
+      case 1: copy_bfc(c, c->vert[0]);
+      }
+   }
+   brw_ENDIF(p, if_insn);
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+                                (1<<VERT_RESULT_COL1))
+
+/* Emit one MOV per colour attribute present in the key (COL0 and/or
+ * COL1), copying it from vertex 'src' to vertex 'dst'.  The number of
+ * instructions emitted therefore equals the number of colour bits set.
+ */
+static void copy_colors( struct brw_sf_compile *c,
+                         struct brw_reg dst,
+                         struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+      if (have_attr(c,i))
+         brw_MOV(p,
+                 get_vert_attr(c, dst, i),
+                 get_vert_attr(c, src, i));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   /* nr = number of colour attributes present, which is also the number
+    * of MOVs each copy_colors() call below emits.
+    */
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   brw_push_insn_state(p);
+
+   /* Jump table with one case per possible provoking vertex.  Cases 0
+    * and 1 are each 2*nr MOVs plus a trailing JMPI, i.e. nr*2+1
+    * instructions, so scaling pv by that amount and adding it to ip
+    * lands on the case selected by pv.
+    * NOTE(review): assumes JMPI distances count instructions -- confirm.
+    */
+   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
+   brw_JMPI(p, ip, ip, c->pv);
+
+   /* pv == 0: then skip case 1 (nr*2+1) and case 2 (nr*2). */
+   copy_colors(c, c->vert[1], c->vert[0]);
+   copy_colors(c, c->vert[2], c->vert[0]);
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
+
+   /* pv == 1: then skip case 2 (nr*2). */
+   copy_colors(c, c->vert[0], c->vert[1]);
+   copy_colors(c, c->vert[2], c->vert[1]);
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
+
+   /* pv == 2: last case, falls out of the table. */
+   copy_colors(c, c->vert[0], c->vert[2]);
+   copy_colors(c, c->vert[1], c->vert[2]);
+
+   brw_pop_insn_state(p);
+}
+
+
+/* Line variant of the jump table above: two cases, each a single
+ * copy_colors() call (nr MOVs); the first is followed by a JMPI, making
+ * it nr+1 instructions long.
+ */
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   brw_push_insn_state(p);
+
+   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
+   brw_JMPI(p, ip, ip, c->pv);
+   /* pv == 0: copy vert[0]'s colours to vert[1], then skip case 1. */
+   copy_colors(c, c->vert[1], c->vert[0]);
+
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr));
+   /* pv == 1: copy vert[1]'s colours to vert[0]. */
+   copy_colors(c, c->vert[0], c->vert[1]);
+
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + GLuint reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + /* Looks like we invert all 8 elements just to get 1/det in + * position 2 !?! 
+ */ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ + FRAG_BIT_COL0 | \ + FRAG_BIT_COL1) + +static GLboolean calculate_masks( struct brw_sf_compile *c, + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) +{ + GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); + GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint linear_mask; + + if (c->key.do_flat_shading) + linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); + else + linear_mask = c->key.attrs; + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << c->idx_to_attr[reg*2])) + *pc_persp = 0xf; + + if (linear_mask & (1 << c->idx_to_attr[reg*2])) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + *pc_linear |= 0xf0; + } + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 3; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_twoside_color) + do_twoside_color(c); + + if (c->key.do_flat_shading) + do_flatshade_triangle(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate 
coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + + c->nr_verts = 2; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_flat_shading) + do_flatshade_line(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + 
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + alloc_regs(c); + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. 
+ */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_anyprim_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg primmask; + struct brw_instruction *jmp; + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + alloc_regs(c); + + primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, primmask, brw_imm_ud(1)); + brw_SHL(p, primmask, primmask, payload_prim); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | + (1<<_3DPRIM_TRISTRIP) | + (1<<_3DPRIM_TRIFAN) | + (1<<_3DPRIM_TRISTRIP_REVERSE) | + (1<<_3DPRIM_POLYGON) | + (1<<_3DPRIM_RECTLIST) | + (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + brw_emit_tri_setup( c ); + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | + (1<<_3DPRIM_LINESTRIP) | + (1<<_3DPRIM_LINELOOP) | + (1<<_3DPRIM_LINESTRIP_CONT) | + (1<<_3DPRIM_LINESTRIP_BF) | + (1<<_3DPRIM_LINESTRIP_CONT_BF))); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + brw_emit_line_setup( c ); + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c ); +} + + + + diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c new file mode 100644 index 00000000000..bfac52d765b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -0,0 +1,219 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "macros.h" + +static void upload_sf_vp(struct brw_context *brw) +{ + struct brw_sf_viewport sfv; + + memset(&sfv, 0, sizeof(sfv)); + + if (brw->intel.driDrawable) + { + /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ + + if (!brw->metaops.active) { + const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m; + + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = - v[MAT_SY]; + sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h; + sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; + } + else { + sfv.viewport.m00 = 1; + sfv.viewport.m11 = - 1; + sfv.viewport.m22 = 1; + sfv.viewport.m30 = 0; + sfv.viewport.m31 = brw->intel.driDrawable->h; + sfv.viewport.m32 = 0; + } + } + + /* XXX: what state for this? 
*/ + if (brw->intel.driDrawable) + { + intelScreenPrivate *screen = brw->intel.intelScreen; + /* _NEW_SCISSOR */ + GLint x = brw->attribs.Scissor->X; + GLint y = brw->attribs.Scissor->Y; + GLuint w = brw->attribs.Scissor->Width; + GLuint h = brw->attribs.Scissor->Height; + + GLint x1 = x; + GLint y1 = brw->intel.driDrawable->h - (y + h); + GLint x2 = x + w - 1; + GLint y2 = y1 + h - 1; + + if (x1 < 0) x1 = 0; + if (y1 < 0) y1 = 0; + if (x2 < 0) x2 = 0; + if (y2 < 0) y2 = 0; + + if (x2 >= screen->width) x2 = screen->width-1; + if (y2 >= screen->height) y2 = screen->height-1; + if (x1 >= screen->width) x1 = screen->width-1; + if (y1 >= screen->height) y1 = screen->height-1; + + sfv.scissor.xmin = x1; + sfv.scissor.xmax = x2; + sfv.scissor.ymin = y1; + sfv.scissor.ymax = y2; + } + + brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .mesa = (_NEW_VIEWPORT | + _NEW_SCISSOR), + .brw = BRW_NEW_METAOPS, + .cache = 0 + }, + .update = upload_sf_vp +}; + + + +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_state sf; + memset(&sf, 0, sizeof(sf)); + + /* CACHE_NEW_SF_PROG */ + sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16; + sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; + sf.thread3.urb_entry_read_offset = 1; + + /* BRW_NEW_URB_FENCE */ + sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; + sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; + sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (INTEL_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 
5; + + sf.sf5.viewport_transform = 1; + + /* _NEW_SCISSOR */ + if (brw->attribs.Scissor->Enabled) + sf.sf6.scissor = 1; + + /* _NEW_POLYGON */ + if (brw->attribs.Polygon->FrontFace == GL_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + if (brw->attribs.Polygon->CullFlag) { + switch (brw->attribs.Polygon->CullFaceMode) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + default: + assert(0); + break; + } + } + else + sf.sf6.cull_mode = BRW_CULLMODE_NONE; + + + /* _NEW_LINE */ + sf.sf6.line_width = brw->attribs.Line->_Width * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (brw->attribs.Line->SmoothFlag) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + /* _NEW_POINT */ + sf.sf6.point_rast_rule = 1; /* opengl conventions */ + sf.sf7.point_size = brw->attribs.Point->_Size * (1<<3); + sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + sf.sf7.line_last_pixel_enable = 0; + + /* Set bias for OpenGL rasterization rules: + */ + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + + brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .mesa = (_NEW_POLYGON | + _NEW_LINE | + _NEW_POINT | + _NEW_SCISSOR), + .brw = (BRW_NEW_URB_FENCE | + BRW_NEW_METAOPS), + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .update = upload_sf_unit +}; + + diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h new file mode 100644 index 00000000000..b4cbdd7a380 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -0,0 
+1,146 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "brw_context.h" + + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_check_fallback; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_constant_buffer_state; +const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple_offset; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_input_sizes; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct 
brw_tracked_state brw_active_vertprog; +const struct brw_tracked_state brw_tnl_vertprog; +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_clear_surface_cache; +const struct brw_tracked_state brw_clear_batch_cache; + +/*********************************************************************** + * brw_state_cache.c + */ +GLuint brw_cache_data(struct brw_cache *cache, + const void *data ); + +GLuint brw_cache_data_sz(struct brw_cache *cache, + const void *data, + GLuint data_sz); + +GLuint brw_upload_cache( struct brw_cache *cache, + const void *key, + GLuint key_sz, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return ); + +GLboolean brw_search_cache( struct brw_cache *cache, + const void *key, + GLuint key_size, + void *aux_return, + GLuint *offset_return); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), 0) +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ); + +void brw_destroy_batch_cache( struct brw_context *brw ); + + +/*********************************************************************** + * brw_state_pool.c + */ +void brw_init_pools( struct brw_context *brw ); +void brw_destroy_pools( struct brw_context *brw ); + +GLboolean brw_pool_alloc( struct brw_mem_pool *pool, + GLuint size, + GLuint alignment, + GLuint *offset_return); + +void brw_pool_fence( struct brw_context *brw, + struct brw_mem_pool *pool, + GLuint fence ); + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ); + +void brw_clear_all_caches( struct brw_context *brw ); +void brw_invalidate_pools( struct brw_context *brw ); 
+void brw_clear_batch_cache_flush( struct brw_context *brw ); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c new file mode 100644 index 00000000000..909b0acd121 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_state.h" +#include "brw_aub.h" +#include "intel_batchbuffer.h" +#include "imports.h" + + + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. 
+ */ +GLboolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + GLuint sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->emit_state_always) { + intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + return GL_TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return GL_FALSE; + if (item->sz != sz) { + _mesa_free(item->header); + item->header = _mesa_malloc(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = _mesa_malloc(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + + emit: + memcpy(item->header, newheader, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + return GL_TRUE; +} + +static void clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void *)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; + + + brw_clear_all_caches(brw); + + bmReleaseBuffers(&brw->intel); + + brw_invalidate_pools(brw); +} + +void brw_clear_batch_cache_flush( struct brw_context *brw ) +{ + clear_batch_cache(brw); + + brw->wrap = 0; + +/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + clear_batch_cache(brw); +} diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c new file mode 100644 index 00000000000..71c6938f9a3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -0,0 +1,469 @@ +/* + Copyright (C) Intel Corp. 
2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_state.h" +#include "brw_aub.h" +#include "intel_batchbuffer.h" +#include "imports.h" + +/* XXX: Fixme - have to include these to get the sizes of the prog_key + * structs: + */ +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... 
+ */ + +static GLuint hash_key( const void *key, GLuint key_size ) +{ + GLuint *ikey = (GLuint *)key; + GLuint hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} + +static struct brw_cache_item *search_cache( struct brw_cache *cache, + GLuint hash, + const void *key, + GLuint key_size) +{ + struct brw_cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0) + return c; + } + + return NULL; +} + + +static void rehash( struct brw_cache *cache ) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + GLuint size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items)); + _mesa_memset(items, 0, size * sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +GLboolean brw_search_cache( struct brw_cache *cache, + const void *key, + GLuint key_size, + void *aux_return, + GLuint *offset_return) +{ + struct brw_cache_item *item; + GLuint addr = 0; + GLuint hash = hash_key(key, key_size); + + item = search_cache(cache, hash, key, key_size); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + *offset_return = addr = item->offset; + } + + if (item == NULL || addr != cache->last_addr) { + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = addr; + } + + return item != NULL; +} + +GLuint brw_upload_cache( struct brw_cache *cache, + const void *key, + GLuint key_size, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return ) +{ + GLuint offset; + struct brw_cache_item *item = 
CALLOC_STRUCT(brw_cache_item); + GLuint hash = hash_key(key, key_size); + void *tmp = _mesa_malloc(key_size + cache->aux_size); + + if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) { + /* Should not be possible: + */ + _mesa_printf("brw_pool_alloc failed\n"); + exit(1); + } + + memcpy(tmp, key, key_size); + + if (cache->aux_size) + memcpy(tmp+key_size, aux, cache->aux_size); + + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->offset = offset; + item->data_size = data_size; + + if (++cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + + if (aux_return) { + assert(cache->aux_size); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n", + cache->name, + data_size, + cache->pool->buffer, + offset); + + /* Copy data to the buffer: + */ + bmBufferSubDataAUB(&cache->brw->intel, + cache->pool->buffer, + offset, + data_size, + data, + cache->aub_type, + cache->aub_sub_type); + + + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = offset; + + return offset; +} + +/* This doesn't really work with aux data. 
Use search/upload instead + */ +GLuint brw_cache_data_sz(struct brw_cache *cache, + const void *data, + GLuint data_size) +{ + GLuint addr; + + if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { + addr = brw_upload_cache(cache, + data, data_size, + data, data_size, + NULL, NULL); + } + + return addr; +} + +GLuint brw_cache_data(struct brw_cache *cache, + const void *data) +{ + return brw_cache_data_sz(cache, data, cache->key_size); +} + + + + + +static void brw_init_cache( struct brw_context *brw, + const char *name, + GLuint id, + GLuint key_size, + GLuint aux_size, + GLuint aub_type, + GLuint aub_sub_type ) +{ + struct brw_cache *cache = &brw->cache[id]; + cache->brw = brw; + cache->id = id; + cache->name = name; + cache->items = NULL; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * + sizeof(struct brw_cache_item)); + + + cache->key_size = key_size; + cache->aux_size = aux_size; + cache->aub_type = aub_type; + cache->aub_sub_type = aub_sub_type; + switch (aub_type) { + case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; + case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; + default: assert(0); break; + } +} + +void brw_init_caches( struct brw_context *brw ) +{ + + brw_init_cache(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0, + DW_GENERAL_STATE, + DWGS_COLOR_CALC_VIEWPORT_STATE); + + brw_init_cache(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_COLOR_CALC_STATE); + + brw_init_cache(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data), + DW_GENERAL_STATE, + DWGS_KERNEL_INSTRUCTIONS); + + brw_init_cache(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0, + DW_GENERAL_STATE, + DWGS_SAMPLER_DEFAULT_COLOR); + + brw_init_cache(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable 
key/data size */ + 0, + DW_GENERAL_STATE, + DWGS_SAMPLER_STATE); + + brw_init_cache(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_WINDOWER_IZ_STATE); + + brw_init_cache(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data), + DW_GENERAL_STATE, + DWGS_KERNEL_INSTRUCTIONS); + + brw_init_cache(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0, + DW_GENERAL_STATE, + DWGS_STRIPS_FANS_VIEWPORT_STATE); + + brw_init_cache(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_STRIPS_FANS_STATE); + + brw_init_cache(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_VERTEX_SHADER_STATE); + + brw_init_cache(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data), + DW_GENERAL_STATE, + DWGS_KERNEL_INSTRUCTIONS); + + brw_init_cache(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_CLIPPER_STATE); + + brw_init_cache(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data), + DW_GENERAL_STATE, + DWGS_KERNEL_INSTRUCTIONS); + + brw_init_cache(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0, + DW_GENERAL_STATE, + DWGS_GEOMETRY_SHADER_STATE); + + brw_init_cache(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data), + DW_GENERAL_STATE, + DWGS_KERNEL_INSTRUCTIONS); + + brw_init_cache(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0, + DW_SURFACE_STATE, + DWSS_SURFACE_STATE); + + brw_init_cache(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + sizeof(struct brw_surface_binding_table), + 0, + DW_SURFACE_STATE, + DWSS_BINDING_TABLE_STATE); +} + + +/* When we lose hardware context, need to invalidate the surface cache + * as these 
structs must be explicitly re-uploaded. They are subject + * to fixup by the memory manager as they contain absolute agp + * offsets, so we need to ensure there is a fresh version of the + * struct available to receive the fixup. + * + * XXX: Need to ensure that there aren't two versions of a surface or + * bufferobj with different backing data active in the same buffer at + * once? Otherwise the cache could confuse them. Maybe better not to + * cache at all? + * + * --> Isn't this the same as saying need to ensure batch is flushed + * before new data is uploaded to an existing buffer? We + * already try to make sure of that. + */ +static void clear_cache( struct brw_cache *cache ) +{ + struct brw_cache_item *c, *next; + GLuint i; + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + next = c->next; + free((void *)c->key); + free(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; +} + +void brw_clear_all_caches( struct brw_context *brw ) +{ + GLint i; + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); + + if (brw->curbe.last_buf) { + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + + + +void brw_destroy_caches( struct brw_context *brw ) +{ + GLuint i; + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); +} diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c new file mode 100644 index 00000000000..b9926f2a5d7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_pool.c @@ -0,0 +1,154 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_state.h" +#include "imports.h" + +#include "intel_ioctl.h" +#include "bufmgr.h" + +GLboolean brw_pool_alloc( struct brw_mem_pool *pool, + GLuint size, + GLuint align, + GLuint *offset_return) +{ + GLuint align_mask = (1<<align)-1; + GLuint fixup = ((pool->offset + align_mask) & ~align_mask) - pool->offset; + + size = (size + 3) & ~3; + + if (pool->offset + fixup + size >= pool->size) { + _mesa_printf("%s failed\n", __FUNCTION__); + assert(0); + exit(0); + } + + pool->offset += fixup; + *offset_return = pool->offset; + pool->offset += size; + + return GL_TRUE; +} + +static +void brw_invalidate_pool( struct intel_context *intel, + struct brw_mem_pool *pool ) +{ + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__); + + bmBufferData(intel, + pool->buffer, + pool->size, + NULL, + 0); + + pool->offset = 0; + + brw_clear_all_caches(pool->brw); +} + +static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr ) +{ + struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr; + + pool->offset = 0; + brw_clear_all_caches(pool->brw); +} + + + +static void brw_init_pool( struct brw_context *brw, + GLuint pool_id, + GLuint size ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pool->size = size; + pool->brw = brw; + + bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12); + + /* Also want to say not to wait on fences when data is presented + */ + bmBufferSetInvalidateCB(&brw->intel, pool->buffer, + brw_invalidate_pool_cb, + pool, + GL_TRUE); + + bmBufferData(&brw->intel, + pool->buffer, + pool->size, + NULL, + 0); + +} + +static void brw_destroy_pool( struct brw_context *brw, + GLuint pool_id ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + bmDeleteBuffers(&brw->intel, 1, &pool->buffer); +} + + +void brw_pool_check_wrap( struct brw_context *brw, + struct 
brw_mem_pool *pool ) +{ + if (pool->offset > (pool->size * 3) / 4) { + if (brw->intel.aub_file) + brw->intel.aub_wrap = 1; + else + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + } + +} + +void brw_init_pools( struct brw_context *brw ) +{ + brw_init_pool(brw, BRW_GS_POOL, 0x80000); + brw_init_pool(brw, BRW_SS_POOL, 0x80000); +} + +void brw_destroy_pools( struct brw_context *brw ) +{ + brw_destroy_pool(brw, BRW_GS_POOL); + brw_destroy_pool(brw, BRW_SS_POOL); +} + + +void brw_invalidate_pools( struct brw_context *brw ) +{ + brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]); + brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]); +} diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c new file mode 100644 index 00000000000..92c07c29624 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -0,0 +1,264 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "bufmgr.h" +#include "intel_batchbuffer.h" + +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +const struct brw_tracked_state *atoms[] = +{ + &brw_check_fallback, + + &brw_tnl_vertprog, + &brw_active_vertprog, + &brw_wm_input_sizes, + &brw_vs_prog, + &brw_gs_prog, + &brw_clip_prog, + &brw_sf_prog, + &brw_wm_prog, + + /* Once all the programs are done, we know how large urb entry + * sizes need to be and can decide if we need to change the urb + * layout. + */ + &brw_curbe_offsets, + &brw_recalculate_urb_fence, + + + &brw_cc_vp, + &brw_cc_unit, + + &brw_wm_surfaces, /* must do before samplers */ + &brw_wm_samplers, + + &brw_wm_unit, + &brw_sf_vp, + &brw_sf_unit, + &brw_vs_unit, /* always required, enabled or not */ + &brw_clip_unit, + &brw_gs_unit, + + /* Command packets: + */ + &brw_invarient_state, + &brw_state_base_address, + &brw_pipe_control, + + &brw_binding_table_pointers, + &brw_blend_constant_color, + + &brw_drawing_rect, + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_polygon_stipple_offset, + + &brw_line_stipple, + + /* Ordering of the commands below is documented as fixed. 
+ */ +#if 0 + &brw_pipelined_state_pointers, + &brw_urb_fence, + &brw_constant_buffer_state, +#else + &brw_psp_urb_cbs, +#endif + + + NULL, /* brw_constant_buffer */ +}; + + +void brw_init_state( struct brw_context *brw ) +{ + GLuint i; + + brw_init_pools(brw); + brw_init_caches(brw); + + brw->state.atoms = _mesa_malloc(sizeof(atoms)); + brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); + _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); + + /* Patch in a pointer to the dynamic state atom: + */ + for (i = 0; i < brw->state.nr_atoms; i++) + if (brw->state.atoms[i] == NULL) + brw->state.atoms[i] = &brw->curbe.tracked_state; + + _mesa_memcpy(&brw->curbe.tracked_state, + &brw_constant_buffer, + sizeof(brw_constant_buffer)); +} + + +void brw_destroy_state( struct brw_context *brw ) +{ + if (brw->state.atoms) { + _mesa_free(brw->state.atoms); + brw->state.atoms = NULL; + } + + brw_destroy_caches(brw); + brw_destroy_batch_cache(brw); + brw_destroy_pools(brw); +} + +/*********************************************************************** + */ + +static GLboolean check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + return ((a->mesa & b->mesa) || + (a->brw & b->brw) || + (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + a->mesa |= b->mesa; + a->brw |= b->brw; + a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, + const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + result->mesa = a->mesa ^ b->mesa; + result->brw = a->brw ^ b->brw; + result->cache = a->cache ^ b->cache; +} + + +/*********************************************************************** + * Emit all state: + */ +void brw_validate_state( struct brw_context *brw ) +{ + struct brw_state_flags *state = &brw->state.dirty; + GLuint i; + + state->mesa |= brw->intel.NewGLState; + brw->intel.NewGLState = 0; + + if (brw->wrap) + state->brw |= 
BRW_NEW_CONTEXT; + + if (brw->emit_state_always) { + state->mesa |= ~0; + state->brw |= ~0; + } + + /* texenv program needs to notify us somehow when this happens: + * Some confusion about which state flag should represent this change. + */ + if (brw->fragment_program != brw->attribs.FragmentProgram->_Current) { + brw->fragment_program = brw->attribs.FragmentProgram->_Current; + brw->state.dirty.mesa |= _NEW_PROGRAM; + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + } + + + if (state->mesa == 0 && + state->cache == 0 && + state->brw == 0) + return; + + if (brw->state.dirty.brw & BRW_NEW_CONTEXT) + brw_clear_batch_cache_flush(brw); + + + /* Make an early reference to the state pools, as we don't cope + * well with them being evicted from here down. + */ + (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer); + (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer); + (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer); + + if (INTEL_DEBUG) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. 
+ */ + struct brw_state_flags examined, prev; + _mesa_memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < brw->state.nr_atoms; i++) { + const struct brw_tracked_state *atom = brw->state.atoms[i]; + struct brw_state_flags generated; + + assert(atom->dirty.mesa || + atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) { + brw->state.atoms[i]->update( brw ); + +/* emit_foo(brw); */ + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } + } + else { + for (i = 0; i < Elements(atoms); i++) { + if (check_state(state, &brw->state.atoms[i]->dirty)) + brw->state.atoms[i]->update( brw ); + } + } + + memset(state, 0, sizeof(*state)); +} diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h new file mode 100644 index 00000000000..25acdcfe947 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -0,0 +1,1330 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +/* Command packets: + */ +struct header +{ + GLuint length:16; + GLuint opcode:16; +}; + + +union header_union +{ + struct header bits; + GLuint dword; +}; + +struct brw_3d_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint pad:3; + GLuint wc_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint operation:2; + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } dest; + + GLuint dword2; + GLuint dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + GLuint length:8; + GLuint pad:2; + GLuint topology:5; + GLuint indexed:1; + GLuint opcode:16; + } header; + + GLuint verts_per_instance; + GLuint start_vert_location; + GLuint instance_count; + GLuint start_instance_location; + GLuint base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + GLuint flags:4; + GLuint pad:12; + GLuint opcode:16; +}; + +struct brw_vf_statistics +{ + GLuint statistics_enable:1; + GLuint pad:15; + GLuint opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct 
header header; + GLuint vs; + GLuint gs; + GLuint clp; + GLuint sf; + GLuint wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + GLfloat blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:4; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:12; + GLuint min_array_element:9; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; + GLuint xorg:16; + GLuint yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + GLfloat depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + GLuint length:8; + GLuint index_format:2; + GLuint cut_index_enable:1; + GLuint pad:5; + GLuint opcode:16; + } bits; + GLuint dword; + + } header; + + GLuint buffer_start; + GLuint buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + GLuint pattern:16; + GLuint pad:16; + } bits0; + + struct + { + GLuint repeat_count:9; + GLuint pad:7; + GLuint inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + GLuint pad:5; + GLuint offset:27; + } vs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; + } gs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; + } clp; + + struct + { + GLuint pad:5; + GLuint offset:27; + } sf; + + struct + { + GLuint pad:5; + GLuint offset:27; + } wm; + + struct + { + GLuint pad:5; + GLuint offset:27; 
/* KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + GLuint y_offset:5; + GLuint pad:3; + GLuint x_offset:5; + GLuint pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + GLuint stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + GLuint pipeline_select:1; + GLuint pad:15; + GLuint opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint pad:2; + GLuint instruction_state_cache_flush_enable:1; + GLuint write_cache_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint post_sync_operation:2; + + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } bits1; + + GLuint data0; + GLuint data1; +}; + + +struct brw_urb_fence +{ + struct + { + GLuint length:8; + GLuint vs_realloc:1; + GLuint gs_realloc:1; + GLuint clp_realloc:1; + GLuint sf_realloc:1; + GLuint vfe_realloc:1; + GLuint cs_realloc:1; + GLuint pad:2; + GLuint opcode:16; + } header; + + struct + { + GLuint vs_fence:10; + GLuint gs_fence:10; + GLuint clp_fence:10; + GLuint pad:2; + } bits0; + + struct + { + GLuint sf_fence:10; + GLuint vf_fence:10; + GLuint cs_fence:10; + GLuint pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + GLuint nr_urb_entries:3; + GLuint pad:1; + GLuint urb_entry_size:5; + GLuint pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + GLuint length:8; + GLuint valid:1; + GLuint pad:7; + GLuint opcode:16; + } header; + + struct + { + GLuint buffer_length:6; + GLuint buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint general_state_address:27; + } bits0; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint surface_state_address:27; + 
} bits1; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint indirect_object_state_address:27; + } bits2; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint general_state_upper_bound:20; + } bits3; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + GLuint prefetch_count:3; + GLuint pad:3; + GLuint prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + GLuint pad:4; + GLuint system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + GLuint pad0:1; + GLuint grf_reg_count:3; + GLuint pad1:2; + GLuint kernel_start_pointer:26; +}; + +struct thread1 +{ + GLuint ext_halt_exception_enable:1; + GLuint sw_exception_enable:1; + GLuint mask_stack_exception_enable:1; + GLuint timeout_exception_enable:1; + GLuint illegal_op_exception_enable:1; + GLuint pad0:3; + GLuint depth_coef_urb_read_offset:6; /* WM only */ + GLuint pad1:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad3:5; + GLuint single_program_flow:1; +}; + +struct thread2 +{ + GLuint per_thread_scratch_space:4; + GLuint pad0:6; + GLuint scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + GLuint dispatch_grf_start_reg:4; + GLuint urb_entry_read_offset:6; + GLuint pad0:1; + GLuint urb_entry_read_length:6; + GLuint pad1:1; + GLuint const_urb_entry_read_offset:6; + GLuint pad2:1; + GLuint const_urb_entry_read_length:6; + GLuint pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:9; + GLuint gs_output_stats:1; /* not always */ + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint 
urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; /* may be less */ + GLuint pad3:1; + } thread4; + + struct + { + GLuint pad0:13; + GLuint clip_mode:3; + GLuint userclip_enable_flags:8; + GLuint userclip_must_clip:1; + GLuint pad1:1; + GLuint guard_band_enable:1; + GLuint viewport_z_clip_enable:1; + GLuint viewport_xy_clip_enable:1; + GLuint vertex_position_space:1; + GLuint api_mode:1; + GLuint pad2:1; + } clip5; + + struct + { + GLuint pad0:5; + GLuint clipper_viewport_state_ptr:27; + } clip6; + + + GLfloat viewport_xmin; + GLfloat viewport_xmax; + GLfloat viewport_ymin; + GLfloat viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } cc0; + + + struct + { + GLuint bf_stencil_ref:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + GLuint stencil_ref:8; + } cc1; + + + struct + { + GLuint logicop_enable:1; + GLuint pad0:10; + GLuint depth_write_enable:1; + GLuint depth_test_function:3; + GLuint depth_test:1; + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + } cc2; + + + struct + { + GLuint pad0:8; + GLuint alpha_test_func:3; + GLuint alpha_test:1; + GLuint blend_enable:1; + GLuint ia_blend_enable:1; + GLuint pad1:1; + GLuint alpha_test_format:1; + GLuint pad2:16; + } cc3; + + struct + { + GLuint pad0:5; + GLuint cc_viewport_state_offset:27; + } cc4; + + struct + { + GLuint pad0:2; + GLuint ia_dest_blend_factor:5; + GLuint ia_src_blend_factor:5; + GLuint ia_blend_function:3; + GLuint statistics_enable:1; + GLuint logicop_func:4; + GLuint pad1:11; + GLuint dither_enable:1; + } cc5; + + struct + { + GLuint 
clamp_post_alpha_blend:1; + GLuint clamp_pre_alpha_blend:1; + GLuint clamp_range:2; + GLuint pad0:11; + GLuint y_dither_offset:2; + GLuint x_dither_offset:2; + GLuint dest_blend_factor:5; + GLuint src_blend_factor:5; + GLuint blend_function:3; + } cc6; + + struct { + union { + GLfloat f; + GLubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; + GLuint pad3:1; + } thread4; + + struct + { + GLuint front_winding:1; + GLuint viewport_transform:1; + GLuint pad0:3; + GLuint sf_viewport_state_offset:27; + } sf5; + + struct + { + GLuint pad0:9; + GLuint dest_org_vbias:4; + GLuint dest_org_hbias:4; + GLuint scissor:1; + GLuint disable_2x2_trifilter:1; + GLuint disable_zero_pix_trifilter:1; + GLuint point_rast_rule:2; + GLuint line_endcap_aa_region_width:2; + GLuint line_width:4; + GLuint fast_scissor_disable:1; + GLuint cull_mode:2; + GLuint aa_enable:1; + } sf6; + + struct + { + GLuint point_size:11; + GLuint use_point_size_state:1; + GLuint subpixel_precision:1; + GLuint sprite_point:1; + GLuint pad0:11; + GLuint trifan_pv:2; + GLuint linestrip_pv:2; + GLuint tristrip_pv:2; + GLuint line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:1; + GLuint pad3:6; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } gs5; + + + struct + { + GLuint max_vp_index:4; + GLuint pad0:26; + GLuint reorder_enable:1; + GLuint pad1:1; + 
} gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:4; + GLuint pad3:3; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } vs5; + + struct + { + GLuint vs_enable:1; + GLuint vert_cache_disable:1; + GLuint pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + GLuint stats_enable:1; + GLuint pad0:1; + GLuint sampler_count:3; + GLuint sampler_state_pointer:27; + } wm4; + + struct + { + GLuint enable_8_pix:1; + GLuint enable_16_pix:1; + GLuint enable_32_pix:1; + GLuint pad0:7; + GLuint legacy_global_depth_bias:1; + GLuint line_stipple:1; + GLuint depth_offset:1; + GLuint polygon_stipple:1; + GLuint line_aa_region_width:2; + GLuint line_endcap_aa_region_width:2; + GLuint early_depth_test:1; + GLuint thread_dispatch_enable:1; + GLuint program_uses_depth:1; + GLuint program_computes_depth:1; + GLuint program_uses_killpixel:1; + GLuint legacy_line_rast: 1; + GLuint pad1:1; + GLuint max_threads:6; + GLuint pad2:1; + } wm5; + + GLfloat global_depth_offset_constant; + GLfloat global_depth_offset_scale; +}; + +struct brw_sampler_default_color { + GLfloat color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + GLuint shadow_function:3; + GLuint lod_bias:11; + GLuint min_filter:3; + GLuint mag_filter:3; + GLuint mip_filter:2; + GLuint base_level:5; + GLuint pad:1; + GLuint lod_preclamp:1; + GLuint default_color_mode:1; + GLuint pad0:1; + GLuint disable:1; + } ss0; + + struct + { + GLuint r_wrap_mode:3; + GLuint t_wrap_mode:3; + GLuint s_wrap_mode:3; + GLuint pad:3; + GLuint max_lod:10; + GLuint min_lod:10; + } ss1; + + + struct 
+ { + GLuint pad:5; + GLuint default_color_pointer:27; + } ss2; + + struct + { + GLuint pad:19; + GLuint max_aniso:3; + GLuint chroma_key_mode:1; + GLuint chroma_key_index:2; + GLuint chroma_key_enable:1; + GLuint monochrome_filter_width:3; + GLuint monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + GLfloat xmin; + GLfloat xmax; + GLfloat ymin; + GLfloat ymax; +}; + +struct brw_cc_viewport +{ + GLfloat min_depth; + GLfloat max_depth; +}; + +struct brw_sf_viewport +{ + struct { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; + } viewport; + + struct { + GLshort xmin; + GLshort ymin; + GLshort xmax; + GLshort ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct { + GLuint cube_pos_z:1; + GLuint cube_neg_z:1; + GLuint cube_pos_y:1; + GLuint cube_neg_y:1; + GLuint cube_pos_x:1; + GLuint cube_neg_x:1; + GLuint pad:4; + GLuint mipmap_layout_mode:1; + GLuint vert_line_stride_ofs:1; + GLuint vert_line_stride:1; + GLuint color_blend:1; + GLuint writedisable_blue:1; + GLuint writedisable_green:1; + GLuint writedisable_red:1; + GLuint writedisable_alpha:1; + GLuint surface_format:9; + GLuint data_return_format:1; + GLuint pad0:1; + GLuint surface_type:3; + } ss0; + + struct { + GLuint base_addr; + } ss1; + + struct { + GLuint pad:2; + GLuint mip_count:4; + GLuint width:13; + GLuint height:13; + } ss2; + + struct { + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad:1; + GLuint pitch:18; + GLuint depth:11; + } ss3; + + struct { + GLuint pad:19; + GLuint min_array_elt:9; + GLuint min_lod:4; + } ss4; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + GLuint pitch:11; + GLuint pad:15; + GLuint access_type:1; + GLuint vb_index:5; + } vb0; + + GLuint start_addr; + GLuint max_index; +#if 1 + GLuint instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + GLuint src_offset:11; + GLuint pad:5; + GLuint src_format:9; + GLuint pad0:1; + GLuint valid:1; + GLuint vertex_buffer_index:5; + } ve0; + + struct + { + GLuint dst_offset:8; + GLuint pad:8; + GLuint vfcomponent3:4; + GLuint vfcomponent2:4; + GLuint vfcomponent1:4; + GLuint vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + GLuint opcode:7; + GLuint pad:1; + GLuint access_mode:1; + GLuint mask_control:1; + GLuint dependency_control:2; + GLuint compression_control:2; + GLuint thread_control:2; + GLuint predicate_control:4; + GLuint predicate_inverse:1; + GLuint execution_size:3; + GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint pad0:2; + GLuint debug_control:1; + GLuint saturate:1; + } header; + + union { + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + GLuint dest_subreg_nr:5; + GLuint dest_reg_nr:8; + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } da1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint pad:6; + GLint dest_indirect_offset:10; /* offset 
against the deref'd address reg */ + GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } ia1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad0:1; + GLuint dest_writemask:4; + GLuint dest_subreg_nr:1; + GLuint dest_reg_nr:8; + GLuint pad1:2; + GLuint dest_address_mode:1; + } da16; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint pad0:6; + GLuint dest_writemask:4; + GLint dest_indirect_offset:6; + GLuint dest_subreg_nr:3; + GLuint pad1:2; + GLuint dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + GLuint src0_subreg_nr:5; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } da1; + + struct + { + GLint src0_indirect_offset:10; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } ia1; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLuint src0_subreg_nr:1; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } da16; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLint src0_indirect_offset:6; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint 
pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + GLuint src1_subreg_nr:5; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint pad0:7; + } da1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLuint src1_subreg_nr:1; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad0:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint pad2:7; + } da16; + + struct + { + GLint src1_indirect_offset:10; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad0:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } ia1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLint src1_indirect_offset:6; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad0:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad2:6; + } ia16; + + + struct + { + GLint jump_count:16; /* note: signed */ + GLuint pop_count:4; + GLuint pad0:12; + } if_else; + + struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint pad0:8; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } math; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint return_format:2; + GLuint msg_type:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:4; + GLuint msg_type:2; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint 
msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_write; + + struct { + GLuint pad:16; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } generic; + + GLuint ud; + } bits3; +}; + + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c new file mode 100644 index 00000000000..d70b2ea87a5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -0,0 +1,158 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "glheader.h" +#include "mtypes.h" +#include "imports.h" +#include "simple_list.h" +#include "enums.h" +#include "image.h" +#include "texstore.h" +#include "texformat.h" +#include "texmem.h" + +#include "intel_ioctl.h" +#include "brw_context.h" +#include "brw_defines.h" + + + + +static const struct gl_texture_format * +brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, + GLenum format, GLenum type ) +{ + switch ( internalFormat ) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + case GL_RGBA4: + case GL_RGBA2: + case GL_RGB5_A1: + return &_mesa_texformat_argb8888; +/* return &_mesa_texformat_rgba8888_rev; */ + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: +/* return &_mesa_texformat_rgb888; */ + return &_mesa_texformat_argb8888; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + return &_mesa_texformat_a8; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return &_mesa_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return 
&_mesa_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return &_mesa_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_MESA || + type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + return &_mesa_texformat_rgb_fxt1; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ + + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + return &_mesa_texformat_z16; + + default: + fprintf(stderr, "unexpected texture format %s in %s\n", + _mesa_lookup_enum_by_nr(internalFormat), + __FUNCTION__); + return NULL; + } + + return NULL; /* never get here */ +} + + +void brwInitTextureFuncs( struct dd_function_table *functions ) +{ + functions->ChooseTextureFormat = brwChooseTextureFormat; +} diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c new file mode 100644 index 00000000000..f8aa068241b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -0,0 +1,162 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +/* Code to layout images in a mipmap tree for i965. 
+ */ + +#include "intel_mipmap_tree.h" +#include "macros.h" + +static GLuint minify( GLuint d ) +{ + return MAX2(1, d>>1); +} + + +GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) +{ + /* XXX: these vary depending on image format: + */ +/* GLint align_w = 4; */ + GLint align_h = 2; + + + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_3D: { + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint depth = mt->depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + + mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; + mt->total_height = 0; + + pack_y_pitch = MAX2(mt->height0, 2); + pack_x_pitch = mt->pitch; + pack_x_nr = 1; + + for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + intel_miptree_set_level_info(mt, level, nr_images, + 0, mt->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(mt, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + mt->total_height += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + break; + } + + default: { + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + + mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; + mt->total_height = 0; + + for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + GLuint img_height; + + intel_miptree_set_level_info(mt, level, 1, + x, y, + width, + mt->compressed ? 
height/4 : height, 1); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = MAX2(align_h, height); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + mt->total_height = MAX2(mt->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == mt->first_level + 1) + x += mt->pitch / 2; + else { + y += img_height; + } + + + width = minify(width); + height = minify(height); + } + break; + } + } + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, + mt->total_height, + mt->cpp, + mt->pitch * mt->total_height * mt->cpp ); + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c new file mode 100644 index 00000000000..79ff2b2d4db --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -0,0 +1,215 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_hal.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { + GLuint min_nr_entries; + GLuint preferred_nr_entries; + GLuint min_entry_size; + GLuint max_entry_size; +} limits[CS+1] = { + { 8, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 6, 8, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static GLboolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. 
+ */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ + GLuint csize = brw->curbe.total_size; + GLuint vsize = brw->vs.prog_data->urb_entry_size; + GLuint sfsize = brw->sf.prog_data->urb_entry_size; + + static GLboolean (*hal_recalculate_urb_fence) (struct brw_context *brw); + static GLboolean hal_tried; + + if (!hal_tried) + { + hal_recalculate_urb_fence = brw_hal_find_symbol ("intel_hal_recalculate_urb_fence"); + hal_tried = 1; + } + if (hal_recalculate_urb_fence) + { + if ((*hal_recalculate_urb_fence) (brw)) + return; + } + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || + brw->urb.sfsize > brw->urb.sfsize || + brw->urb.csize > brw->urb.csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + brw->urb.constrained = 1; + + if (check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. 
+ */ + _mesa_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + _mesa_printf("URB CONSTRAINED\n"); + } + else + brw->urb.constrained = 0; + + if (INTEL_DEBUG & DEBUG_URB) + _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + 256); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = 256; + + BRW_BATCH_STRUCT(brw, &uf); +} + + +#if 0 +const struct brw_tracked_state brw_urb_fence = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP, + .cache = 0 + }, + .update = brw_upload_urb_fence +}; +#endif diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c new file mode 100644 index 00000000000..5957b717218 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -0,0 +1,188 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_util.h" +#include "mtypes.h" +#include "shader/program.h" +#include "brw_defines.h" + +GLuint brw_count_bits( GLuint val ) +{ + GLuint i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +static GLuint brw_parameter_state_flags(const enum state_index state[]) +{ + switch (state[0]) { + case STATE_MATERIAL: + case STATE_LIGHT: + case STATE_LIGHTMODEL_AMBIENT: + case STATE_LIGHTMODEL_SCENECOLOR: + case STATE_LIGHTPROD: + return _NEW_LIGHT; + + case STATE_TEXGEN: + case STATE_TEXENV_COLOR: + return _NEW_TEXTURE; + + case STATE_FOG_COLOR: + case STATE_FOG_PARAMS: + return _NEW_FOG; + + case STATE_CLIPPLANE: + return _NEW_TRANSFORM; + + case STATE_POINT_SIZE: + case STATE_POINT_ATTENUATION: + return _NEW_POINT; + + case STATE_MATRIX: + switch (state[1]) { + case STATE_MODELVIEW: + return _NEW_MODELVIEW; + case STATE_PROJECTION: + return _NEW_PROJECTION; + case STATE_MVP: + return _NEW_MODELVIEW | _NEW_PROJECTION; + case STATE_TEXTURE: + return _NEW_TEXTURE_MATRIX; + case STATE_PROGRAM: + return _NEW_TRACK_MATRIX; + default: + assert(0); + return 0; + } + + case STATE_DEPTH_RANGE: + return _NEW_VIEWPORT; + + case STATE_FRAGMENT_PROGRAM: + case STATE_VERTEX_PROGRAM: + return _NEW_PROGRAM; + + case STATE_INTERNAL: + switch (state[1]) { + case STATE_NORMAL_SCALE: + return _NEW_MODELVIEW; + default: + assert(0); + return 0; + } + + default: + assert(0); + return 0; + } +} + + +GLuint +brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList) +{ + GLuint i; + GLuint result = 0; + + if (!paramList) + return 0; + + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { + result |= brw_parameter_state_flags(paramList->Parameters[i].StateIndexes); + } + } + + return result; +} + + +GLuint brw_translate_blend_equation( GLenum mode ) 
+{ + switch (mode) { + case GL_FUNC_ADD: + return BRW_BLENDFUNCTION_ADD; + case GL_MIN: + return BRW_BLENDFUNCTION_MIN; + case GL_MAX: + return BRW_BLENDFUNCTION_MAX; + case GL_FUNC_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +GLuint brw_translate_blend_factor( GLenum factor ) +{ + switch(factor) { + case GL_ZERO: + return BRW_BLENDFACTOR_ZERO; + case GL_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case GL_ONE: + return BRW_BLENDFACTOR_ONE; + case GL_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case GL_ONE_MINUS_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case GL_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case GL_ONE_MINUS_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h new file mode 100644 index 00000000000..bd6cc0a2682 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -0,0 +1,45 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "mtypes.h" + +extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); +extern GLuint brw_translate_blend_factor( GLenum factor ); +extern GLuint brw_translate_blend_equation( GLenum mode ); + + + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c new file mode 100644 index 00000000000..2a94ac64965 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -0,0 +1,129 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" +#include "program.h" +#include "shader/arbprogparse.h" + + + +static void do_vs_prog( struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(&c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->program.Base.OutputsWritten; + c.prog_data.inputs_read = brw_translate_inputs(brw->intel.ctx.VertexProgram._Enabled, + vp->program.Base.InputsRead); + + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.inputs_read |= 1<<BRW_ATTRIB_EDGEFLAG; + } + + if (0) + _mesa_print_program(&c.vp->program.Base); + + + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* + */ + brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ + struct brw_vs_prog_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *)brw->vertex_program; + + assert (vp && !vp->program.IsNVProgram); + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + key.copy_edgeflag = (brw->attribs.Polygon->FrontMode != GL_FILL || + brw->attribs.Polygon->BackMode != GL_FILL); + + /* Make an early check for the key. 
+ */ + if (brw_search_cache(&brw->cache[BRW_VS_PROG], + &key, sizeof(key), + &brw->vs.prog_data, + &brw->vs.prog_gs_offset)) + return; + + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = 0 + }, + .update = brw_upload_vs_prog +}; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h new file mode 100644 index 00000000000..d355681b5ea --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -0,0 +1,80 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" +#include "program.h" + + +struct brw_vs_prog_key { + GLuint program_string_id; + GLuint nr_userclip:4; + GLuint copy_edgeflag:1; + GLuint pad:27; +}; + + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + + struct brw_vertex_program *vp; + + GLuint nr_inputs; + + GLuint first_output; + GLuint nr_outputs; + + GLuint first_tmp; + GLuint last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[PROGRAM_ADDRESS+1][128]; + struct brw_reg tmp; + + struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + + +void brw_ProgramCacheDestroy( GLcontext *ctx ); +void brw_ProgramCacheInit( GLcontext *ctx ); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c new file mode 100644 index 00000000000..502d8283970 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "program.h" +#include "program_instruction.h" +#include "macros.h" +#include "brw_vs.h" + +/* Component is active if it may diverge from [0,0,0,1]. Undef values + * are promoted to [0,0,0,1] for the purposes of this analysis. + */ +struct tracker { + GLboolean twoside; + GLubyte active[PROGRAM_OUTPUT+1][128]; + GLuint size_masks[4]; +}; + + +static void set_active_component( struct tracker *t, + GLuint file, + GLuint index, + GLubyte active ) +{ + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + t->active[file][index] |= active; + + default: + break; + } +} + +static void set_active( struct tracker *t, + struct prog_dst_register dst, + GLuint active ) +{ + set_active_component( t, dst.File, dst.Index, active & dst.WriteMask ); +} + + +static GLubyte get_active_component( struct tracker *t, + GLuint file, + GLuint index, + GLuint component, + GLubyte swz ) +{ + switch (swz) { + case SWIZZLE_ZERO: + return component < 3 ? 0 : (1<<component); + case SWIZZLE_ONE: + return component == 3 ? 
0 : (1<<component); + default: + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + return t->active[file][index] & (1<<component); + default: + return 1 << component; + } + } +} + + +static GLubyte get_active( struct tracker *t, + struct prog_src_register src ) +{ + GLuint i; + GLubyte active = src.NegateBase; /* NOTE! */ + + if (src.RelAddr) + return 0xf; + + for (i = 0; i < 4; i++) + active |= get_active_component(t, src.File, src.Index, i, + GET_SWZ(src.Swizzle, i)); + + return active; +} + +static GLubyte get_output_size( struct tracker *t, + GLuint idx ) +{ + GLubyte active = t->active[PROGRAM_OUTPUT][idx]; + if (active & (1<<3)) return 4; + if (active & (1<<2)) return 3; + if (active & (1<<1)) return 2; + if (active & (1<<0)) return 1; + return 0; +} + +/* Note the potential copying that occurs in the setup program: + */ +static void calc_sizes( struct tracker *t ) +{ + GLuint i; + + if (t->twoside) { + t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= + t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC0]; + + t->active[PROGRAM_OUTPUT][VERT_RESULT_COL1] |= + t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; + } + + for (i = 0; i < FRAG_ATTRIB_MAX; i++) { + switch (get_output_size(t, i)) { + case 4: t->size_masks[4-1] |= 1<<i; + case 3: t->size_masks[3-1] |= 1<<i; + case 2: t->size_masks[2-1] |= 1<<i; + case 1: t->size_masks[1-1] |= 1<<i; + break; + } + } +} + +static GLubyte szflag[4+1] = { + 0, + 0x1, + 0x3, + 0x7, + 0xf +}; + +/* Pull a size out of the packed array: + */ +static GLuint get_input_size(struct brw_context *brw, + GLuint attr) +{ + GLuint sizes_dword = brw->vb.info.sizes[attr/16]; + GLuint sizes_bits = (sizes_dword>>((attr%16)*2)) & 0x3; + return sizes_bits + 1; +} + +/* Calculate sizes of vertex program outputs. 
Size is the largest + * component index which might vary from [0,0,0,1] + */ +static void calc_wm_input_sizes( struct brw_context *brw ) +{ + /* BRW_NEW_VERTEX_PROGRAM */ + struct brw_vertex_program *vp = + (struct brw_vertex_program *)brw->vertex_program; + /* BRW_NEW_INPUT_DIMENSIONS */ + struct tracker t; + GLuint insn; + GLuint i; + GLuint64EXT inputs = brw_translate_inputs(brw->intel.ctx.VertexProgram._Enabled, + vp->program.Base.InputsRead); + + memset(&t, 0, sizeof(t)); + + /* _NEW_LIGHT */ + if (brw->attribs.Light->Model.TwoSide) + t.twoside = 1; + + for (i = 0; i < BRW_ATTRIB_MAX; i++) + if (inputs & (1<<i)) + set_active_component(&t, PROGRAM_INPUT, i, + szflag[get_input_size(brw, i)]); + + for (insn = 0; insn < vp->program.Base.NumInstructions; insn++) { + struct prog_instruction *inst = &vp->program.Base.Instructions[insn]; + + switch (inst->Opcode) { + case OPCODE_ARL: + break; + + case OPCODE_MOV: + set_active(&t, inst->DstReg, get_active(&t, inst->SrcReg[0])); + break; + + default: + set_active(&t, inst->DstReg, 0xf); + break; + } + } + + calc_sizes(&t); + + if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) { + memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); + brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; + } +} + +const struct brw_tracked_state brw_wm_input_sizes = { + .dirty = { + .mesa = _NEW_LIGHT, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, + .cache = 0 + }, + .update = calc_wm_input_sizes +}; + diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c new file mode 100644 index 00000000000..a22740084d3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -0,0 +1,1032 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "program.h" +#include "program_instruction.h" +#include "macros.h" +#include "brw_vs.h" + + + +/* Do things as simply as possible. Allocate and populate all regs + * ahead of time. 
+ */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c ) +{ + GLuint i, reg = 0, mrf; + GLuint nr_params; + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + reg += ((6+c->key.nr_userclip+3)/4)*2; + } + + /* Vertex program parameters from curbe: + */ + nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params+1)/2; + + c->prog_data.curb_read_length = reg - 1; + + + + /* Allocate input regs: + */ + c->nr_inputs = 0; + for (i = 0; i < BRW_ATTRIB_MAX; i++) { + if (c->prog_data.inputs_read & (1<<i)) { + c->nr_inputs++; + c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + } + + + /* Allocate outputs: TODO: could organize the non-position outputs + * to go straight into message regs. + */ + c->nr_outputs = 0; + c->first_output = reg; + mrf = 4; + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1<<i)) { + c->nr_outputs++; + if (i == VERT_RESULT_HPOS) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } + } + } + + /* Allocate program temporaries: + */ + for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) { + c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). Don't try to use the internal address reg until + * deref time. 
+ */ + for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) { + c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); + reg++; + } + + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + + c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0); + } +} + +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) && + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = 
brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + + + + +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + /* Could be done with an if/else/endif, but this method uses half + * the instructions. Note that we are careful to reference the + * arguments before writing the dest. That means we emit the + * instructions in an odd order and have to play with the flag + * values. + */ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); + + /* Write all values to 1: + */ + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(1.0)); + + /* Where the test succeeded, overwite with zero: + */ + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_pop_insn_state(p); +} + + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); + + /* Write all values to zero: + */ + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0)); + + /* Where the test succeeded, overwite with 1: + */ + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + brw_pop_insn_state(p); +} + + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void 
emit_math1( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + GLuint precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, 
/* Emit the LOG instruction.  Only the components selected by dst's
 * writemask are generated.  The caller must ensure dst does not alias
 * arg0 (see unalias1).
 *
 * When dst is not a GRF, or is only partially writemasked, the work is
 * done in a scratch register which is copied to dst at the end.
 */
static void emit_log_noalias( struct brw_vs_compile *c,
                              struct brw_reg dst,
                              struct brw_reg arg0 )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp = dst;
   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
                         dst.file != BRW_GENERAL_REGISTER_FILE);

   if (need_tmp) {
      tmp = get_tmp(c);
      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
   }

   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
    * according to spec:
    *
    * These almost look like they could be joined up, but that is not
    * really practical:
    *
    * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
    * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
    */
   if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
      /* X is also needed as an input to Z below. */
      brw_AND(p,
              brw_writemask(tmp_ud, WRITEMASK_X),
              brw_swizzle1(arg0_ud, 0),
              brw_imm_ud((1U<<31)-1));

      brw_SHR(p,
              brw_writemask(tmp_ud, WRITEMASK_X),
              tmp_ud,
              brw_imm_ud(23));

      brw_ADD(p,
              brw_writemask(tmp, WRITEMASK_X),
              retype(tmp_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */
              brw_imm_d(-127));
   }

   if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
      /* Y is also needed as an input to Z below. */
      brw_AND(p,
              brw_writemask(tmp_ud, WRITEMASK_Y),
              brw_swizzle1(arg0_ud, 0),
              brw_imm_ud((1<<23)-1));

      brw_OR(p,
             brw_writemask(tmp_ud, WRITEMASK_Y),
             tmp_ud,
             brw_imm_ud(127<<23));
   }

   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
      /* result[2] = result[0] + LOG2(result[1]); */

      /* Why bother?  The above is just a hint how to do this with a
       * taylor series.  Maybe we *should* use a taylor series as by
       * the time all the above has been done it's almost certainly
       * quicker than calling the mathbox, even with low precision.
       *
       * Options are:
       *    - result[0] + mathbox.LOG2(result[1])
       *    - mathbox.LOG2(arg0.x)
       *    - result[0] + inline_taylor_approx(result[1])
       */
      emit_math1(c,
                 BRW_MATH_FUNCTION_LOG,
                 brw_writemask(tmp, WRITEMASK_Z),
                 brw_swizzle1(tmp, 1),
                 BRW_MATH_PRECISION_FULL);

      brw_ADD(p,
              brw_writemask(tmp, WRITEMASK_Z),
              brw_swizzle1(tmp, 2),
              brw_swizzle1(tmp, 0));
   }

   if (dst.dw1.bits.writemask & WRITEMASK_W) {
      /* result[3] = 1.0; */
      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
   }

   if (need_tmp) {
      /* Copy out through dst's own writemask, then give the scratch back. */
      brw_MOV(p, dst, tmp);
      release_tmp(c, tmp);
   }
}
/* Emit the LIT (lighting coefficients) instruction.  The caller must
 * ensure dst does not alias arg0 (see unalias1).
 *
 * dst is first set to (1, 0, 0, 1); then, inside an IF on arg0.x > 0,
 * dst.y receives arg0.x and dst.z receives POW(tmp.z, arg0.w), where
 * tmp.z is loaded from arg0.y after a compare of arg0.y against 0.
 *
 * NOTE(review): the masks used here are fixed (YZ, XW, Y, Z); it looks
 * as if the instruction's own destination writemask is not honoured --
 * confirm what brw_writemask() does with an already-masked register.
 *
 * NOTE(review): when dst is not a GRF a scratch register is used for
 * tmp, but only dst (not tmp) is zeroed beforehand.  If the MOV after
 * the second CMP is predicated on that compare, tmp.z would be
 * undefined for lanes where arg0.y <= 0 -- confirm.
 */
static void emit_lit_noalias( struct brw_vs_compile *c,
                              struct brw_reg dst,
                              struct brw_reg arg0 )
{
   struct brw_compile *p = &c->func;
   struct brw_instruction *if_insn;
   struct brw_reg tmp = dst;
   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);

   if (need_tmp)
      tmp = get_tmp(c);

   /* Default result: (1, 0, 0, 1). */
   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0));
   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1));

   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
    * to get all channels active inside the IF.  In the clipping code
    * we run with NoMask, so it's not an option and we can use
    * BRW_EXECUTE_1 for all comparisons.
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
   if_insn = brw_IF(p, BRW_EXECUTE_8);
   {
      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));

      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      emit_math2(c,
                 BRW_MATH_FUNCTION_POW,
                 brw_writemask(dst, WRITEMASK_Z),
                 brw_swizzle1(tmp, 2),
                 brw_swizzle1(arg0, 3),
                 BRW_MATH_PRECISION_PARTIAL);
   }

   brw_ENDIF(p, if_insn);
}
+ */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + GLuint file, + GLuint index ) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_STATE_VAR: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: /* undef values */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + GLint offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); + + if (need_tmp) + release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op. 
The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + struct prog_src_register src ) +{ + struct brw_reg reg; + + if (src.File == PROGRAM_UNDEFINED) + return brw_null_reg(); + + if (src.RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + else + reg = get_reg(c, src.File, src.Index); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0), + GET_SWZ(src.Swizzle, 1), + GET_SWZ(src.Swizzle, 2), + GET_SWZ(src.Swizzle, 3)); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src.NegateBase ? 1 : 0; + + return reg; +} + + +static struct brw_reg get_dst( struct brw_vs_compile *c, + struct prog_dst_register dst ) +{ + struct brw_reg reg = get_reg(c, dst.File, dst.Index); + + reg.dw1.bits.writemask = dst.WriteMask; + + return reg; +} + + + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + struct prog_src_register src ) +{ + struct brw_compile *p = &c->func; + GLuint zeros_mask = 0; + GLuint ones_mask = 0; + GLuint src_mask = 0; + GLubyte src_swz[4]; + GLboolean need_tmp = (src.NegateBase && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + GLuint i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<<i)) { + GLubyte s = GET_SWZ(src.Swizzle, i); + switch (s) { + case SWIZZLE_X: + case SWIZZLE_Y: + case SWIZZLE_Z: + case SWIZZLE_W: + src_mask |= 1<<i; + src_swz[i] = s; + break; + case SWIZZLE_ZERO: + zeros_mask |= 1<<i; + break; + case SWIZZLE_ONE: + ones_mask |= 1<<i; + break; + } + } + } + + /* Do src first, in case dst aliases src: + */ + if (src_mask) { + struct brw_reg arg0; + + if (src.RelAddr) + arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + else + arg0 = get_reg(c, src.File, src.Index); + + arg0 = brw_swizzle(arg0, + src_swz[0], src_swz[1], + 
src_swz[2], src_swz[3]); + + brw_MOV(p, brw_writemask(tmp, src_mask), arg0); + } + + if (zeros_mask) + brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); + + if (ones_mask) + brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); + + if (src.NegateBase) + brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +/* Post-vertex-program processing. Send the results to the URB. + */ +static void emit_vertex_write( struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; + struct brw_reg ndc; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE), + get_reg(c, PROGRAM_INPUT, BRW_ATTRIB_EDGEFLAG)); + } + + + /* Build ndc coords? TODO: Shortcircuit when w is known to be one. + */ + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + + /* This includes the workaround for -ve rhw, so is no longer an + * optional step: + */ + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + GLuint i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) { + struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; + brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); + } + + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + + /* i965 clipping workaround: + * 1) Test 
for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + + + + + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + c->nr_outputs + 3, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + +} + + + + +/* Emit the fragment program instructions here. + */ +void brw_vs_emit( struct brw_vs_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn; + + + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("\n\n\nvs-emit:\n"); + _mesa_print_program(&c->vp->program.Base); + _mesa_printf("\n"); + } + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + /* Static register allocation + */ + brw_vs_alloc_regs(c); + + for (insn = 0; insn < nr_insns; insn++) { + + struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + struct brw_reg args[3], dst; + GLuint i; + + /* Get argument regs. SWZ is special and does this itself. 
+ */ + if (inst->Opcode != OPCODE_SWZ) + for (i = 0; i < 3; i++) + args[i] = get_arg(c, inst->SrcReg[i]); + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. + */ + dst = get_dst(c, inst->DstReg); + + + switch (inst->Opcode) { + case OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case OPCODE_MOV: + brw_MOV(p, dst, args[0]); + break; + case OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_RSQ: + 
emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case OPCODE_SWZ: + /* The args[0] value can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst->SrcReg[0] ); + break; + case OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; + case OPCODE_END: + case OPCODE_PRINT: + break; + default: + break; + } + + release_tmps(c); + } + + emit_vertex_write(c); + +} + + + + + diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c new file mode 100644 index 00000000000..c225bf8f5c5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -0,0 +1,102 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "macros.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_state vs; + + memset(&vs, 0, sizeof(vs)); + + /* CACHE_NEW_VS_PROG */ + vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; + vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + + + /* BRW_NEW_URB_FENCE */ + vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; + vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + vs.thread4.max_threads = MIN2( + MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), + 15); + + + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (brw->attribs.Transform->ClipPlanesEnabled) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_offset = 0; + + /* No samplers for ARB_vp programs: + */ + vs.vs5.sampler_count = 0; + + if (INTEL_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + brw->vs.state_gs_offset = 
brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_vs_unit +}; diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c new file mode 100644 index 00000000000..c1099d4c676 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -0,0 +1,1682 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 2005 Tungsten Graphics All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file t_vp_build.c + * Create a vertex program to execute the current fixed function T&L pipeline. 
+ * \author Keith Whitwell + */ + + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "brw_vs.h" +#include "brw_state.h" + +#include "shader/program.h" +#include "shader/program_instruction.h" +#include "shader/arbprogparse.h" + +struct state_key { + unsigned light_global_enabled:1; + unsigned light_local_viewer:1; + unsigned light_twoside:1; + unsigned light_color_material:1; + unsigned light_color_material_mask:12; + unsigned light_material_mask:12; + unsigned normalize:1; + unsigned rescale_normals:1; + unsigned fog_source_is_depth:1; + unsigned tnl_do_vertex_fog:1; + unsigned separate_specular:1; + unsigned fog_option:2; + unsigned point_attenuated:1; + unsigned texture_enabled_global:1; + unsigned fragprog_inputs_read:12; + + struct { + unsigned light_enabled:1; + unsigned light_eyepos3_is_zero:1; + unsigned light_spotcutoff_is_180:1; + unsigned light_attenuated:1; + unsigned texunit_really_enabled:1; + unsigned texmat_enabled:1; + unsigned texgen_enabled:4; + unsigned texgen_mode0:4; + unsigned texgen_mode1:4; + unsigned texgen_mode2:4; + unsigned texgen_mode3:4; + } unit[8]; +}; + + + +#define FOG_NONE 0 +#define FOG_LINEAR 1 +#define FOG_EXP 2 +#define FOG_EXP2 3 + +static GLuint translate_fog_mode( GLenum mode ) +{ + switch (mode) { + case GL_LINEAR: return FOG_LINEAR; + case GL_EXP: return FOG_EXP; + case GL_EXP2: return FOG_EXP2; + default: return FOG_NONE; + } +} + +#define TXG_NONE 0 +#define TXG_OBJ_LINEAR 1 +#define TXG_EYE_LINEAR 2 +#define TXG_SPHERE_MAP 3 +#define TXG_REFLECTION_MAP 4 +#define TXG_NORMAL_MAP 5 + +static GLuint translate_texgen( GLboolean enabled, GLenum mode ) +{ + if (!enabled) + return TXG_NONE; + + switch (mode) { + case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; + case GL_EYE_LINEAR: return TXG_EYE_LINEAR; + case GL_SPHERE_MAP: return TXG_SPHERE_MAP; + case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; + case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; + default: return TXG_NONE; + } +} + +static void 
make_state_key( GLcontext *ctx, struct state_key *key ) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_fragment_program *fp = brw->fragment_program; + GLuint i; + + /* This now relies on texenvprogram.c being active: + */ + assert(fp); + + memset(key, 0, sizeof(*key)); + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->fragprog_inputs_read = fp->Base.InputsRead; + + /* _NEW_LIGHT */ + key->separate_specular = (brw->attribs.Light->Model.ColorControl == + GL_SEPARATE_SPECULAR_COLOR); + + /* _NEW_LIGHT */ + if (brw->attribs.Light->Enabled) { + key->light_global_enabled = 1; + + if (brw->attribs.Light->Model.LocalViewer) + key->light_local_viewer = 1; + + if (brw->attribs.Light->Model.TwoSide) + key->light_twoside = 1; + + if (brw->attribs.Light->ColorMaterialEnabled) { + key->light_color_material = 1; + key->light_color_material_mask = brw->attribs.Light->ColorMaterialBitmask; + } + + /* BRW_NEW_INPUT_VARYING */ + for (i = BRW_ATTRIB_MAT_FRONT_AMBIENT ; i < BRW_ATTRIB_INDEX ; i++) + if (brw->vb.info.varying & (1<<i)) + key->light_material_mask |= 1<<(i-BRW_ATTRIB_MAT_FRONT_AMBIENT); + + for (i = 0; i < MAX_LIGHTS; i++) { + struct gl_light *light = &brw->attribs.Light->Light[i]; + + if (light->Enabled) { + key->unit[i].light_enabled = 1; + + if (light->EyePosition[3] == 0.0) + key->unit[i].light_eyepos3_is_zero = 1; + + if (light->SpotCutoff == 180.0) + key->unit[i].light_spotcutoff_is_180 = 1; + + if (light->ConstantAttenuation != 1.0 || + light->LinearAttenuation != 0.0 || + light->QuadraticAttenuation != 0.0) + key->unit[i].light_attenuated = 1; + } + } + } + + /* _NEW_TRANSFORM */ + if (brw->attribs.Transform->Normalize) + key->normalize = 1; + + if (brw->attribs.Transform->RescaleNormals) + key->rescale_normals = 1; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->fog_option = translate_fog_mode(fp->FogOption); + if (key->fog_option) + key->fragprog_inputs_read |= FRAG_BIT_FOGC; + + /* _NEW_FOG */ + if (brw->attribs.Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 
+ key->fog_source_is_depth = 1; + + /* _NEW_HINT, ??? */ + if (1) + key->tnl_do_vertex_fog = 1; + + /* _NEW_POINT */ + if (brw->attribs.Point->_Attenuated) + key->point_attenuated = 1; + + /* _NEW_TEXTURE */ + if (brw->attribs.Texture->_TexGenEnabled || + brw->attribs.Texture->_TexMatEnabled || + brw->attribs.Texture->_EnabledUnits) + key->texture_enabled_global = 1; + + for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + + if (texUnit->_ReallyEnabled) + key->unit[i].texunit_really_enabled = 1; + + if (brw->attribs.Texture->_TexMatEnabled & ENABLE_TEXMAT(i)) + key->unit[i].texmat_enabled = 1; + + if (texUnit->TexGenEnabled) { + key->unit[i].texgen_enabled = 1; + + key->unit[i].texgen_mode0 = + translate_texgen( texUnit->TexGenEnabled & (1<<0), + texUnit->GenModeS ); + key->unit[i].texgen_mode1 = + translate_texgen( texUnit->TexGenEnabled & (1<<1), + texUnit->GenModeT ); + key->unit[i].texgen_mode2 = + translate_texgen( texUnit->TexGenEnabled & (1<<2), + texUnit->GenModeR ); + key->unit[i].texgen_mode3 = + translate_texgen( texUnit->TexGenEnabled & (1<<3), + texUnit->GenModeQ ); + } + } +} + + + +/* Very useful debugging tool - produces annotated listing of + * generated program with line/function references for each + * instruction back into this file: + */ +#define DISASSEM 0 + +/* Should be tunable by the driver - do we want to do matrix + * multiplications with DP4's or with MUL/MAD's? SSE works better + * with the latter, drivers may differ. + */ +#define PREFER_DP4 1 + + +/* Use uregs to represent registers internally, translate to Mesa's + * expected formats on emit. + * + * NOTE: These are passed by value extensively in this file rather + * than as usual by pointer reference. If this disturbs you, try + * remembering they are just 32bits in size. 
+ * + * GCC is smart enough to deal with these dword-sized structures in + * much the same way as if I had defined them as dwords and was using + * macros to access and set the fields. This is much nicer and easier + * to evolve. + */ +struct ureg { + GLuint file:4; + GLint idx:8; /* relative addressing may be negative */ + GLuint negate:1; + GLuint swz:12; + GLuint pad:7; +}; + + +struct tnl_program { + const struct state_key *state; + struct gl_vertex_program *program; + + GLuint nr_instructions; + GLuint temp_in_use; + GLuint temp_reserved; + + struct ureg eye_position; + struct ureg eye_position_normalized; + struct ureg eye_normal; + struct ureg identity; + + GLuint materials; + GLuint color_materials; +}; + + +const static struct ureg undef = { + PROGRAM_UNDEFINED, + ~0, + 0, + 0, + 0 +}; + +/* Local shorthand: + */ +#define X SWIZZLE_X +#define Y SWIZZLE_Y +#define Z SWIZZLE_Z +#define W SWIZZLE_W + + +/* Construct a ureg: + */ +static struct ureg make_ureg(GLuint file, GLint idx) +{ + struct ureg reg; + reg.file = file; + reg.idx = idx; + reg.negate = 0; + reg.swz = SWIZZLE_NOOP; + reg.pad = 0; + return reg; +} + + + +static struct ureg ureg_negate( struct ureg reg ) +{ + reg.negate ^= 1; + return reg; +} + + +static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) +{ + reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), + GET_SWZ(reg.swz, y), + GET_SWZ(reg.swz, z), + GET_SWZ(reg.swz, w)); + + return reg; +} + +static struct ureg swizzle1( struct ureg reg, int x ) +{ + return swizzle(reg, x, x, x, x); +} + +static struct ureg get_temp( struct tnl_program *p ) +{ + int bit = ffs( ~p->temp_in_use ); + if (!bit) { + fprintf(stderr, "%s: out of temporaries\n", __FILE__); + assert(0); + } + + if (bit > p->program->Base.NumTemporaries) + p->program->Base.NumTemporaries = bit; + + p->temp_in_use |= 1<<(bit-1); + return make_ureg(PROGRAM_TEMPORARY, bit-1); +} + +static struct ureg reserve_temp( struct tnl_program *p ) +{ + struct ureg temp = get_temp( p 
); + p->temp_reserved |= 1<<temp.idx; + return temp; +} + +static void release_temp( struct tnl_program *p, struct ureg reg ) +{ + if (reg.file == PROGRAM_TEMPORARY) { + p->temp_in_use &= ~(1<<reg.idx); + p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ + } +} + +static void release_temps( struct tnl_program *p ) +{ + p->temp_in_use = p->temp_reserved; +} + + + +static struct ureg register_input( struct tnl_program *p, GLuint input ) +{ + /* Cram the material flags into the generic range. We'll translate + * them back later. + */ + if (input >= BRW_ATTRIB_MAT_FRONT_AMBIENT) + input -= BRW_ATTRIB_MAT_FRONT_AMBIENT; + + assert(input < 32); + + p->program->Base.InputsRead |= (1<<input); + return make_ureg(PROGRAM_INPUT, input); +} + +static struct ureg register_output( struct tnl_program *p, GLuint output ) +{ + p->program->Base.OutputsWritten |= (1<<output); + return make_ureg(PROGRAM_OUTPUT, output); +} + +static struct ureg register_const4f( struct tnl_program *p, + GLfloat s0, + GLfloat s1, + GLfloat s2, + GLfloat s3) +{ + GLfloat values[4]; + GLint idx; + values[0] = s0; + values[1] = s1; + values[2] = s2; + values[3] = s3; + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values ); + return make_ureg(PROGRAM_STATE_VAR, idx); +} + +#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) +#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) +#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) +#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) + +static GLboolean is_undef( struct ureg reg ) +{ + return reg.file == PROGRAM_UNDEFINED; +} + +static struct ureg get_identity_param( struct tnl_program *p ) +{ + if (is_undef(p->identity)) + p->identity = register_const4f(p, 0,0,0,1); + + return p->identity; +} + +static struct ureg register_param6( struct tnl_program *p, + GLint s0, + GLint s1, + GLint s2, + GLint s3, + GLint s4, + GLint s5) +{ + GLint tokens[6]; + 
GLint idx; + tokens[0] = s0; + tokens[1] = s1; + tokens[2] = s2; + tokens[3] = s3; + tokens[4] = s4; + tokens[5] = s5; + idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); + return make_ureg(PROGRAM_STATE_VAR, idx); +} + + +#define register_param1(p,s0) register_param6(p,s0,0,0,0,0,0) +#define register_param2(p,s0,s1) register_param6(p,s0,s1,0,0,0,0) +#define register_param3(p,s0,s1,s2) register_param6(p,s0,s1,s2,0,0,0) +#define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0) + + +static void register_matrix_param6( struct tnl_program *p, + GLint s0, + GLint s1, + GLint s2, + GLint s3, + GLint s4, + GLint s5, + struct ureg *matrix ) +{ + GLint i; + + /* This is a bit sad as the support is there to pull the whole + * matrix out in one go: + */ + for (i = 0; i <= s4 - s3; i++) + matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 ); +} + + +static void emit_arg( struct prog_src_register *src, + struct ureg reg ) +{ + src->File = reg.file; + src->Index = reg.idx; + src->Swizzle = reg.swz; + src->RelAddr = 0; + src->NegateBase = reg.negate; + src->Abs = 0; + src->NegateAbs = 0; +} + +static void emit_dst( struct prog_dst_register *dst, + struct ureg reg, GLuint mask ) +{ + dst->File = reg.file; + dst->Index = reg.idx; + /* allow zero as a shorthand for xyzw */ + dst->WriteMask = mask ? 
mask : WRITEMASK_XYZW; + dst->CondMask = 0; + dst->CondSwizzle = 0; + dst->CondSrc = 0; + dst->pad = 0; +} + +static void debug_insn( struct prog_instruction *inst, const char *fn, + GLuint line ) +{ + if (DISASSEM) { + static const char *last_fn; + + if (fn != last_fn) { + last_fn = fn; + _mesa_printf("%s:\n", fn); + } + + _mesa_printf("%d:\t", line); + _mesa_print_instruction(inst); + } +} + + +static void emit_op3fn(struct tnl_program *p, + GLuint op, + struct ureg dest, + GLuint mask, + struct ureg src0, + struct ureg src1, + struct ureg src2, + const char *fn, + GLuint line) +{ + GLuint nr = p->program->Base.NumInstructions++; + + if (nr >= p->nr_instructions) { + p->program->Base.Instructions = + _mesa_realloc(p->program->Base.Instructions, + sizeof(struct prog_instruction) * p->nr_instructions, + sizeof(struct prog_instruction) * (p->nr_instructions *= 2)); + } + + { + struct prog_instruction *inst = &p->program->Base.Instructions[nr]; + inst->Opcode = op; + inst->StringPos = 0; + inst->Data = 0; + + emit_arg( &inst->SrcReg[0], src0 ); + emit_arg( &inst->SrcReg[1], src1 ); + emit_arg( &inst->SrcReg[2], src2 ); + + emit_dst( &inst->DstReg, dest, mask ); + + debug_insn(inst, fn, line); + } +} + + + +#define emit_op3(p, op, dst, mask, src0, src1, src2) \ + emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) + +#define emit_op2(p, op, dst, mask, src0, src1) \ + emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) + +#define emit_op1(p, op, dst, mask, src0) \ + emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) + + +static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) +{ + if (reg.file == PROGRAM_TEMPORARY && + !(p->temp_reserved & (1<<reg.idx))) + return reg; + else { + struct ureg temp = get_temp(p); + emit_op1(p, OPCODE_MOV, temp, 0, reg); + return temp; + } +} + + +/* Currently no tracking performed of input/output/register size or + * active elements. 
Could be used to reduce these operations, as + * could the matrix type. + */ +static void emit_matrix_transform_vec4( struct tnl_program *p, + struct ureg dest, + const struct ureg *mat, + struct ureg src) +{ + emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); + emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); + emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); + emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); +} + +/* This version is much easier to implement if writemasks are not + * supported natively on the target or (like SSE), the target doesn't + * have a clean/obvious dotproduct implementation. + */ +static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, + struct ureg dest, + const struct ureg *mat, + struct ureg src) +{ + struct ureg tmp; + + if (dest.file != PROGRAM_TEMPORARY) + tmp = get_temp(p); + else + tmp = dest; + + emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); + emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); + emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); + emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); + + if (dest.file != PROGRAM_TEMPORARY) + release_temp(p, tmp); +} + +static void emit_matrix_transform_vec3( struct tnl_program *p, + struct ureg dest, + const struct ureg *mat, + struct ureg src) +{ + emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); + emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); + emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); +} + + +static void emit_normalize_vec3( struct tnl_program *p, + struct ureg dest, + struct ureg src ) +{ + emit_op2(p, OPCODE_DP3, dest, WRITEMASK_W, src, src); + emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_W, swizzle1(dest,W)); + emit_op2(p, OPCODE_MUL, dest, WRITEMASK_XYZ, src, swizzle1(dest,W)); +} + +static void emit_passthrough( struct tnl_program *p, + GLuint input, + GLuint output ) +{ + struct ureg out = register_output(p, output); + emit_op1(p, OPCODE_MOV, out, 0, 
register_input(p, input)); +} + +static struct ureg get_eye_position( struct tnl_program *p ) +{ + if (is_undef(p->eye_position)) { + struct ureg pos = register_input( p, BRW_ATTRIB_POS ); + struct ureg modelview[4]; + + p->eye_position = reserve_temp(p); + + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, + STATE_MATRIX, modelview ); + + emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, + STATE_MATRIX_TRANSPOSE, modelview ); + + emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); + } + } + + return p->eye_position; +} + + +#if 0 +static struct ureg get_eye_z( struct tnl_program *p ) +{ + if (!is_undef(p->eye_position)) { + return swizzle1(p->eye_position, Z); + } + else if (!is_undef(p->eye_z)) { + struct ureg pos = register_input( p, BRW_ATTRIB_POS ); + struct ureg modelview2; + + p->eye_z = reserve_temp(p); + + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 2, 1, + STATE_MATRIX, &modelview2 ); + + emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); + emit_op2(p, OPCODE_DP4, p->eye_z, WRITEMASK_Z, pos, modelview2); + } + + return swizzle1(p->eye_z, Z) +} +#endif + + + +static struct ureg get_eye_position_normalized( struct tnl_program *p ) +{ + if (is_undef(p->eye_position_normalized)) { + struct ureg eye = get_eye_position(p); + p->eye_position_normalized = reserve_temp(p); + emit_normalize_vec3(p, p->eye_position_normalized, eye); + } + + return p->eye_position_normalized; +} + + +static struct ureg get_eye_normal( struct tnl_program *p ) +{ + if (is_undef(p->eye_normal)) { + struct ureg normal = register_input(p, BRW_ATTRIB_NORMAL ); + struct ureg mvinv[3]; + + register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2, + STATE_MATRIX_INVTRANS, mvinv ); + + p->eye_normal = reserve_temp(p); + + /* Transform to eye space: + */ + emit_matrix_transform_vec3( p, p->eye_normal, 
mvinv, normal ); + + /* Normalize/Rescale: + */ + if (p->state->normalize) { + emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); + } + else if (p->state->rescale_normals) { + struct ureg rescale = register_param2(p, STATE_INTERNAL, + STATE_NORMAL_SCALE); + + emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, + swizzle1(rescale, X)); + } + } + + return p->eye_normal; +} + + + +static void build_hpos( struct tnl_program *p ) +{ + struct ureg pos = register_input( p, BRW_ATTRIB_POS ); + struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); + struct ureg mvp[4]; + + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, + STATE_MATRIX, mvp ); + emit_matrix_transform_vec4( p, hpos, mvp, pos ); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, + STATE_MATRIX_TRANSPOSE, mvp ); + emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); + } +} + + +static GLuint material_attrib( GLuint side, GLuint property ) +{ + return (property - STATE_AMBIENT) * 2 + side; +} + +/* Get a bitmask of which material values vary on a per-vertex basis. 
+ */ +static void set_material_flags( struct tnl_program *p ) +{ + p->color_materials = 0; + p->materials = 0; + + if (p->state->light_color_material) { + p->materials = + p->color_materials = p->state->light_color_material_mask; + } + + p->materials |= p->state->light_material_mask; +} + + +static struct ureg get_material( struct tnl_program *p, GLuint side, + GLuint property ) +{ + GLuint attrib = material_attrib(side, property); + + if (p->color_materials & (1<<attrib)) + return register_input(p, BRW_ATTRIB_COLOR0); + else if (p->materials & (1<<attrib)) + return register_input( p, attrib + BRW_ATTRIB_MAT_FRONT_AMBIENT ); + else + return register_param3( p, STATE_MATERIAL, side, property ); +} + +#define SCENE_COLOR_BITS(side) ((MAT_BIT_FRONT_EMISSION | \ + MAT_BIT_FRONT_AMBIENT | \ + MAT_BIT_FRONT_DIFFUSE) << (side)) + +/* Either return a precalculated constant value or emit code to + * calculate these values dynamically in the case where material calls + * are present between begin/end pairs. + * + * Probably want to shift this to the program compilation phase - if + * we always emitted the calculation here, a smart compiler could + * detect that it was constant (given a certain set of inputs), and + * lift it out of the main loop. That way the programs created here + * would be independent of the vertex_buffer details. 
+ */ +static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) +{ + if (p->materials & SCENE_COLOR_BITS(side)) { + struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); + struct ureg material_emission = get_material(p, side, STATE_EMISSION); + struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); + struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); + struct ureg tmp = make_temp(p, material_diffuse); + emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, + material_ambient, material_emission); + return tmp; + } + else + return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); +} + + +static struct ureg get_lightprod( struct tnl_program *p, GLuint light, + GLuint side, GLuint property ) +{ + GLuint attrib = material_attrib(side, property); + if (p->materials & (1<<attrib)) { + struct ureg light_value = + register_param3(p, STATE_LIGHT, light, property); + struct ureg material_value = get_material(p, side, property); + struct ureg tmp = get_temp(p); + emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); + return tmp; + } + else + return register_param4(p, STATE_LIGHTPROD, light, side, property); +} + +static struct ureg calculate_light_attenuation( struct tnl_program *p, + GLuint i, + struct ureg VPpli, + struct ureg dist ) +{ + struct ureg attenuation = register_param3(p, STATE_LIGHT, i, + STATE_ATTENUATION); + struct ureg att = get_temp(p); + + /* Calculate spot attenuation: + */ + if (!p->state->unit[i].light_spotcutoff_is_180) { + struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, + STATE_SPOT_DIRECTION); + struct ureg spot = get_temp(p); + struct ureg slt = get_temp(p); + + emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! 
*/ + emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot); + emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); + emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); + emit_op2(p, OPCODE_MUL, att, 0, slt, spot); + + release_temp(p, spot); + release_temp(p, slt); + } + + /* Calculate distance attenuation: + */ + if (p->state->unit[i].light_attenuated) { + + /* 1/d,d,d,1/d */ + emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); + /* 1,d,d*d,1/d */ + emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); + /* 1/dist-atten */ + emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); + + if (!p->state->unit[i].light_spotcutoff_is_180) { + /* dist-atten */ + emit_op1(p, OPCODE_RCP, dist, 0, dist); + /* spot-atten * dist-atten */ + emit_op2(p, OPCODE_MUL, att, 0, dist, att); + } else { + /* dist-atten */ + emit_op1(p, OPCODE_RCP, att, 0, dist); + } + } + + return att; +} + + + + + +/* Need to add some addtional parameters to allow lighting in object + * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye + * space lighting. 
+ */ +static void build_lighting( struct tnl_program *p ) +{ + const GLboolean twoside = p->state->light_twoside; + const GLboolean separate = p->state->separate_specular; + GLuint nr_lights = 0, count = 0; + struct ureg normal = get_eye_normal(p); + struct ureg lit = get_temp(p); + struct ureg dots = get_temp(p); + struct ureg _col0 = undef, _col1 = undef; + struct ureg _bfc0 = undef, _bfc1 = undef; + GLuint i; + + for (i = 0; i < MAX_LIGHTS; i++) + if (p->state->unit[i].light_enabled) + nr_lights++; + + set_material_flags(p); + + { + struct ureg shininess = get_material(p, 0, STATE_SHININESS); + emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); + release_temp(p, shininess); + + _col0 = make_temp(p, get_scenecolor(p, 0)); + if (separate) + _col1 = make_temp(p, get_identity_param(p)); + else + _col1 = _col0; + + } + + if (twoside) { + struct ureg shininess = get_material(p, 1, STATE_SHININESS); + emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, + ureg_negate(swizzle1(shininess,X))); + release_temp(p, shininess); + + _bfc0 = make_temp(p, get_scenecolor(p, 1)); + if (separate) + _bfc1 = make_temp(p, get_identity_param(p)); + else + _bfc1 = _bfc0; + } + + + /* If no lights, still need to emit the scenecolor. + */ + /* KW: changed to do this always - v1.17 "Fix lighting alpha result"? 
+ */ + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + { + struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); + emit_op1(p, OPCODE_MOV, res0, 0, _col0); + + if (twoside) { + struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); + emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); + } + } + + if (separate && (p->state->fragprog_inputs_read & FRAG_BIT_COL1)) { + + struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); + emit_op1(p, OPCODE_MOV, res1, 0, _col1); + + if (twoside) { + struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); + emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); + } + } + + if (nr_lights == 0) { + release_temps(p); + return; + } + + + for (i = 0; i < MAX_LIGHTS; i++) { + if (p->state->unit[i].light_enabled) { + struct ureg half = undef; + struct ureg att = undef, VPpli = undef; + + count++; + + if (p->state->unit[i].light_eyepos3_is_zero) { + /* Can used precomputed constants in this case. + * Attenuation never applies to infinite lights. + */ + VPpli = register_param3(p, STATE_LIGHT, i, + STATE_POSITION_NORMALIZED); + half = register_param3(p, STATE_LIGHT, i, STATE_HALF); + } + else { + struct ureg Ppli = register_param3(p, STATE_LIGHT, i, + STATE_POSITION); + struct ureg V = get_eye_position(p); + struct ureg dist = get_temp(p); + + VPpli = get_temp(p); + half = get_temp(p); + + /* Calulate VPpli vector + */ + emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); + + /* Normalize VPpli. The dist value also used in + * attenuation below. 
+ */ + emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); + emit_op1(p, OPCODE_RSQ, dist, 0, dist); + emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); + + + /* Calculate attenuation: + */ + if (!p->state->unit[i].light_spotcutoff_is_180 || + p->state->unit[i].light_attenuated) { + att = calculate_light_attenuation(p, i, VPpli, dist); + } + + + /* Calculate viewer direction, or use infinite viewer: + */ + if (p->state->light_local_viewer) { + struct ureg eye_hat = get_eye_position_normalized(p); + emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); + } + else { + struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); + emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); + } + + emit_normalize_vec3(p, half, half); + + release_temp(p, dist); + } + + /* Calculate dot products: + */ + emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); + emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); + + + /* Front face lighting: + */ + { + struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); + struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); + struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); + struct ureg res0, res1; + GLuint mask0, mask1; + + emit_op1(p, OPCODE_LIT, lit, 0, dots); + + if (!is_undef(att)) + emit_op2(p, OPCODE_MUL, lit, 0, lit, att); + + + mask0 = 0; + mask1 = 0; + res0 = _col0; + res1 = _col1; + + if (count == nr_lights) { + if (separate) { + mask0 = WRITEMASK_XYZ; + mask1 = WRITEMASK_XYZ; + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + res0 = register_output( p, VERT_RESULT_COL0 ); + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) + res1 = register_output( p, VERT_RESULT_COL1 ); + } + else { + mask1 = WRITEMASK_XYZ; + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + res1 = register_output( p, VERT_RESULT_COL0 ); + } + } + + emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); + emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); + emit_op3(p, OPCODE_MAD, res1, 
mask1, swizzle1(lit,Z), specular, _col1); + + release_temp(p, ambient); + release_temp(p, diffuse); + release_temp(p, specular); + } + + /* Back face lighting: + */ + if (twoside) { + struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); + struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); + struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); + struct ureg res0, res1; + GLuint mask0, mask1; + + emit_op1(p, OPCODE_LIT, lit, 0, ureg_negate(swizzle(dots,X,Y,W,Z))); + + if (!is_undef(att)) + emit_op2(p, OPCODE_MUL, lit, 0, lit, att); + + mask0 = 0; + mask1 = 0; + res0 = _bfc0; + res1 = _bfc1; + + if (count == nr_lights) { + if (separate) { + mask0 = WRITEMASK_XYZ; + mask1 = WRITEMASK_XYZ; + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + res0 = register_output( p, VERT_RESULT_BFC0 ); + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) + res1 = register_output( p, VERT_RESULT_BFC1 ); + } + else { + mask1 = WRITEMASK_XYZ; + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + res1 = register_output( p, VERT_RESULT_BFC0 ); + } + } + + emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); + emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); + emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); + + release_temp(p, ambient); + release_temp(p, diffuse); + release_temp(p, specular); + } + + release_temp(p, half); + release_temp(p, VPpli); + release_temp(p, att); + } + } + + release_temps( p ); +} + + +static void build_fog( struct tnl_program *p ) +{ + struct ureg fog = register_output(p, VERT_RESULT_FOGC); + struct ureg input; + + if (p->state->fog_source_is_depth) { + input = swizzle1(get_eye_position(p), Z); + } + else { + input = swizzle1(register_input(p, BRW_ATTRIB_FOG), X); + } + + if (p->state->fog_option && + p->state->tnl_do_vertex_fog) { + struct ureg params = register_param1(p, STATE_FOG_PARAMS); + struct ureg tmp = get_temp(p); + struct ureg id = get_identity_param(p); + 
+ emit_op1(p, OPCODE_MOV, fog, 0, id); + + switch (p->state->fog_option) { + case FOG_LINEAR: { + emit_op1(p, OPCODE_ABS, tmp, 0, input); + emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), tmp); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); + emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ + emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); + break; + } + case FOG_EXP: + emit_op1(p, OPCODE_ABS, tmp, 0, input); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); + emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, + register_const1f(p, M_E), ureg_negate(tmp)); + break; + case FOG_EXP2: + emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); + emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, + register_const1f(p, M_E), ureg_negate(tmp)); + break; + } + + release_temp(p, tmp); + } + else { + /* results = incoming fog coords (compute fog per-fragment later) + * + * KW: Is it really necessary to do anything in this case? 
+ */ + emit_op1(p, OPCODE_MOV, fog, 0, input); + } +} + +static void build_reflect_texgen( struct tnl_program *p, + struct ureg dest, + GLuint writemask ) +{ + struct ureg normal = get_eye_normal(p); + struct ureg eye_hat = get_eye_position_normalized(p); + struct ureg tmp = get_temp(p); + + /* n.u */ + emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); + /* 2n.u */ + emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); + /* (-2n.u)n + u */ + emit_op3(p, OPCODE_MAD, dest, writemask, ureg_negate(tmp), normal, eye_hat); + + release_temp(p, tmp); +} + +static void build_sphere_texgen( struct tnl_program *p, + struct ureg dest, + GLuint writemask ) +{ + struct ureg normal = get_eye_normal(p); + struct ureg eye_hat = get_eye_position_normalized(p); + struct ureg tmp = get_temp(p); + struct ureg half = register_scalar_const(p, .5); + struct ureg r = get_temp(p); + struct ureg inv_m = get_temp(p); + struct ureg id = get_identity_param(p); + + /* Could share the above calculations, but it would be + * a fairly odd state for someone to set (both sphere and + * reflection active for different texture coordinate + * components. 
Of course - if two texture units enable + * reflect and/or sphere, things start to tilt in favour + * of seperating this out: + */ + + /* n.u */ + emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); + /* 2n.u */ + emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); + /* (-2n.u)n + u */ + emit_op3(p, OPCODE_MAD, r, 0, ureg_negate(tmp), normal, eye_hat); + /* r + 0,0,1 */ + emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); + /* rx^2 + ry^2 + (rz+1)^2 */ + emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); + /* 2/m */ + emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); + /* 1/m */ + emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); + /* r/m + 1/2 */ + emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); + + release_temp(p, tmp); + release_temp(p, r); + release_temp(p, inv_m); +} + + +static void build_texture_transform( struct tnl_program *p ) +{ + GLuint i, j; + + for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + + if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i))) + continue; + + if (p->state->unit[i].texgen_enabled || + p->state->unit[i].texmat_enabled) { + + GLuint texmat_enabled = p->state->unit[i].texmat_enabled; + struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); + struct ureg out_texgen = undef; + + if (p->state->unit[i].texgen_enabled) { + GLuint copy_mask = 0; + GLuint sphere_mask = 0; + GLuint reflect_mask = 0; + GLuint normal_mask = 0; + GLuint modes[4]; + + if (texmat_enabled) + out_texgen = get_temp(p); + else + out_texgen = out; + + modes[0] = p->state->unit[i].texgen_mode0; + modes[1] = p->state->unit[i].texgen_mode1; + modes[2] = p->state->unit[i].texgen_mode2; + modes[3] = p->state->unit[i].texgen_mode3; + + for (j = 0; j < 4; j++) { + switch (modes[j]) { + case TXG_OBJ_LINEAR: { + struct ureg obj = register_input(p, BRW_ATTRIB_POS); + struct ureg plane = + register_param3(p, STATE_TEXGEN, i, + STATE_TEXGEN_OBJECT_S + j); + + emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, + obj, plane ); + break; + } + case TXG_EYE_LINEAR: { + struct ureg eye = 
get_eye_position(p); + struct ureg plane = + register_param3(p, STATE_TEXGEN, i, + STATE_TEXGEN_EYE_S + j); + + emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, + eye, plane ); + break; + } + case TXG_SPHERE_MAP: + sphere_mask |= WRITEMASK_X << j; + break; + case TXG_REFLECTION_MAP: + reflect_mask |= WRITEMASK_X << j; + break; + case TXG_NORMAL_MAP: + normal_mask |= WRITEMASK_X << j; + break; + case TXG_NONE: + copy_mask |= WRITEMASK_X << j; + } + + } + + + if (sphere_mask) { + build_sphere_texgen(p, out_texgen, sphere_mask); + } + + if (reflect_mask) { + build_reflect_texgen(p, out_texgen, reflect_mask); + } + + if (normal_mask) { + struct ureg normal = get_eye_normal(p); + emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); + } + + if (copy_mask) { + struct ureg in = register_input(p, BRW_ATTRIB_TEX0+i); + emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); + } + } + + if (texmat_enabled) { + struct ureg texmat[4]; + struct ureg in = (!is_undef(out_texgen) ? + out_texgen : + register_input(p, BRW_ATTRIB_TEX0+i)); + if (PREFER_DP4) { + register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, + 0, 3, STATE_MATRIX, texmat ); + emit_matrix_transform_vec4( p, out, texmat, in ); + } + else { + register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, + 0, 3, STATE_MATRIX_TRANSPOSE, texmat ); + emit_transpose_matrix_transform_vec4( p, out, texmat, in ); + } + } + + release_temps(p); + } + else { + emit_passthrough(p, BRW_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); + } + } +} + + +/* Seems like it could be tighter: + */ +static void build_pointsize( struct tnl_program *p ) +{ + struct ureg eye = get_eye_position(p); + struct ureg state_size = register_param1(p, STATE_POINT_SIZE); + struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); + struct ureg out = register_output(p, VERT_RESULT_PSIZ); + struct ureg ut = get_temp(p); + + /* 1, Z, Z * Z, 1 */ + emit_op1(p, OPCODE_MOV, ut, WRITEMASK_XW, swizzle1(get_identity_param(p), W)); + emit_op1(p, 
OPCODE_ABS, ut, WRITEMASK_YZ, swizzle1(eye, Z)); + emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, ut); + + + /* p1 + p2 * dist + p3 * dist * dist, 0 */ + emit_op2(p, OPCODE_DP3, ut, WRITEMASK_X, ut, state_attenuation); + + /* 1 / sqrt(factor) */ + emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); + + /* ut = pointSize / factor */ + emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); + + /* Clamp to min/max - state_size.[yz] + */ + emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); + emit_op2(p, OPCODE_MIN, out, 0, swizzle1(ut, X), swizzle1(state_size, Z)); + + release_temp(p, ut); +} + +static void build_tnl_program( struct tnl_program *p ) +{ + /* Emit the program, starting with modelviewproject: + */ + build_hpos(p); + + /* Lighting calculations: + */ + if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { + if (p->state->light_global_enabled) + build_lighting(p); + else { + if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) + emit_passthrough(p, BRW_ATTRIB_COLOR0, VERT_RESULT_COL0); + + if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) + emit_passthrough(p, BRW_ATTRIB_COLOR1, VERT_RESULT_COL1); + } + } + + if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) || + p->state->fog_option != FOG_NONE) + build_fog(p); + + if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) + build_texture_transform(p); + + if (p->state->point_attenuated) + build_pointsize(p); + + /* Finish up: + */ + emit_op1(p, OPCODE_END, undef, 0, undef); + + /* Disassemble: + */ + if (DISASSEM) { + _mesa_printf ("\n"); + } +} + + +static void build_new_tnl_program( const struct state_key *key, + struct gl_vertex_program *program, + GLuint max_temps) +{ + struct tnl_program p; + + _mesa_memset(&p, 0, sizeof(p)); + p.state = key; + p.program = program; + p.eye_position = undef; + p.eye_position_normalized = undef; + p.eye_normal = undef; + p.identity = undef; + p.temp_in_use = 0; + p.nr_instructions = 16; + + if (max_temps >= sizeof(int) * 8) + 
p.temp_reserved = 0; + else + p.temp_reserved = ~((1<<max_temps)-1); + + p.program->Base.Instructions = + _mesa_malloc(sizeof(struct prog_instruction) * p.nr_instructions); + p.program->Base.String = 0; + p.program->Base.NumInstructions = + p.program->Base.NumTemporaries = + p.program->Base.NumParameters = + p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; + p.program->Base.Parameters = _mesa_new_parameter_list(); + p.program->Base.InputsRead = 0; + p.program->Base.OutputsWritten = 0; + + build_tnl_program( &p ); +} + +static void *search_cache( struct brw_tnl_cache *cache, + GLuint hash, + const void *key, + GLuint keysize) +{ + struct brw_tnl_cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && memcmp(c->key, key, keysize) == 0) + return c->data; + } + + return NULL; +} + +static void rehash( struct brw_tnl_cache *cache ) +{ + struct brw_tnl_cache_item **items; + struct brw_tnl_cache_item *c, *next; + GLuint size, i; + + size = cache->size * 3; + items = (struct brw_tnl_cache_item**) _mesa_malloc(size * sizeof(*items)); + _mesa_memset(items, 0, size * sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + +static void cache_item( struct brw_tnl_cache *cache, + GLuint hash, + const struct state_key *key, + void *data ) +{ + struct brw_tnl_cache_item *c = MALLOC(sizeof(*c)); + c->hash = hash; + + c->key = malloc(sizeof(*key)); + memcpy(c->key, key, sizeof(*key)); + + c->data = data; + + if (++cache->n_items > cache->size * 1.5) + rehash(cache); + + c->next = cache->items[hash % cache->size]; + cache->items[hash % cache->size] = c; +} + + +static GLuint hash_key( struct state_key *key ) +{ + GLuint *ikey = (GLuint *)key; + GLuint hash = 0, i; + + /* I'm sure this can be improved on, but speed is 
important: + */ + for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++) + hash += ikey[i]; + + return hash; +} + +static void update_tnl_program( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct state_key key; + GLuint hash; + struct gl_vertex_program *old = brw->tnl_program; + + /* _NEW_PROGRAM */ + if (brw->attribs.VertexProgram->_Enabled) + return; + + /* Grab all the relevent state and put it in a single structure: + */ + make_state_key(ctx, &key); + hash = hash_key(&key); + + /* Look for an already-prepared program for this state: + */ + brw->tnl_program = (struct gl_vertex_program *) + search_cache( &brw->tnl_program_cache, hash, &key, sizeof(key) ); + + /* OK, we'll have to build a new one: + */ + if (!brw->tnl_program) { + brw->tnl_program = (struct gl_vertex_program *) + ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); + + build_new_tnl_program( &key, brw->tnl_program, +/* ctx->Const.MaxVertexProgramTemps */ + 32 + ); + + if (ctx->Driver.ProgramStringNotify) + ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, + &brw->tnl_program->Base ); + + cache_item( &brw->tnl_program_cache, + hash, &key, brw->tnl_program ); + } + + if (old != brw->tnl_program) + brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM; +} + +/* Note: See brw_draw.c - the vertex program must not rely on + * brw->primitive or brw->reduced_prim. 
+ */ +const struct brw_tracked_state brw_tnl_vertprog = { + .dirty = { + .mesa = (_NEW_PROGRAM | + _NEW_LIGHT | + _NEW_TRANSFORM | + _NEW_FOG | + _NEW_HINT | + _NEW_POINT | + _NEW_TEXTURE), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_INPUT_VARYING), + .cache = 0 + }, + .update = update_tnl_program +}; + + + + +static void update_active_vertprog( struct brw_context *brw ) +{ + struct gl_vertex_program *prev = brw->vertex_program; + + /* NEW_PROGRAM */ + if (brw->attribs.VertexProgram->_Enabled) { + brw->vertex_program = brw->attribs.VertexProgram->Current; + } + else { + /* BRW_NEW_TNL_PROGRAM */ + brw->vertex_program = brw->tnl_program; + } + + if (brw->vertex_program != prev) + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; +} + + + +const struct brw_tracked_state brw_active_vertprog = { + .dirty = { + .mesa = _NEW_PROGRAM, + .brw = BRW_NEW_TNL_PROGRAM, + .cache = 0 + }, + .update = update_active_vertprog +}; + + +void brw_ProgramCacheInit( GLcontext *ctx ) +{ + struct brw_context *brw = brw_context(ctx); + + brw->tnl_program_cache.size = 17; + brw->tnl_program_cache.n_items = 0; + brw->tnl_program_cache.items = (struct brw_tnl_cache_item **) + _mesa_calloc(brw->tnl_program_cache.size * + sizeof(struct brw_tnl_cache_item)); +} + +void brw_ProgramCacheDestroy( GLcontext *ctx ) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_tnl_cache_item *c, *next; + GLuint i; + + for (i = 0; i < brw->tnl_program_cache.size; i++) + for (c = brw->tnl_program_cache.items[i]; c; c = next) { + next = c->next; + FREE(c->key); + FREE(c->data); + FREE(c); + } + + FREE(brw->tnl_program_cache.items); +} diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c new file mode 100644 index 00000000000..4896882034b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -0,0 +1,192 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + + +#include "glheader.h" +#include "mtypes.h" +#include "imports.h" +#include "macros.h" +#include "colormac.h" + +#include "intel_batchbuffer.h" +#include "intel_regions.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "brw_draw.h" +#include "brw_exec.h" +#include "brw_save.h" +#include "brw_state.h" +#include "brw_aub.h" +#include "brw_fallback.h" +#include "brw_vs.h" + + + +/* called from intelDestroyContext() + */ +static void brw_destroy_context( struct intel_context *intel ) +{ + GLcontext *ctx = &intel->ctx; + struct brw_context *brw = brw_context(&intel->ctx); + + brw_aub_destroy(brw); + + brw_destroy_metaops(brw); + brw_destroy_state(brw); + brw_draw_destroy( brw ); + + brw_exec_destroy( ctx ); + brw_save_destroy( ctx ); + + brw_ProgramCacheDestroy( ctx ); +} + +/* called from intelDrawBuffer() + */ +static void brw_set_draw_region( struct intel_context *intel, + struct intel_region *draw_region, + struct intel_region *depth_region) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + intel_region_release(intel, &brw->state.draw_region); + intel_region_release(intel, &brw->state.depth_region); + intel_region_reference(&brw->state.draw_region, draw_region); + intel_region_reference(&brw->state.depth_region, depth_region); +} + + +/* called from intelFlushBatchLocked + */ +static void brw_lost_hardware( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + /* Note that we effectively lose the context after this. + * + * Setting this flag provokes a state buffer wrap and also flushes + * the hardware caches. 
+ */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + + /* Which means there shouldn't be any commands already queued: + */ + assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + +static void brw_note_fence( struct intel_context *intel, + GLuint fence ) +{ + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; +} + +static void brw_note_unlock( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]); + brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]); + + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK; +} + + +void brw_do_flush( struct brw_context *brw, + GLuint flags ) +{ + struct brw_mi_flush flush; + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + flush.flags = flags; + BRW_BATCH_STRUCT(brw, &flush); +} + + +static void brw_emit_flush( struct intel_context *intel, + GLuint unused ) +{ + brw_do_flush(brw_context(&intel->ctx), + BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); +} + + +/* called from intelWaitForIdle() and intelFlush() + * + * For now, just flush everything. Could be smarter later. 
+ */ +static GLuint brw_flush_cmd( void ) +{ + struct brw_mi_flush flush; + flush.opcode = CMD_MI_FLUSH; + flush.pad = 0; + flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + return *(GLuint *)&flush; +} + + + + +static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) +{ + GLcontext *ctx = &intel->ctx; + + brw_exec_invalidate_state(ctx, new_state); + brw_save_invalidate_state(ctx, new_state); +} + + +void brwInitVtbl( struct brw_context *brw ) +{ + brw->intel.vtbl.check_vertex_size = 0; + brw->intel.vtbl.emit_state = 0; + brw->intel.vtbl.reduced_primitive_state = 0; + brw->intel.vtbl.render_start = 0; + brw->intel.vtbl.update_texture_state = 0; + + brw->intel.vtbl.invalidate_state = brw_invalidate_state; + brw->intel.vtbl.note_fence = brw_note_fence; + brw->intel.vtbl.note_unlock = brw_note_unlock; + brw->intel.vtbl.lost_hardware = brw_lost_hardware; + brw->intel.vtbl.destroy = brw_destroy_context; + brw->intel.vtbl.set_draw_region = brw_set_draw_region; + brw->intel.vtbl.flush_cmd = brw_flush_cmd; + brw->intel.vtbl.emit_flush = brw_emit_flush; +} + diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c new file mode 100644 index 00000000000..3e2f2d06b81 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -0,0 +1,349 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_state.h" +#include "brw_hal.h" + +#include "program.h" +#include "program_instruction.h" +#include "arbprogparse.h" + + +GLuint brw_wm_nr_args( GLuint opcode ) +{ + switch (opcode) { + + case WM_PIXELXY: + case OPCODE_ABS: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_SWZ: + case OPCODE_MOV: + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + case OPCODE_SCS: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_KIL: + case OPCODE_LIT: + case WM_CINTERP: + case WM_WPOSXY: + return 1; + + case OPCODE_POW: + case OPCODE_SUB: + case OPCODE_SGE: + case OPCODE_SLT: + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + case OPCODE_XPD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + case OPCODE_DST: + case WM_LINTERP: + case WM_DELTAXY: + case WM_PIXELW: + return 2; + + case WM_FB_WRITE: + case WM_PINTERP: + case OPCODE_MAD: + case OPCODE_CMP: + case OPCODE_LRP: + return 3; + + default: + return 0; + } +} + + +GLuint brw_wm_is_scalar_result( GLuint opcode ) +{ + switch (opcode) { + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_POW: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + case OPCODE_DST: + return 1; + + default: + return 0; + } +} + + +static void brw_wm_pass_hal (struct brw_wm_compile *c) +{ + static void (*hal_wm_pass) (struct brw_wm_compile *c); + static GLboolean hal_tried; + + if (!hal_tried) + { + hal_wm_pass = brw_hal_find_symbol ("intel_hal_wm_pass"); + hal_tried = 1; + } + if (hal_wm_pass) + (*hal_wm_pass) (c); +} + +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct 
brw_wm_prog_key *key) +{ + struct brw_wm_compile c; + const GLuint *program; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + c.fp = fp; + c.env_param = brw->intel.ctx.FragmentProgram.Parameters; + + + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(&c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(&c); + + /* Dead code removal. + */ + brw_wm_pass1(&c); + + /* Hal optimization + */ + brw_wm_pass_hal (&c); + + /* Register allocation. + */ + c.grf_limit = BRW_WM_MAX_GRF/2; + + /* This is where we start emitting gen4 code: + */ + brw_init_compile(&c.func); + + brw_wm_pass2(&c); + + c.prog_data.total_grf = c.max_wm_grf; + c.prog_data.total_scratch = c.last_scratch ? c.last_scratch + 0x40 : 0; + + /* Emit GEN4 code. + */ + brw_wm_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* + */ + brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->wm.prog_data ); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + GLuint i; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* _NEW_COLOR */ + if (fp->program.UsesKill || + brw->attribs.Color->AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (brw->attribs.Depth->Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (brw->attribs.Depth->Test && + brw->attribs.Depth->Mask) /* ?? 
*/ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL */ + if (brw->attribs.Stencil->Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.Stencil->WriteMask[0] || + (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + /* XXX: when should this be disabled? + */ + if (1) + lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (brw->attribs.Line->SmoothFlag) { + if (brw->intel.reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->intel.reduced_primitive == GL_TRIANGLES) { + if (brw->attribs.Polygon->FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Polygon->BackMode == GL_LINE || + (brw->attribs.Polygon->CullFlag && + brw->attribs.Polygon->CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (brw->attribs.Polygon->BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((brw->attribs.Polygon->CullFlag && + brw->attribs.Polygon->CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + key); + + + /* BRW_NEW_WM_INPUT_DIMENSIONS */ + key->projtex_mask = brw->wm.input_size_masks[4-1]; + + /* _NEW_LIGHT */ + key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); + + /* _NEW_TEXTURE */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct gl_texture_unit *unit = &brw->attribs.Texture->Unit[i]; + const struct gl_texture_object *t = unit->_Current; + + if (unit->_ReallyEnabled) { + + if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && + t->Image[0][t->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT) { + key->shadowtex_mask |= 1<<i; + } + + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + key->yuvtex_mask |= 1<<i; + } + } + + + /* Extra info: + */ + key->program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ + struct brw_wm_prog_key key; + 
struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->fragment_program; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_WM_PROG], + &key, sizeof(key), + &brw->wm.prog_data, + &brw->wm.prog_gs_offset)) + return; + + do_wm_prog(brw, fp, &key); +} + + +/* See brw_wm.c: + */ +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .mesa = (_NEW_COLOR | + _NEW_DEPTH | + _NEW_STENCIL | + _NEW_POLYGON | + _NEW_LINE | + _NEW_LIGHT | + _NEW_TEXTURE), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_WM_INPUT_DIMENSIONS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .update = brw_upload_wm_prog +}; + diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h new file mode 100644 index 00000000000..74c3bbe2047 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -0,0 +1,261 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" +#include "program_instruction.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_EARLY_DEPTH_TEST_BIT 0x40 +#define IZ_BIT_MAX 0x80 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + GLuint source_depth_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_depth_regs:3; + GLuint projtex_mask:8; + GLuint shadowtex_mask:8; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint flat_shade:1; + GLuint runtime_check_aads_emit:1; + + GLuint yuvtex_mask:8; + GLuint pad1:24; + + GLuint program_string_id:32; +}; + + +/* A bit of a glossary: + * + * brw_wm_value: A computed value or program input. Values are + * constant, they are created once and are never modified. When a + * fragment program register is written or overwritten, new values are + * created fresh, preserving the rule that values are constant. + * + * brw_wm_ref: A reference to a value. 
Wherever a value used is by an + * instruction or as a program output, that is tracked with an + * instance of this struct. All references to a value occur after it + * is created. After the last reference, a value is dead and can be + * discarded. + * + * brw_wm_grf: Represents a physical hardware register. May be either + * empty or hold a value. Register allocation is the process of + * assigning values to grf registers. This occurs in pass2 and the + * brw_wm_grf struct is not used before that. + * + * Fragment program registers: These are time-varying constructs that + * are hard to reason about and which we translate away in pass0. A + * single fragment program register element (eg. temp[0].x) will be + * translated to one or more brw_wm_value structs, one for each time + * that temp[0].x is written to during the program. + */ + + + +/* Used in pass2 to track register allocation. + */ +struct brw_wm_grf { + struct brw_wm_value *value; + GLuint nextuse; +}; + +struct brw_wm_value { + struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ + struct brw_wm_ref *lastuse; + struct brw_wm_grf *resident; + GLuint contributes_to_output:1; + GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */ +}; + +struct brw_wm_ref { + struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ + struct brw_wm_value *value; + struct brw_wm_ref *prevuse; + GLuint unspill_reg:7; /* unspill to reg */ + GLuint emitted:1; + GLuint insn:24; +}; + +struct brw_wm_constref { + const struct brw_wm_ref *ref; + GLfloat constval; +}; + + +struct brw_wm_instruction { + struct brw_wm_value *dst[4]; + struct brw_wm_ref *src[3][4]; + GLuint opcode:8; + GLuint saturate:1; + GLuint writemask:4; + GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ + GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ +}; + + +#define PROGRAM_INTERNAL_PARAM + +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + 
FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + + + +/* New opcodes to track internal operations required for WM unit. + * These are added early so that the registers used can be tracked, + * freed and reused like those of other instructions. + */ +#define WM_PIXELXY (MAX_OPCODE) +#define WM_DELTAXY (MAX_OPCODE + 1) +#define WM_PIXELW (MAX_OPCODE + 2) +#define WM_LINTERP (MAX_OPCODE + 3) +#define WM_PINTERP (MAX_OPCODE + 4) +#define WM_CINTERP (MAX_OPCODE + 5) +#define WM_WPOSXY (MAX_OPCODE + 6) +#define WM_FB_WRITE (MAX_OPCODE + 7) + +#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) +#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; + + struct brw_fragment_program *fp; + + GLfloat (*env_param)[4]; + + enum { + START, + PASS2_DONE + } state; + + /* Initial pass - translate fp instructions to fp instructions, + * simplifying and adding instructions for interpolation and + * framebuffer writes. 
+ */ + struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + + struct prog_src_register pixel_xy; + struct prog_src_register delta_xy; + struct prog_src_register pixel_w; + + + struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + GLuint nr_vreg; + + struct brw_wm_value creg[BRW_WM_MAX_PARAM]; + GLuint nr_creg; + + struct { + struct brw_wm_value depth[4]; /* includes r0/r1 */ + struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; + } payload; + + + const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + + struct brw_wm_ref undef_ref; + struct brw_wm_value undef_value; + + struct brw_wm_ref refs[BRW_WM_MAX_REF]; + GLuint nr_refs; + + struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + GLuint nr_insns; + + struct brw_wm_constref constref[BRW_WM_MAX_CONST]; + GLuint nr_constrefs; + + struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + + GLuint grf_limit; + GLuint max_wm_grf; + GLuint last_scratch; +}; + + +GLuint brw_wm_nr_args( GLuint opcode ); +GLuint brw_wm_is_scalar_result( GLuint opcode ); + +void brw_wm_pass_fp( struct brw_wm_compile *c ); +void brw_wm_pass0( struct brw_wm_compile *c ); +void brw_wm_pass1( struct brw_wm_compile *c ); +void brw_wm_pass2( struct brw_wm_compile *c ); +void brw_wm_emit( struct brw_wm_compile *c ); + +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ); + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ); + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ); + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ); + +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + struct brw_wm_prog_key *key ); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c new file mode 100644 index 00000000000..6b0096eca58 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c @@ -0,0 +1,234 @@ +/* 
+ Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_wm.h" +#include "program.h" +#include "shader/arbprogparse.h" +#include "shader/program_instruction.h" + +static const char *fp_opcode_string[] = { + "ABS", /* ARB_f_p only */ + "ADD", + "CMP", /* ARB_f_p only */ + "COS", + "DDX", /* NV_f_p only */ + "DDY", /* NV_f_p only */ + "DP3", + "DP4", + "DPH", /* ARB_f_p only */ + "DST", + "END", /* private opcode */ + "EX2", + "FLR", + "FRC", + "KIL", /* ARB_f_p only */ + "KIL_NV", /* NV_f_p only */ + "LG2", + "LIT", + "LRP", + "MAD", + "MAX", + "MIN", + "MOV", + "MUL", + "PK2H", /* NV_f_p only */ + "PK2US", /* NV_f_p only */ + "PK4B", /* NV_f_p only */ + "PK4UB", /* NV_f_p only */ + "POW", + "PRINT", /* Mesa only */ + "RCP", + "RFL", /* NV_f_p only */ + "RSQ", + "SCS", /* ARB_f_p only */ + "SEQ", /* NV_f_p only */ + "SFL", /* NV_f_p only */ + "SGE", /* NV_f_p only */ + "SGT", /* NV_f_p only */ + "SIN", + "SLE", /* NV_f_p only */ + "SLT", + "SNE", /* NV_f_p only */ + "STR", /* NV_f_p only */ + "SUB", + "SWZ", /* ARB_f_p only */ + "TEX", + "TXB", /* ARB_f_p only */ + "TXD", /* NV_f_p only */ + "TXP", /* ARB_f_p only */ + "TXP_NV", /* NV_f_p only */ + "UP2H", /* NV_f_p only */ + "UP2US", /* NV_f_p only */ + "UP4B", /* NV_f_p only */ + "UP4UB", /* NV_f_p only */ + "X2D", /* NV_f_p only - 2d mat mul */ + "XPD", /* ARB_f_p only - cross product */ +}; + + + +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ) +{ + assert(value); + if (c->state >= PASS2_DONE) + brw_print_reg(value->hw_reg); + else if( value == &c->undef_value ) + _mesa_printf("undef"); + else if( value - c->vreg >= 0 && + value - c->vreg < BRW_WM_MAX_VREG) + _mesa_printf("r%d", value - c->vreg); + else if (value - c->creg >= 0 && + value - c->creg < BRW_WM_MAX_PARAM) + _mesa_printf("c%d", value - c->creg); + else if (value - c->payload.input_interp >= 0 
&& + value - c->payload.input_interp < FRAG_ATTRIB_MAX) + _mesa_printf("i%d", value - c->payload.input_interp); + else if (value - c->payload.depth >= 0 && + value - c->payload.depth < FRAG_ATTRIB_MAX) + _mesa_printf("d%d", value - c->payload.depth); + else + _mesa_printf("?"); +} + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ) +{ + struct brw_reg hw_reg = ref->hw_reg; + + if (ref->unspill_reg) + _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + + if (c->state >= PASS2_DONE) + brw_print_reg(ref->hw_reg); + else { + _mesa_printf("%s", hw_reg.negate ? "-" : ""); + _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + brw_wm_print_value(c, ref->value); + if ((hw_reg.nr&1) || hw_reg.subnr) { + _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + } + } +} + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ) +{ + GLuint i, arg; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + _mesa_printf("["); + for (i = 0; i < 4; i++) { + if (inst->dst[i]) { + brw_wm_print_value(c, inst->dst[i]); + if (inst->dst[i]->spill_slot) + _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + } + else + _mesa_printf("#"); + if (i < 3) + _mesa_printf(","); + } + _mesa_printf("]"); + + if (inst->writemask != WRITEMASK_XYZW) + _mesa_printf(".%s%s%s%s", + GET_BIT(inst->writemask, 0) ? "x" : "", + GET_BIT(inst->writemask, 1) ? "y" : "", + GET_BIT(inst->writemask, 2) ? "z" : "", + GET_BIT(inst->writemask, 3) ? 
"w" : ""); + + switch (inst->opcode) { + case WM_PIXELXY: + _mesa_printf(" = PIXELXY"); + break; + case WM_DELTAXY: + _mesa_printf(" = DELTAXY"); + break; + case WM_PIXELW: + _mesa_printf(" = PIXELW"); + break; + case WM_WPOSXY: + _mesa_printf(" = WPOSXY"); + break; + case WM_PINTERP: + _mesa_printf(" = PINTERP"); + break; + case WM_LINTERP: + _mesa_printf(" = LINTERP"); + break; + case WM_CINTERP: + _mesa_printf(" = CINTERP"); + break; + case WM_FB_WRITE: + _mesa_printf(" = FB_WRITE"); + break; + default: + _mesa_printf(" = %s", fp_opcode_string[inst->opcode]); + break; + } + + if (inst->saturate) + _mesa_printf("_SAT"); + + for (arg = 0; arg < nr_args; arg++) { + + _mesa_printf(" ["); + + for (i = 0; i < 4; i++) { + if (inst->src[arg][i]) { + brw_wm_print_ref(c, inst->src[arg][i]); + } + else + _mesa_printf("%%"); + + if (i < 3) + _mesa_printf(","); + else + _mesa_printf("]"); + } + } + _mesa_printf("\n"); +} + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + _mesa_printf("\n\n\n%s:\n", stage); + for (insn = 0; insn < c->nr_insns; insn++) + brw_wm_print_insn(c, &c->instruction[insn]); + _mesa_printf("\n\n\n"); +} + diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c new file mode 100644 index 00000000000..d96a9b717c0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -0,0 +1,1250 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "program.h" +#include "program_instruction.h" +#include "macros.h" +#include "brw_wm.h" + +#define SATURATE (1<<5) + +/* Not quite sure how correct this is - need to understand horiz + * vs. vertical strides a little better. + */ +static __inline struct brw_reg sechalf( struct brw_reg reg ) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +/* Payload R0: + * + * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, + * corresponding to each of the 16 execution channels. + * R0.1..8 -- ? 
+ * R1.0 -- triangle vertex 0.X + * R1.1 -- triangle vertex 0.Y + * R1.2 -- tile 0 x,y coords (2 packed uwords) + * R1.3 -- tile 1 x,y coords (2 packed uwords) + * R1.4 -- tile 2 x,y coords (2 packed uwords) + * R1.5 -- tile 3 x,y coords (2 packed uwords) + * R1.6 -- ? + * R1.7 -- ? + * R1.8 -- ? + */ + + +static void emit_pixel_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw,5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); +} + + + +static void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } +} + +static void emit_wpos_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. 
+ */ + if (mask & WRITEMASK_X) { + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW)); + } + + if (mask & WRITEMASK_Y) { + /* TODO -- window_height - Y */ + brw_MOV(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_UW))); + + } +} + + +static void emit_pixel_w( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) +{ + /* Don't need this if all you are doing is interpolating color, for + * instance. + */ + if (mask & WRITEMASK_W) { + struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + + /* Calc w */ + brw_math_16( p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + + + +static void emit_linterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas ) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } +} + + +static void emit_pinterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_LINE(p, 
brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + brw_MUL(p, dst[i], dst[i], w[3]); + } + } +} + +static void emit_cinterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } +} + + + + + +static void emit_alu1( struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + GLuint i; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + func(p, dst[i], arg0[i]); + } + } + + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + +static void emit_alu2( struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + func(p, dst[i], arg0[i], arg1[i]); + } + } + + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + + +static void emit_mad( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MUL(p, dst[i], arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); + brw_ADD(p, dst[i], dst[i], arg2[i]); + brw_set_saturate(p, 0); + } + } +} + + +static void emit_lrp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + /* Uses dst as a temporary: + */ + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + /* Can I use the LINE instruction for this? + */ + brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst[i], arg2[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[i], arg0[i], arg1[i]); + brw_set_saturate(p, 0); + } + } +} + + +static void emit_slt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +/* Isn't this just the same as the above with the args swapped? + */ +static void emit_sge( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); + brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + + + +static void emit_cmp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); + brw_MOV(p, dst[i], arg2[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +static void emit_max( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg0[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + +static void emit_min( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg1[i]); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[i], arg0[i]); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } +} + + +static void emit_dp3( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + + brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); + brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_set_saturate(p, 0); +} + + +static void emit_dp4( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[0], arg0[3], arg1[3]); + brw_set_saturate(p, 0); +} + + +static void emit_dph( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); + brw_MAC(p, dst[0], arg0[2], arg1[2]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_ADD(p, dst[0], dst[0], arg1[3]); + brw_set_saturate(p, 0); +} + + +static void emit_xpd( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + GLuint i; + + assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + + for (i = 0 ; i < 3; i++) { + if (mask & (1<<i)) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + + brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[i], arg0[i1], arg1[i2]); + brw_set_saturate(p, 0); + } + } +} + + +static void emit_math1( struct brw_compile *p, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || + function == BRW_MATH_FUNCTION_SINCOS); + + brw_MOV(p, brw_message_reg(2), arg0[0]); + + /* Send two messages to perform all 16 operations: + */ + brw_math_16(p, + dst[0], + function, + (mask & SATURATE) ? 
BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); +} + + +static void emit_math2( struct brw_compile *p, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(2), arg0[0]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(3), arg1[0]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + + + /* Send two messages to perform all 16 operations: + */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[0], + function, + (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[0],1), + function, + (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_pop_insn_state(p); +} + + + +static void emit_tex( struct brw_wm_compile *c, + const struct brw_wm_instruction *inst, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg ) +{ + struct brw_compile *p = &c->func; + GLuint msgLength, responseLength; + GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; + GLuint i, nr; + GLuint emit; + + /* How many input regs are there? 
+ */ + switch (inst->tex_idx) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } + + if (shadow) { + nr = 4; + emit |= WRITEMASK_W; + } + + msgLength = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]); + else + brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); + msgLength += 2; + } + + responseLength = 8; /* always */ + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + inst->tex_unit + 1, /* surface */ + inst->tex_unit, /* sampler */ + inst->writemask, + (shadow ? + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), + responseLength, + msgLength, + 0); + +} + + +static void emit_txb( struct brw_wm_compile *c, + const struct brw_wm_instruction *inst, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg ) +{ + struct brw_compile *p = &c->func; + GLuint msgLength; + + /* Shadow ignored for txb. 
+ */ + switch (inst->tex_idx) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(6), arg[2]); + break; + } + + brw_MOV(p, brw_message_reg(8), arg[3]); + msgLength = 9; + + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + inst->tex_unit + 1, /* surface */ + inst->tex_unit, /* sampler */ + inst->writemask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 8, /* responseLength */ + msgLength, + 0); + +} + + +static void emit_lit( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + assert((mask & WRITEMASK_XW) == 0); + + if (mask & WRITEMASK_Y) { + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MOV(p, dst[1], arg0[0]); + brw_set_saturate(p, 0); + } + + if (mask & WRITEMASK_Z) { + emit_math2(p, BRW_MATH_FUNCTION_POW, + &dst[2], + WRITEMASK_X | (mask & SATURATE), + &arg0[1], + &arg0[3]); + } + + /* Ordinarily you'd use an iff statement to skip or shortcircuit + * some of the POW calculations above, but 16-wide iff statements + * seem to lock c1 hardware, so this is a nasty workaround: + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); + { + if (mask & WRITEMASK_Y) + brw_MOV(p, dst[1], brw_imm_f(0)); + + if (mask & WRITEMASK_Z) + brw_MOV(p, dst[2], brw_imm_f(0)); + } + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +/* Kill pixel - set execution mask to zero for those pixels which + * fail. 
+ */ +static void emit_kil( struct brw_wm_compile *c, + struct brw_reg *arg0) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + GLuint i; + + + /* XXX - usually won't need 4 compares! + */ + for (i = 0; i < 4; i++) { + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); + brw_set_predicate_control_flag_value(p, 0xff); + brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_pop_insn_state(p); + } +} + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr ) +{ + struct brw_compile *p = &c->func; + + /* Pass through control information: + */ +/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ +/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ + brw_fb_WRITE(p, + retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); +} + +static void emit_aa( struct brw_wm_compile *c, + struct brw_reg *arg1, + GLuint reg ) +{ + struct brw_compile *p = &c->func; + GLuint comp = c->key.aa_dest_stencil_reg / 2; + GLuint off = c->key.aa_dest_stencil_reg % 2; + struct brw_reg aa = offset(arg1[comp], off); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */ + brw_MOV(p, brw_message_reg(reg), aa); + brw_pop_insn_state(p); +} + + +/* Post-fragment-program processing. Send the results to the + * framebuffer. 
+ */ +static void emit_fb_write( struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2) +{ + struct brw_compile *p = &c->func; + GLuint nr = 2; + GLuint channel; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + /* I don't really understand how this achieves the color interleave + * (ie RGBARGBA) in the result: [Do the saturation here] + */ + { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(nr + channel), + arg0[channel]); + + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + + /* skip over the regs populated above: + */ + nr += 8; + + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) + brw_MOV(p, brw_message_reg(nr), arg2[2]); + else + brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ + + nr += 2; + } + + if (c->key.dest_depth_reg) + { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(nr), arg1[comp]); + /* 2nd half? 
*/ + brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1)); + brw_pop_insn_state(p); + } + else { + brw_MOV(p, brw_message_reg(nr), arg1[comp]); + } + nr += 2; + } + + + if (!c->key.runtime_check_aads_emit) { + if (c->key.aa_dest_stencil_reg) + emit_aa(c, arg1, 2); + + fire_fb_write(c, 0, nr); + } + else { + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + struct brw_reg ip = brw_ip_reg(); + struct brw_instruction *jmp; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, + v1_null_ud, + get_element_ud(brw_vec8_grf(1,0), 6), + brw_imm_ud(1<<26)); + + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + emit_aa(c, arg1, 2); + fire_fb_write(c, 0, nr); + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + /* ELSE: Shuffle up one register to fill in the hole left for AA: + */ + fire_fb_write(c, 1, nr-1); + } +} + + + + +/* Post-fragment-program processing. Send the results to the + * framebuffer. + */ +static void emit_spill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* + mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr } + */ + brw_MOV(p, brw_message_reg(2), reg); + + /* + mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } + send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } + */ + brw_dp_WRITE_16(p, + retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), + 1, + slot); +} + +static void emit_unspill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* Slot 0 is the undef value. 
+ */ + if (slot == 0) { + brw_MOV(p, reg, brw_imm_f(0)); + return; + } + + /* + mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask } + send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } + */ + + brw_dp_READ_16(p, + retype(vec16(reg), BRW_REGISTER_TYPE_UW), + 1, + slot); +} + + + +/** + * Retrieve upto 4 GEN4 register pairs for the given wm reg: + */ +static void get_argument_regs( struct brw_wm_compile *c, + struct brw_wm_ref *arg[], + struct brw_reg *regs ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (arg[i]) { + + if (arg[i]->unspill_reg) + emit_unspill(c, + brw_vec8_grf(arg[i]->unspill_reg, 0), + arg[i]->value->spill_slot); + + regs[i] = arg[i]->hw_reg; + } + else { + regs[i] = brw_null_reg(); + } + } +} + +static void spill_values( struct brw_wm_compile *c, + struct brw_wm_value *values, + GLuint nr ) +{ + GLuint i; + + for (i = 0; i < nr; i++) + if (values[i].spill_slot) + emit_spill(c, values[i].hw_reg, values[i].spill_slot); +} + + + +/* Emit the fragment program instructions here. 
+ */ +void brw_wm_emit( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint insn; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + /* Check if any of the payload regs need to be spilled: + */ + spill_values(c, c->payload.depth, 4); + spill_values(c, c->creg, c->nr_creg); + spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); + + + for (insn = 0; insn < c->nr_insns; insn++) { + + struct brw_wm_instruction *inst = &c->instruction[insn]; + struct brw_reg args[3][4], dst[4]; + GLuint i, dst_flags; + + /* Get argument regs: + */ + for (i = 0; i < 3; i++) + get_argument_regs(c, inst->src[i], args[i]); + + /* Get dest regs: + */ + for (i = 0; i < 4; i++) + if (inst->dst[i]) + dst[i] = inst->dst[i]->hw_reg; + else + dst[i] = brw_null_reg(); + + /* Flags + */ + dst_flags = inst->writemask; + if (inst->saturate) + dst_flags |= SATURATE; + + switch (inst->opcode) { + /* Generated instructions for calculating triangle interpolants: + */ + case WM_PIXELXY: + emit_pixel_xy(p, dst, dst_flags, args[0]); + break; + + case WM_DELTAXY: + emit_delta_xy(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_WPOSXY: + emit_wpos_xy(p, dst, dst_flags, args[0]); + break; + + case WM_PIXELW: + emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_LINTERP: + emit_linterp(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_PINTERP: + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case WM_CINTERP: + emit_cinterp(p, dst, dst_flags, args[0]); + break; + + case WM_FB_WRITE: + emit_fb_write(c, args[0], args[1], args[2]); + break; + + /* Straightforward arithmetic: + */ + case OPCODE_ADD: + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_FRC: + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); + break; + + case OPCODE_FLR: + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); + break; + + case OPCODE_DP3: /* */ + emit_dp3(p, dst, dst_flags, args[0], args[1]); + break; 
+ + case OPCODE_DP4: + emit_dp4(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_DPH: + emit_dph(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_LRP: /* */ + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MAD: + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MOV: + case OPCODE_SWZ: + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); + break; + + case OPCODE_MUL: + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_XPD: + emit_xpd(p, dst, dst_flags, args[0], args[1]); + break; + + /* Higher math functions: + */ + case OPCODE_RCP: + emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + break; + + case OPCODE_RSQ: + emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + break; + + case OPCODE_SIN: + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + break; + + case OPCODE_COS: + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + break; + + case OPCODE_EX2: + emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + break; + + case OPCODE_LG2: + emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + break; + + case OPCODE_SCS: + /* There is an scs math function, but it would need some + * fixup for 16-element execution. 
+ */ + if (dst_flags & WRITEMASK_X) + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + if (dst_flags & WRITEMASK_Y) + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + break; + + case OPCODE_POW: + emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + break; + + /* Comparisons: + */ + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MAX: + emit_max(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_MIN: + emit_min(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_SLT: + emit_slt(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_SGE: + emit_sge(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_LIT: + emit_lit(p, dst, dst_flags, args[0]); + break; + + /* Texturing operations: + */ + case OPCODE_TEX: + emit_tex(c, inst, dst, dst_flags, args[0]); + break; + + case OPCODE_TXB: + emit_txb(c, inst, dst, dst_flags, args[0]); + break; + + case OPCODE_KIL: + emit_kil(c, args[0]); + break; + + default: + assert(0); + } + + for (i = 0; i < 4; i++) + if (inst->dst[i] && inst->dst[i]->spill_slot) + emit_spill(c, + inst->dst[i]->hw_reg, + inst->dst[i]->spill_slot); + } +} + + + + + diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c new file mode 100644 index 00000000000..203eeead0f3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -0,0 +1,874 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "brw_context.h" +#include "brw_wm.h" +#include "brw_util.h" + +#include "shader/program.h" +#include "shader/program_instruction.h" +#include "shader/arbprogparse.h" + +#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +static const char *wm_opcode_strings[] = { + "PIXELXY", + "DELTAXY", + "PIXELW", + "LINTERP", + "PINTERP", + "CINTERP", + "WPOSXY", + "FB_WRITE" +}; + +static const char *wm_file_strings[] = { + "PAYLOAD" +}; + + +/*********************************************************************** + * Source regs + */ + +static struct prog_src_register src_reg(GLuint file, GLuint idx) +{ + struct prog_src_register reg; + reg.File = file; + reg.Index = idx; + reg.Swizzle = SWIZZLE_NOOP; + reg.RelAddr = 0; + reg.NegateBase = 0; + reg.Abs = 0; + reg.NegateAbs = 0; + return reg; +} + +static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) +{ + return src_reg(dst.File, dst.Index); +} + +static struct prog_src_register src_undef( void ) +{ + return src_reg(PROGRAM_UNDEFINED, 0); +} + +static GLboolean src_is_undef(struct prog_src_register src) +{ + return src.File == PROGRAM_UNDEFINED; +} + +static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) +{ + reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); + return reg; +} + +static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + + +/*********************************************************************** + * Dest regs + */ + +static struct prog_dst_register dst_reg(GLuint file, GLuint idx) +{ + struct prog_dst_register reg; + reg.File = file; + reg.Index = idx; + reg.WriteMask = WRITEMASK_XYZW; + reg.CondMask = 
0; + reg.CondSwizzle = 0; + reg.pad = 0; + reg.CondSrc = 0; + return reg; +} + +static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) +{ + reg.WriteMask &= mask; + return reg; +} + +static struct prog_dst_register dst_undef( void ) +{ + return dst_reg(PROGRAM_UNDEFINED, 0); +} + + + +static struct prog_dst_register get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + _mesa_printf("%s: out of temporaries\n", __FILE__); + exit(1); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); +} + + +/* Return a temporary to the pool: clear only the bit that get_temp() set + * for it, i.e. bit (Index - FIRST_INTERNAL_TEMP) of c->fp_temp. The mask + * must be parenthesized because unary ~ binds tighter than <<. + */ +static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) +{ + c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->prog_instructions[c->nr_fp_insns++]; +} + +static struct prog_instruction *emit_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst = get_fp_inst(c); + *inst = *inst0; + return inst; +} + +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + GLuint tex_src_unit, + GLuint tex_src_target, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + struct prog_instruction *inst = get_fp_inst(c); + + memset(inst, 0, sizeof(*inst)); + + inst->Opcode = op; + inst->DstReg = dest; + inst->SaturateMode = saturate; + inst->TexSrcUnit = tex_src_unit; + inst->TexSrcTarget = tex_src_target; + inst->SrcReg[0] = src0; + inst->SrcReg[1] = src1; + inst->SrcReg[2] = src2; + + return inst; +} + + + + +/*********************************************************************** + * Special instructions for interpolation and other tasks + */ + +static struct prog_src_register
get_pixel_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->pixel_xy)) { + struct prog_dst_register pixel_xy = get_temp(c); + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + + + /* Emit the out calculations, and hold onto the results. Use + * two instructions as a temporary is required. + */ + /* pixel_xy.xy = PIXELXY payload[0]; + */ + emit_op(c, + WM_PIXELXY, + dst_mask(pixel_xy, WRITEMASK_XY), + 0, 0, 0, + payload_r0_depth, + src_undef(), + src_undef()); + + c->pixel_xy = src_reg_from_dst(pixel_xy); + } + + return c->pixel_xy; +} + +static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->delta_xy)) { + struct prog_dst_register delta_xy = get_temp(c); + struct prog_src_register pixel_xy = get_pixel_xy(c); + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + + /* deltas.xy = DELTAXY pixel_xy, payload[0] + */ + emit_op(c, + WM_DELTAXY, + dst_mask(delta_xy, WRITEMASK_XY), + 0, 0, 0, + pixel_xy, + payload_r0_depth, + src_undef()); + + c->delta_xy = src_reg_from_dst(delta_xy); + } + + return c->delta_xy; +} + +static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->pixel_w)) { + struct prog_dst_register pixel_w = get_temp(c); + struct prog_src_register deltas = get_delta_xy(c); + struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); + + + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x + */ + emit_op(c, + WM_PIXELW, + dst_mask(pixel_w, WRITEMASK_W), + 0, 0, 0, + interp_wpos, + deltas, + src_undef()); + + + c->pixel_w = src_reg_from_dst(pixel_w); + } + + return c->pixel_w; +} + +static void emit_interp( struct brw_wm_compile *c, + GLuint idx ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct prog_src_register deltas = get_delta_xy(c); + struct prog_src_register arg2; + GLuint 
opcode; + + /* Need to use PINTERP on attributes which have been + * multiplied by 1/W in the SF program, and LINTERP on those + * which have not: + */ + switch (idx) { + case FRAG_ATTRIB_WPOS: + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); + break; + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + if (c->key.flat_shade) { + emit_op(c, + WM_CINTERP, + dst, + 0, 0, 0, + interp, + src_undef(), + src_undef()); + } + else { + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + src_undef()); + } + break; + default: + emit_op(c, + WM_PINTERP, + dst, + 0, 0, 0, + interp, + deltas, + get_pixel_w(c)); + break; + } + + c->fp_interp_emitted |= 1<<idx; +} + + +/*********************************************************************** + * Hacks to extend the program parameter and constant lists. + */ + +/* Add the fog parameters to the parameter list of the original + * program, rather than creating a new list. Doesn't really do any + * harm and it's not as if the parameter handling isn't a big hack + * anyway. 
+ */ +static struct prog_src_register search_or_add_param6( struct brw_wm_compile *c, + GLint s0, + GLint s1, + GLint s2, + GLint s3, + GLint s4, + GLint s5) +{ + struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; + GLint tokens[6]; + GLuint idx; + tokens[0] = s0; + tokens[1] = s1; + tokens[2] = s2; + tokens[3] = s3; + tokens[4] = s4; + tokens[5] = s5; + + for (idx = 0; idx < paramList->NumParameters; idx++) { + if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && + memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) + return src_reg(PROGRAM_STATE_VAR, idx); + } + + idx = _mesa_add_state_reference( paramList, tokens ); + + /* Recalculate state dependency: + */ + c->fp->param_state = brw_parameter_list_state_flags( paramList ); + + return src_reg(PROGRAM_STATE_VAR, idx); +} + + +static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, + GLfloat s0, + GLfloat s1, + GLfloat s2, + GLfloat s3) +{ + struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; + GLfloat values[4]; + GLuint idx; + + values[0] = s0; + values[1] = s1; + values[2] = s2; + values[3] = s3; + + /* Have to search, otherwise multiple compilations will each grow + * the parameter list. + */ + for (idx = 0; idx < paramList->NumParameters; idx++) { + if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && + memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) + + /* XXX: this mimics the mesa bug which puts all constants and + * parameters into the "PROGRAM_STATE_VAR" category: + */ + return src_reg(PROGRAM_STATE_VAR, idx); + } + + idx = _mesa_add_unnamed_constant( paramList, values ); + + return src_reg(PROGRAM_STATE_VAR, idx); +} + + + +/*********************************************************************** + * Expand various instructions here to simpler forms. 
+ */ +static void precalc_dst( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_src_register src1 = inst->SrcReg[1]; + struct prog_dst_register dst = inst->DstReg; + + if (dst.WriteMask & WRITEMASK_Y) { + /* dst.y = mul src0.y, src1.y + */ + emit_op(c, + OPCODE_MUL, + dst_mask(dst, WRITEMASK_Y), + inst->SaturateMode, 0, 0, + src0, + src1, + src_undef()); + } + + + if (dst.WriteMask & WRITEMASK_XZ) { + GLuint z = GET_SWZ(src0.Swizzle, Z); + + /* dst.xz = swz src0.1zzz + */ + emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XZ), + inst->SaturateMode, 0, 0, + src_swizzle(src0, SWIZZLE_ONE, z, z, z), + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_W) { + /* dst.w = mov src1.w + */ + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_W), + inst->SaturateMode, 0, 0, + src1, + src_undef(), + src_undef()); + } +} + + +static void precalc_lit( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_dst_register dst = inst->DstReg; + + if (dst.WriteMask & WRITEMASK_XW) { + /* dst.xw = swz src0.1111 + */ + emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XW), + 0, 0, 0, + src_swizzle1(src0, SWIZZLE_ONE), + src_undef(), + src_undef()); + } + + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, 0, 0, + src0, + src_undef(), + src_undef()); + } +} + +static void precalc_tex( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + /* Need to emit YUV texture conversions by hand. Probably need to + * do this here - the alternative is in brw_wm_emit.c, but the + * conversion requires allocating a temporary variable which we + * don't have the facility to do that late in the compilation. 
+ */ + if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) { + emit_op(c, + OPCODE_TEX, + inst->DstReg, + inst->SaturateMode, + inst->TexSrcUnit, + inst->TexSrcTarget, + inst->SrcReg[0], + src_undef(), + src_undef()); + } + else { + /* + CONST C0 = { -.5, -.0625, -.5, 1.164 } + CONST C1 = { 1.596, -0.813, 2.018, -.391 } + UYV = TEX ... + UYV.xyz = ADD UYV, C0 + UYV.y = MUL UYV.y, C0.w + RGB.xyz = MAD UYV.xxz, C1, UYV.y + RGB.y = MAD UYV.z, C1.w, RGB.y + */ + struct prog_dst_register dst = inst->DstReg; + struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_dst_register tmp = get_temp(c); + struct prog_src_register tmpsrc = src_reg_from_dst(tmp); + struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); + struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); + + /* tmp = TEX ... + */ + emit_op(c, + OPCODE_TEX, + tmp, + inst->SaturateMode, + inst->TexSrcUnit, + inst->TexSrcTarget, + src0, + src_undef(), + src_undef()); + + /* tmp.xyz = ADD TMP, C0 + */ + emit_op(c, + OPCODE_ADD, + dst_mask(tmp, WRITEMASK_XYZ), + 0, 0, 0, + tmpsrc, + C0, + src_undef()); + + /* YUV.y = MUL YUV.y, C0.w + */ + emit_op(c, + OPCODE_MUL, + dst_mask(tmp, WRITEMASK_Y), + 0, 0, 0, + tmpsrc, + src_swizzle1(C0, W), + src_undef()); + + /* RGB.xyz = MAD YUV.xxz, C1, YUV.y + */ + emit_op(c, + OPCODE_MAD, + dst_mask(dst, WRITEMASK_XYZ), + 0, 0, 0, + src_swizzle(tmpsrc, X,X,Z,Z), + C1, + src_swizzle1(tmpsrc, Y)); + + /* RGB.y = MAD YUV.z, C1.w, RGB.y + */ + emit_op(c, + OPCODE_MAD, + dst_mask(dst, WRITEMASK_Y), + 0, 0, 0, + src_swizzle1(tmpsrc, Z), + src_swizzle1(C1, W), + src_swizzle1(src_reg_from_dst(dst), Y)); + } +} + + +static GLboolean projtex( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src = inst->SrcReg[0]; + + /* Only try to detect the simplest cases. Could detect (later) + * cases where we are trying to emit code like RCP {1.0}, MUL x, + * {1.0}, and so on. 
+ * + * More complex cases than this typically only arise from + * user-provided fragment programs anyway: + */ + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) + return 0; /* ut2004 gun rendering !?! */ + else if (src.File == PROGRAM_INPUT && + GET_SWZ(src.Swizzle, W) == W && + (c->key.projtex_mask & (1<<src.Index)) == 0) + return 0; + else + return 1; +} + + +static void precalc_txp( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + + if (projtex(c, inst)) { + struct prog_dst_register tmp = get_temp(c); + struct prog_instruction tmp_inst; + + /* tmp0.w = RCP inst.arg[0][3] + */ + emit_op(c, + OPCODE_RCP, + dst_mask(tmp, WRITEMASK_W), + 0, 0, 0, + src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), + src_undef(), + src_undef()); + + /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww + */ + emit_op(c, + OPCODE_MUL, + dst_mask(tmp, WRITEMASK_XYZ), + 0, 0, 0, + src0, + src_swizzle1(src_reg_from_dst(tmp), W), + src_undef()); + + /* dst = precalc(TEX tmp0) + */ + tmp_inst = *inst; + tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); + precalc_tex(c, &tmp_inst); + + release_temp(c, tmp); + } + else + { + /* dst = precalc(TEX src0) + */ + precalc_tex(c, inst); + } +} + + + + + +/*********************************************************************** + * Add instructions to perform fog blending + */ + +static void fog_blend( struct brw_wm_compile *c, + struct prog_src_register fog_factor ) +{ + struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + struct prog_src_register fogcolor = search_or_add_param6( c, STATE_FOG_COLOR, 0,0,0,0,0 ); + + /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */ + + emit_op(c, + OPCODE_LRP, + dst_mask(outcolor, WRITEMASK_XYZ), + 0, 0, 0, + fog_factor, + src_reg_from_dst(outcolor), + fogcolor); +} + + + +/* This one is simple - just take the interpolated fog coordinate and + * use it as the fog blend factor. 
+ */ +static void fog_interpolated( struct brw_wm_compile *c ) +{ + struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC); + + if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC))) + emit_interp(c, FRAG_ATTRIB_FOGC); + + fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X))); +} + +static void emit_fog( struct brw_wm_compile *c ) +{ + if (!c->fp->program.FogOption) + return; + + if (1) + fog_interpolated( c ); + else { + /* TODO: per-pixel fog */ + assert(0); + } +} + +static void emit_fb_write( struct brw_wm_compile *c ) +{ + struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + + emit_op(c, + WM_FB_WRITE, + dst_mask(dst_undef(),0), + 0, 0, 0, + outcolor, + payload_r0_depth, + outdepth); +} + + + + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. 
+ */ + +static void validate_src_regs( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint nr_args = brw_wm_nr_args( inst->Opcode ); + GLuint i; + + for (i = 0; i < nr_args; i++) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) { + GLuint idx = inst->SrcReg[i].Index; + if (!(c->fp_interp_emitted & (1<<idx))) { + emit_interp(c, idx); + } + } + } +} + + + +void brw_wm_pass_fp( struct brw_wm_compile *c ) +{ + struct brw_fragment_program *fp = c->fp; + GLuint insn; + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("\n\n\npre-fp:\n"); + _mesa_print_program(&fp->program.Base); + _mesa_printf("\n"); + } + + c->pixel_xy = src_undef(); + c->delta_xy = src_undef(); + c->pixel_w = src_undef(); + c->nr_fp_insns = 0; + + /* Emit preamble instructions: + */ + + + for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { + const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; + struct prog_instruction *out; + + /* Check for INPUT values, emit INTERP instructions where + * necessary: + */ + validate_src_regs(c, inst); + + + switch (inst->Opcode) { + case OPCODE_SWZ: + out = emit_insn(c, inst); + out->Opcode = OPCODE_MOV; + break; + + case OPCODE_ABS: + out = emit_insn(c, inst); + out->Opcode = OPCODE_MOV; + out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Abs = 1; + break; + + case OPCODE_SUB: + out = emit_insn(c, inst); + out->Opcode = OPCODE_ADD; + out->SrcReg[1].NegateBase ^= 0xf; + break; + + case OPCODE_SCS: + out = emit_insn(c, inst); + /* This should probably be done in the parser. + */ + out->DstReg.WriteMask &= WRITEMASK_XY; + break; + + case OPCODE_DST: + precalc_dst(c, inst); + break; + + case OPCODE_LIT: + precalc_lit(c, inst); + break; + + case OPCODE_TXP: + precalc_txp(c, inst); + break; + + case OPCODE_XPD: + out = emit_insn(c, inst); + /* This should probably be done in the parser. 
+ */ + out->DstReg.WriteMask &= WRITEMASK_XYZ; + break; + + case OPCODE_KIL: + out = emit_insn(c, inst); + /* This should probably be done in the parser. + */ + out->DstReg.WriteMask = 0; + break; + + case OPCODE_END: + case OPCODE_PRINT: + break; + + default: + emit_insn(c, inst); + break; + } + } + + emit_fog(c); + emit_fb_write(c); + + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("\n\n\npass_fp:\n"); +/* _mesa_debug_fp_inst(c->nr_fp_insns, c->prog_instructions, wm_opcode_strings, wm_file_strings); */ + _mesa_printf("\n"); + } +} + diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c new file mode 100644 index 00000000000..ec2b976faa7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -0,0 +1,216 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "mtypes.h" +#include "brw_wm.h" + + +#undef P /* promoted depth */ +#undef C /* computed */ +#undef N /* non-promoted? */ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + GLuint mode:2; + GLuint sd_present:1; + GLuint sd_to_rt:1; + GLuint dd_present:1; + GLuint ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0,
1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + struct brw_wm_prog_key *key ) +{ + GLuint reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present) { + key->source_depth_reg = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + 
line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c new file mode 100644 index 00000000000..36b69b70681 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -0,0 +1,464 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_wm.h" +#include "program.h" +#include "arbprogparse.h" +#include "program_instruction.h" + + + +/*********************************************************************** + */ + +static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) +{ + assert(c->nr_refs < BRW_WM_MAX_REF); + return &c->refs[c->nr_refs++]; +} + +static struct brw_wm_value *get_value( struct brw_wm_compile *c) +{ + assert(c->nr_refs < BRW_WM_MAX_VREG); + return &c->vreg[c->nr_vreg++]; +} + +static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) +{ + assert(c->nr_insns < BRW_WM_MAX_INSN); + return &c->instruction[c->nr_insns++]; +} + +/*********************************************************************** + */ + +static void pass0_init_undef( struct brw_wm_compile *c) +{ + struct brw_wm_ref *ref = &c->undef_ref; + ref->value = &c->undef_value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; +} + +static void pass0_set_fpreg_value( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + struct brw_wm_value *value ) +{ + struct brw_wm_ref *ref = get_ref(c); + ref->value = value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; + c->pass0_fp_reg[file][idx][component] = ref; +} + +static void pass0_set_fpreg_ref( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + const struct brw_wm_ref *src_ref ) +{ + c->pass0_fp_reg[file][idx][component] = src_ref; +} + +static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, + const GLfloat *param_ptr ) +{ + GLuint i = c->prog_data.nr_params++; + + if (i >= BRW_WM_MAX_PARAM) { + _mesa_printf("%s: out of params\n", __FUNCTION__); + c->prog_data.error = 1; + return NULL; + } + else { + struct brw_wm_ref *ref = get_ref(c); + + 
c->prog_data.param[i] = param_ptr; + c->nr_creg = (i+16)/16; + + /* Push the offsets into hw_reg. These will be added to the + * real register numbers once one is allocated in pass2. + */ + ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); + ref->value = &c->creg[i/16]; + ref->insn = 0; + ref->prevuse = NULL; + + return ref; + } +} + + +static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, + const GLfloat *constval ) +{ + GLuint i; + + /* Search for an existing const value matching the request: + */ + for (i = 0; i < c->nr_constrefs; i++) { + if (c->constref[i].constval == *constval) + return c->constref[i].ref; + } + + /* Else try to add a new one: + */ + if (c->nr_constrefs < BRW_WM_MAX_CONST) { + GLuint i = c->nr_constrefs++; + + /* A constant is a special type of parameter: + */ + c->constref[i].constval = *constval; + c->constref[i].ref = get_param_ref(c, constval); + + return c->constref[i].ref; + } + else { + _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + c->prog_data.error = 1; + return NULL; + } +} + + +/* Lookup our internal registers + */ +static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component ) +{ + const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component]; + + if (!ref) { + switch (file) { + case PROGRAM_INPUT: + case PROGRAM_PAYLOAD: + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + break; + + case PROGRAM_LOCAL_PARAM: + ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]); + break; + + case PROGRAM_ENV_PARAM: + ref = get_param_ref(c, &c->env_param[idx][component]); + break; + + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: { + struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; + + /* There's something really hokey about parameters parsed in + * arb programs - they all end up in here, whether they be + * state values, paramters or constants. 
This duplicates the + * structure above & also seems to subvert the limits set for + * each type of constant/param. + */ + switch (plist->Parameters[idx].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + /* These are invarient: + */ + ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + break; + + case PROGRAM_STATE_VAR: + /* These may change from run to run: + */ + ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + break; + + default: + assert(0); + break; + } + break; + } + + default: + assert(0); + break; + } + + c->pass0_fp_reg[file][idx][component] = ref; + } + + if (!ref) + ref = &c->undef_ref; + + return ref; +} + + + + +/*********************************************************************** + * Straight translation to internal instruction format + */ + +static void pass0_set_dst( struct brw_wm_compile *c, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, + GLuint writemask ) +{ + const struct prog_dst_register *dst = &inst->DstReg; + GLuint i; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + out->dst[i] = get_value(c); + + pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); + } + } + + out->writemask = writemask; +} + + +static void pass0_set_dst_scalar( struct brw_wm_compile *c, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, + GLuint writemask ) +{ + if (writemask) { + const struct prog_dst_register *dst = &inst->DstReg; + GLuint i; + + /* Compute only the first (X) value: + */ + out->writemask = WRITEMASK_X; + out->dst[0] = get_value(c); + + /* Update our tracking register file for all the components in + * writemask: + */ + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]); + } + } + } + else + out->writemask = 0; +} + + + +static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, + struct prog_src_register src, + GLuint i ) +{ + GLuint component = 
GET_SWZ(src.Swizzle,i); + const struct brw_wm_ref *src_ref; + static const GLfloat const_zero = 0.0; + static const GLfloat const_one = 1.0; + + + if (component == SWIZZLE_ZERO) + src_ref = get_const_ref(c, &const_zero); + else if (component == SWIZZLE_ONE) + src_ref = get_const_ref(c, &const_one); + else + src_ref = pass0_get_reg(c, src.File, src.Index, component); + + return src_ref; +} + + +static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, + struct prog_src_register src, + GLuint i, + struct brw_wm_instruction *insn) +{ + const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); + struct brw_wm_ref *newref = get_ref(c); + + newref->value = ref->value; + newref->hw_reg = ref->hw_reg; + + if (insn) { + newref->insn = insn - c->instruction; + newref->prevuse = newref->value->lastuse; + newref->value->lastuse = newref; + } + + if (src.NegateBase & (1<<i)) + newref->hw_reg.negate ^= 1; + + if (src.Abs) { + newref->hw_reg.negate = 0; + newref->hw_reg.abs = 1; + } + + return newref; +} + + + +static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct brw_wm_instruction *out = get_instruction(c); + GLuint writemask = inst->DstReg.WriteMask; + GLuint nr_args = brw_wm_nr_args(inst->Opcode); + GLuint i, j; + + /* Copy some data out of the instruction + */ + out->opcode = inst->Opcode; + out->saturate = (inst->SaturateMode != SATURATE_OFF); + out->tex_unit = inst->TexSrcUnit; + out->tex_idx = inst->TexSrcTarget; + + /* Args: + */ + for (i = 0; i < nr_args; i++) { + for (j = 0; j < 4; j++) { + out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out); + } + } + + /* Dst: + */ + if (brw_wm_is_scalar_result(out->opcode)) + pass0_set_dst_scalar(c, out, inst, writemask); + else + pass0_set_dst(c, out, inst, writemask); + + return out; +} + + + +/*********************************************************************** + * Optimize moves and swizzles away: + */ +static void pass0_precalc_mov( 
struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + const struct prog_dst_register *dst = &inst->DstReg; + GLuint writemask = inst->DstReg.WriteMask; + GLuint i; + + /* Get the effect of a MOV by manipulating our register table: + */ + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + pass0_set_fpreg_ref( c, dst->File, dst->Index, i, + get_new_ref(c, inst->SrcReg[0], i, NULL)); + } + } +} + + +/* Initialize payload "registers". + */ +static void pass0_init_payload( struct brw_wm_compile *c ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + GLuint j = i >= c->key.nr_depth_regs ? 0 : i; + pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, + &c->payload.depth[j] ); + } + +#if 0 + /* This seems to be an alternative to the INTERP_WPOS stuff I do + * elsewhere: + */ + if (c->key.source_depth_reg) + pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2, + &c->payload.depth[c->key.source_depth_reg/2]); +#endif + + for (i = 0; i < FRAG_ATTRIB_MAX; i++) + pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, + &c->payload.input_interp[i] ); +} + +/*********************************************************************** + * PASS 0 + * + * Work forwards to give each calculated value a unique number. Where + * an instruction produces duplicate values (eg DP3), all are given + * the same number. + * + * Translate away swizzling and eliminate non-saturating moves. 
+ */ +void brw_wm_pass0( struct brw_wm_compile *c ) +{ + GLuint insn; + + c->nr_vreg = 0; + c->nr_insns = 0; + + pass0_init_undef(c); + pass0_init_payload(c); + + for (insn = 0; insn < c->nr_fp_insns; insn++) { + const struct prog_instruction *inst = &c->prog_instructions[insn]; + + + /* Optimize away moves, otherwise emit translated instruction: + */ + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_SWZ: + if (!inst->SaturateMode) { + pass0_precalc_mov(c, inst); + } + else { + translate_insn(c, inst); + } + break; + + + default: + translate_insn(c, inst); + break; + } + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass0"); + } +} + diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c new file mode 100644 index 00000000000..21d0881d57e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -0,0 +1,278 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_wm.h" +#include "program.h" +#include "arbprogparse.h" +#include "program_instruction.h" + + +static GLuint get_tracked_mask(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint i; + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<<i)) { + if (!inst->dst[i]->contributes_to_output) { + inst->writemask &= ~(1<<i); + inst->dst[i] = 0; + } + } + } + + return inst->writemask; +} + +/* Remove a reference from a value's usage chain. + */ +static void unlink_ref(struct brw_wm_ref *ref) +{ + struct brw_wm_value *value = ref->value; + + if (ref == value->lastuse) { + value->lastuse = ref->prevuse; + } else { + struct brw_wm_ref *i = value->lastuse; + while (i->prevuse != ref) i = i->prevuse; + i->prevuse = ref->prevuse; + } +} + +static void track_arg(struct brw_wm_compile *c, + struct brw_wm_instruction *inst, + GLuint arg, + GLuint readmask) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + struct brw_wm_ref *ref = inst->src[arg][i]; + if (ref) { + if (readmask & (1<<i)) + ref->value->contributes_to_output = 1; + else { + unlink_ref(ref); + inst->src[arg][i] = NULL; + } + } + } +} + +static GLuint get_texcoord_mask( GLuint tex_idx ) +{ + switch (tex_idx) { + case TEXTURE_1D_INDEX: return WRITEMASK_X; + case TEXTURE_2D_INDEX: return WRITEMASK_XY; + case TEXTURE_3D_INDEX: return WRITEMASK_XYZ; + case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ; + case TEXTURE_RECT_INDEX: return WRITEMASK_XY; + default: return 0; + } +} + +/* Step two: Basically this is dead code elimination. 
+ * + * Iterate backwards over instructions, noting which values + * contribute to the final result. Adjust writemasks to only + * calculate these values. + */ +void brw_wm_pass1( struct brw_wm_compile *c ) +{ + GLint insn; + + for (insn = c->nr_insns-1; insn >= 0; insn--) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + GLuint writemask; + GLuint read0, read1, read2; + + if (inst->opcode == OPCODE_KIL) { + track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ + continue; + } + + if (inst->opcode == WM_FB_WRITE) { + track_arg(c, inst, 0, WRITEMASK_XYZW); + track_arg(c, inst, 1, WRITEMASK_XYZW); + if (c->key.source_depth_to_render_target && + c->key.computes_depth) + track_arg(c, inst, 2, WRITEMASK_Z); + else + track_arg(c, inst, 2, 0); + continue; + } + + /* Lookup all the registers which were written by this + * instruction and get a mask of those that contribute to the output: + */ + writemask = get_tracked_mask(c, inst); + if (!writemask) { + GLuint arg; + for (arg = 0; arg < 3; arg++) + track_arg(c, inst, arg, 0); + continue; + } + + read0 = 0; + read1 = 0; + read2 = 0; + + /* Mark all inputs which contribute to the marked outputs: + */ + switch (inst->opcode) { + case OPCODE_ABS: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_MOV: + read0 = writemask; + break; + + case OPCODE_SUB: + case OPCODE_SLT: + case OPCODE_SGE: + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + read0 = writemask; + read1 = writemask; + break; + + case OPCODE_MAD: + case OPCODE_CMP: + case OPCODE_LRP: + read0 = writemask; + read1 = writemask; + read2 = writemask; + break; + + case OPCODE_XPD: + if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; + if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; + if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; + read1 = read0; + break; + + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + case OPCODE_SCS: + case 
WM_CINTERP: + case WM_PIXELXY: + read0 = WRITEMASK_X; + break; + + case OPCODE_POW: + read0 = WRITEMASK_X; + read1 = WRITEMASK_X; + break; + + case OPCODE_TEX: + read0 = get_texcoord_mask(inst->tex_idx); + + if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + read0 |= WRITEMASK_Z; + break; + + case OPCODE_TXB: + /* Shadow ignored for txb. + */ + read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; + break; + + case WM_WPOSXY: + read0 = writemask & WRITEMASK_XY; + break; + + case WM_DELTAXY: + read0 = writemask & WRITEMASK_XY; + read1 = WRITEMASK_X; + break; + + case WM_PIXELW: + read0 = WRITEMASK_X; + read1 = WRITEMASK_XY; + break; + + case WM_LINTERP: + read0 = WRITEMASK_X; + read1 = WRITEMASK_XY; + break; + + case WM_PINTERP: + read0 = WRITEMASK_X; /* interpolant */ + read1 = WRITEMASK_XY; /* deltas */ + read2 = WRITEMASK_W; /* pixel w */ + break; + + case OPCODE_DP3: + read0 = WRITEMASK_XYZ; + read1 = WRITEMASK_XYZ; + break; + + case OPCODE_DPH: + read0 = WRITEMASK_XYZ; + read1 = WRITEMASK_XYZW; + break; + + case OPCODE_DP4: + read0 = WRITEMASK_XYZW; + read1 = WRITEMASK_XYZW; + break; + + case OPCODE_LIT: + read0 = WRITEMASK_XYW; + break; + + case OPCODE_SWZ: + case OPCODE_DST: + case OPCODE_TXP: + default: + assert(0); + break; + } + + track_arg(c, inst, 0, read0); + track_arg(c, inst, 1, read1); + track_arg(c, inst, 2, read2); + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass1"); + } +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c new file mode 100644 index 00000000000..cb8d51fb5cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -0,0 +1,338 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_wm.h" +#include "program.h" +#include "arbprogparse.h" +#include "program_instruction.h" + +/* Use these to force spilling so that that functionality can be + * tested with known-good examples rather than having to construct new + * tests. + */ +#define TEST_PAYLOAD_SPILLS 0 +#define TEST_DST_SPILLS 0 + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value); + +static void prealloc_reg(struct brw_wm_compile *c, + struct brw_wm_value *value, + GLuint reg) +{ + if (value->lastuse) { + /* Set nextuse to zero, it will be corrected by + * update_register_usage(). 
+ */ + c->pass2_grf[reg].value = value; + c->pass2_grf[reg].nextuse = 0; + + value->resident = &c->pass2_grf[reg]; + value->hw_reg = brw_vec8_grf(reg*2, 0); + + if (TEST_PAYLOAD_SPILLS) + spill_value(c, value); + } +} + + +/* Initialize all the register values. Do the initial setup + * calculations for interpolants. + */ +static void init_registers( struct brw_wm_compile *c ) +{ + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + GLuint nr_interp_regs = 0; + GLuint i = 0; + GLuint j; + + for (j = 0; j < c->grf_limit; j++) + c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; + + for (j = 0; j < c->key.nr_depth_regs; j++) + prealloc_reg(c, &c->payload.depth[j], i++); + + for (j = 0; j < c->nr_creg; j++) + prealloc_reg(c, &c->creg[j], i++); + + for (j = 0; j < FRAG_ATTRIB_MAX; j++) + if (inputs & (1<<j)) { + nr_interp_regs++; + prealloc_reg(c, &c->payload.input_interp[j], i++); + } + + assert(nr_interp_regs >= 1); + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.curb_read_length = c->nr_creg * 2; + + c->max_wm_grf = i * 2; +} + + +/* Update the nextuse value for each register in our file. + */ +static void update_register_usage(struct brw_wm_compile *c, + GLuint thisinsn) +{ + GLuint i; + + for (i = 1; i < c->grf_limit; i++) { + struct brw_wm_grf *grf = &c->pass2_grf[i]; + + /* Only search those which can change: + */ + if (grf->nextuse < thisinsn) { + struct brw_wm_ref *ref = grf->value->lastuse; + + /* Has last use of value been passed? + */ + if (ref->insn < thisinsn) { + grf->value->resident = 0; + grf->value = 0; + grf->nextuse = BRW_WM_MAX_INSN; + } + else { + /* Else loop through chain to update: + */ + while (ref->prevuse && ref->prevuse->insn >= thisinsn) + ref = ref->prevuse; + + grf->nextuse = ref->insn; + } + } + } +} + + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value) +{ + /* Allocate a spill slot. 
Note that allocations start from 0x40 - + * the first slot is reserved to mean "undef" in brw_wm_emit.c + */ + if (!value->spill_slot) { + c->last_scratch += 0x40; + value->spill_slot = c->last_scratch; + } + + /* The spill will be done in brw_wm_emit.c immediately after the + * value is calculated, so we can just take this reg without any + * further work. + */ + value->resident->value = NULL; + value->resident->nextuse = BRW_WM_MAX_INSN; + value->resident = NULL; +} + + + +/* Search for contiguous region with the most distant nearest + * member. Free regs count as very distant. + * + * TODO: implement spill-to-reg so that we can rearrange discontigous + * free regs and then spill the oldest non-free regs in sequence. + * This would mean inserting instructions in this pass. + */ +static GLuint search_contiguous_regs(struct brw_wm_compile *c, + GLuint nr, + GLuint thisinsn) +{ + struct brw_wm_grf *grf = c->pass2_grf; + GLuint furthest = 0; + GLuint reg = 0; + GLuint i, j; + + /* Start search at 1: r0 is special and can't be used or spilled. + */ + for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) { + GLuint group_nextuse = BRW_WM_MAX_INSN; + + for (j = 0; j < nr; j++) { + if (grf[i+j].nextuse < group_nextuse) + group_nextuse = grf[i+j].nextuse; + } + + if (group_nextuse > furthest) { + furthest = group_nextuse; + reg = i; + } + } + + assert(furthest != thisinsn); + + /* Any non-empty regs will need to be spilled: + */ + for (j = 0; j < nr; j++) + if (grf[reg+j].value) + spill_value(c, grf[reg+j].value); + + return reg; +} + + +static void alloc_contiguous_dest(struct brw_wm_compile *c, + struct brw_wm_value *dst[], + GLuint nr, + GLuint thisinsn) +{ + GLuint reg = search_contiguous_regs(c, nr, thisinsn); + GLuint i; + + for (i = 0; i < nr; i++) { + if (!dst[i]) { + /* Need to grab a dummy value in TEX case. Don't introduce + * it into the tracking scheme. 
+ */ + dst[i] = &c->vreg[c->nr_vreg++]; + } + else { + assert(!dst[i]->resident); + assert(c->pass2_grf[reg+i].nextuse != thisinsn); + + c->pass2_grf[reg+i].value = dst[i]; + c->pass2_grf[reg+i].nextuse = thisinsn; + + dst[i]->resident = &c->pass2_grf[reg+i]; + } + + dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0); + } + + if ((reg+nr)*2 > c->max_wm_grf) + c->max_wm_grf = (reg+nr) * 2; +} + + +static void load_args(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint thisinsn = inst - c->instruction; + GLuint i,j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) { + struct brw_wm_ref *ref = inst->src[i][j]; + + if (ref) { + if (!ref->value->resident) { + /* Need to bring the value in from scratch space. The code for + * this will be done in brw_wm_emit.c, here we just do the + * register allocation and mark the ref as requiring a fill. + */ + GLuint reg = search_contiguous_regs(c, 1, thisinsn); + + c->pass2_grf[reg].value = ref->value; + c->pass2_grf[reg].nextuse = thisinsn; + + ref->value->resident = &c->pass2_grf[reg]; + + /* Note that a fill is required: + */ + ref->unspill_reg = reg*2; + } + + /* Adjust the hw_reg to point at the value's current location: + */ + assert(ref->value == ref->value->resident->value); + ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2; + } + } + } +} + + + +/* Step 3: Work forwards once again. Perform register allocations, + * taking into account instructions like TEX which require contiguous + * result registers. Where necessary spill registers to scratch space + * and reload later. + */ +void brw_wm_pass2( struct brw_wm_compile *c ) +{ + GLuint insn; + GLuint i; + + init_registers(c); + + for (insn = 0; insn < c->nr_insns; insn++) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + + /* Update registers' nextuse values: + */ + update_register_usage(c, insn); + + /* May need to unspill some args. 
+ */ + load_args(c, inst); + + /* Allocate registers to hold results: + */ + switch (inst->opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + alloc_contiguous_dest(c, inst->dst, 4, insn); + break; + + default: + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<<i)) { + assert(inst->dst[i]); + alloc_contiguous_dest(c, &inst->dst[i], 1, insn); + } + } + break; + } + + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) + for (i = 0; i < 4; i++) + if (inst->dst[i]) + spill_value(c, inst->dst[i]); + + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2"); + } + + c->state = PASS2_DONE; + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2/done"); + } +} + + + diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c new file mode 100644 index 00000000000..93d4cfc3a5f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -0,0 +1,253 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "macros.h" + + + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + + + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static GLuint translate_wrap_mode( GLenum wrap ) +{ + switch( wrap ) { + case GL_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case GL_CLAMP: + return BRW_TEXCOORDMODE_CLAMP_BORDER; /* conform likes it this way */ + case GL_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case GL_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static GLuint U_FIXED(GLfloat value, GLuint frac_bits) +{ + value *= (1<<frac_bits); + return value < 0 ? 
0 : value; +} + +static GLint S_FIXED(GLfloat value, GLuint frac_bits) +{ + return value * (1<<frac_bits); +} + + +static GLuint upload_default_color( struct brw_context *brw, + const GLfloat *color ) +{ + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( struct gl_texture_unit *texUnit, + struct gl_texture_object *texObj, + GLuint sdc_gs_offset, + struct brw_sampler_state *sampler) +{ + _mesa_memset(sampler, 0, sizeof(*sampler)); + + switch (texObj->MinFilter) { + case GL_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + default: + break; + } + + /* Set Anisotropy: + */ + if ( texObj->MaxAnisotropy > 1.0 ) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (texObj->MaxAnisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((texObj->MaxAnisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (texObj->MagFilter) { + case GL_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case GL_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } 
+ + sampler->ss1.r_wrap_mode = translate_wrap_mode(texObj->WrapR); + sampler->ss1.s_wrap_mode = translate_wrap_mode(texObj->WrapS); + sampler->ss1.t_wrap_mode = translate_wrap_mode(texObj->WrapT); + + /* Fulsim complains if I don't do this. Hardware doesn't mind: + */ +#if 0 + if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } +#endif + + /* Set shadow function: + */ + if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = intel_translate_compare_func(texObj->CompareFunc); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(texUnit->LodBias + texObj->LodBias, 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MAX2(texObj->MaxLod, 0), 6); + sampler->ss1.min_lod = U_FIXED(MAX2(texObj->MinLod, 0), 6); + + sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. 
+ */ +static void upload_wm_samplers( struct brw_context *brw ) +{ + GLuint unit; + GLuint sampler_count = 0; + + /* _NEW_TEXTURE */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + + GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor); + + brw_update_sampler_state(texUnit, + texObj, + sdc_gs_offset, + &brw->wm.sampler[unit]); + + sampler_count = unit + 1; + } + } + + if (brw->wm.sampler_count != sampler_count) { + brw->wm.sampler_count = sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + brw->wm.sampler_gs_offset = 0; + + if (brw->wm.sampler_count) + brw->wm.sampler_gs_offset = + brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + brw->wm.sampler, + sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = 0, + .cache = 0 + }, + .update = upload_wm_samplers +}; + + diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c new file mode 100644 index 00000000000..4707a709e77 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -0,0 +1,194 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "bufmgr.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ + +static void invalidate_scratch_cb( struct intel_context *intel, + void *unused ) +{ + /* nothing */ +} + + +static void upload_wm_unit(struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_wm_unit_state wm; + GLuint max_threads; + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + max_threads = 0; + else + max_threads = 31; + + + memset(&wm, 0, sizeof(wm)); + + /* CACHE_NEW_WM_PROG */ + wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; + wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; + wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; + + wm.wm5.max_threads = max_threads; + + if (brw->wm.prog_data->total_scratch) { + GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; + GLuint total = per_thread * (max_threads + 1); + + /* Scratch space -- just have to make sure there is sufficient + * allocated for the active program and current number of threads. 
+ */ + + if (!brw->wm.scratch_buffer) { + bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12); + bmBufferSetInvalidateCB(intel, + brw->wm.scratch_buffer, + invalidate_scratch_cb, + NULL, + GL_FALSE); + } + + if (total > brw->wm.scratch_buffer_size) { + brw->wm.scratch_buffer_size = total; + bmBufferData(intel, + brw->wm.scratch_buffer, + brw->wm.scratch_buffer_size, + NULL, + 0); + } + + assert(per_thread <= 12 * 1024); + wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; + + /* XXX: could make this dynamic as this is so rarely active: + */ + /* BRW_NEW_LOCK */ + wm.thread2.scratch_space_base_pointer = + bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10; + } + + /* CACHE_NEW_SURFACE */ + wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + + /* BRW_NEW_CURBE_OFFSETS */ + wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + + wm.thread3.urb_entry_read_offset = 0; + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + /* CACHE_NEW_SAMPLER */ + wm.wm4.sampler_count = brw->wm.sampler_count; + wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + { + struct gl_fragment_program *fp = brw->fragment_program; + + if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) + wm.wm5.program_uses_depth = 1; /* as far as we can tell */ + + if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + wm.wm5.program_computes_depth = 1; + + /* _NEW_COLOR */ + if (fp->UsesKill || + brw->attribs.Color->AlphaEnabled) + wm.wm5.program_uses_killpixel = 1; + } + + wm.wm5.enable_16_pix = 1; + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + /* _NEW_POLYGONSTIPPLE */ + if (brw->attribs.Polygon->StippleFlag) + wm.wm5.polygon_stipple = 
1; + + /* _NEW_POLYGON */ + if (brw->attribs.Polygon->OffsetFill) { + wm.wm5.depth_offset = 1; + /* Something weird going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewhere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + } + + /* _NEW_LINE */ + if (brw->attribs.Line->StippleFlag) { + wm.wm5.line_stipple = 1; + } + + /* Statistics gathering is only switched on for DEBUG_STATS runs. */ if (INTEL_DEBUG & DEBUG_STATS) + wm.wm4.stats_enable = 1; + + /* Store the finished unit state in the BRW_WM_UNIT cache and remember the offset it returns. */ brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); +} + /* Tracked-state entry for the WM unit: lists the mesa, brw and cache dirty flags this state is keyed on and names upload_wm_unit as its update hook. Presumably the state tracker calls the hook whenever one of the listed flags is dirty -- confirm against brw_state handling. */ +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .mesa = (_NEW_POLYGON | + _NEW_POLYGONSTIPPLE | + _NEW_LINE | + _NEW_COLOR), + + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_LOCK), + + .cache = (CACHE_NEW_SURFACE | + CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .update = upload_wm_unit +}; + diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c new file mode 100644 index 00000000000..6ccf56e41a2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -0,0 +1,252 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "mtypes.h" +#include "texformat.h" +#include "texstore.h" + +#include "intel_mipmap_tree.h" +#include "intel_batchbuffer.h" +#include "intel_tex.h" + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + +static GLuint translate_tex_target( GLenum target ) +{ + switch (target) { + case GL_TEXTURE_1D: + return BRW_SURFACE_1D; + + case GL_TEXTURE_RECTANGLE_NV: + return BRW_SURFACE_2D; + + case GL_TEXTURE_2D: + return BRW_SURFACE_2D; + + case GL_TEXTURE_3D: + return BRW_SURFACE_3D; + + case GL_TEXTURE_CUBE_MAP: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + + +static GLuint translate_tex_format( GLuint mesa_format ) +{ + switch( mesa_format ) { + case MESA_FORMAT_L8: + return BRW_SURFACEFORMAT_L8_UNORM; + + case MESA_FORMAT_I8: + return BRW_SURFACEFORMAT_I8_UNORM; + + case MESA_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + + case MESA_FORMAT_AL88: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case MESA_FORMAT_RGB888: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case MESA_FORMAT_ARGB8888: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case MESA_FORMAT_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case MESA_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case MESA_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; + + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; + + case MESA_FORMAT_Z16: + return BRW_SURFACEFORMAT_L16_UNORM; + + case MESA_FORMAT_RGBA_DXT1: + case MESA_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + default: + assert(0); + return 0; + } +} + +static +void brw_update_texture_surface( GLcontext *ctx, + GLuint unit, + struct brw_surface_state *surf ) +{ + struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); + struct 
gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + + memset(surf, 0, sizeof(*surf)); + + surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf->ss0.surface_type = translate_tex_target(tObj->Target); + surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + /* BRW_NEW_LOCK */ + surf->ss1.base_addr = bmBufferOffset(intel, + intelObj->mt->region->buffer); + + surf->ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; + surf->ss2.width = firstImage->Width - 1; + surf->ss2.height = firstImage->Height - 1; + + surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf->ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */ + surf->ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1; + surf->ss3.depth = firstImage->Depth - 1; + + surf->ss4.min_lod = 0; + + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + surf->ss0.cube_pos_x = 1; + surf->ss0.cube_pos_y = 1; + surf->ss0.cube_pos_z = 1; + surf->ss0.cube_neg_x = 1; + surf->ss0.cube_neg_y = 1; + surf->ss0.cube_neg_z = 1; + } +} + + + +#define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct brw_surface_binding_table bind; + GLuint i; + + memcpy(&bind, &brw->wm.bind, sizeof(bind)); + + { + struct brw_surface_state surf; + struct intel_region *region = brw->state.draw_region; + + memset(&surf, 0, sizeof(surf)); + + if (region->cpp == 4) + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + + surf.ss0.surface_type = BRW_SURFACE_2D; + + 
/* _NEW_COLOR */ + surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled && + brw->attribs.Color->BlendEnabled); + + + surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0]; + surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1]; + surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; + surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3]; + + /* BRW_NEW_LOCK */ + surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer); + + + surf.ss2.width = region->pitch - 1; /* XXX: not really! */ + surf.ss2.height = region->height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = region->tiled; + surf.ss3.pitch = (region->pitch * region->cpp) - 1; + + brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + brw->wm.nr_surfaces = 1; + } + + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + + /* _NEW_TEXTURE, BRW_NEW_TEXDATA + */ + if (texUnit->_ReallyEnabled && + intel_finalize_mipmap_tree(intel,texUnit->_Current)) { + + struct brw_surface_state surf; + + brw_update_texture_surface(ctx, i, &surf); + + brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + brw->wm.nr_surfaces = i+2; + } + else { + brw->wm.bind.surf_ss_offset[i+1] = 0; + } + } + + brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], + &brw->wm.bind ); +} + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_LOCK), /* required for bmBufferOffset */ + .cache = 0 + }, + .update = upload_wm_surfaces +}; + + + diff --git a/src/mesa/drivers/dri/i965/bufmgr.h b/src/mesa/drivers/dri/i965/bufmgr.h new file mode 100644 index 00000000000..83a810cc6dd --- /dev/null +++ b/src/mesa/drivers/dri/i965/bufmgr.h @@ -0,0 +1,211 @@ 
+/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BUFMGR_H +#define BUFMGR_H + +#include "intel_context.h" + + +/* The buffer manager context. Opaque. + */ +struct bufmgr; +struct buffer; + + +struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); + +/* Flags for validate and other calls. If both NO_UPLOAD and NO_EVICT + * are specified, ValidateBuffers is essentially a query. 
+ */ +#define BM_MEM_LOCAL 0x1 +#define BM_MEM_AGP 0x2 +#define BM_MEM_VRAM 0x4 /* not yet used */ +#define BM_WRITE 0x8 /* not yet used */ +#define BM_READ 0x10 /* not yet used */ +#define BM_NO_UPLOAD 0x20 +#define BM_NO_EVICT 0x40 +#define BM_NO_MOVE 0x80 /* not yet used */ +#define BM_NO_ALLOC 0x100 /* legacy "fixed" buffers only */ +#define BM_CLIENT 0x200 /* for map - pointer will be accessed + * without dri lock */ + +#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM) + + + + +/* Create a pool of a given memory type, from a certain offset and a + * certain size. + * + * Also passed in is a virtual pointer to the start of the pool. This + * is useful in the faked-out version in i915 so that MapBuffer can + * return a pointer to a buffer residing in AGP space. + * + * Flags passed into a pool are inherited by all buffers allocated in + * that pool. So pools representing the static front,back,depth + * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match + * the behaviour of the legacy allocations. + * + * Returns -1 for failure, pool number for success. + */ +int bmInitPool( struct intel_context *, + unsigned long low_offset, + void *low_virtual, + unsigned long size, + unsigned flags); + + +/* Stick closely to ARB_vbo semantics - they're well defined and + * understood, and drivers can just pass the calls through without too + * much thunking. + */ +void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers, + int align ); +void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers); + + +/* Hook to inform faked buffer manager about fixed-position + * front,depth,back buffers. These may move to a fully memory-managed + * scheme, or they may continue to be managed as is. + */ +struct buffer *bmGenBufferStatic(struct intel_context *, + unsigned pool); + +/* On evict, buffer manager will call invalidate_cb() to note that the + * buffer needs to be reloaded. 
+ * + * Buffer is uploaded by calling bmMapBuffer() and copying data into + * the returned pointer. + * + * This is basically a big hack to get some more performance by + * turning off backing store for buffers where we either have it + * already (textures) or don't need it (batch buffers, temporary + * vbo's). + */ +void bmBufferSetInvalidateCB(struct intel_context *, + struct buffer *buf, + void (*invalidate_cb)( struct intel_context *, void *ptr ), + void *ptr, + GLboolean dont_fence_subdata); + + +/* The driver has more intimate knowledge of the hardware than a GL + * client would, so the flags here are more prescriptive than the usage + * values in the ARB_vbo interface. + * + * Returns 0 on success, -1 if card memory for the data could not be + * allocated. + */ +int bmBufferData(struct intel_context *, + struct buffer *buf, + unsigned size, + const void *data, + unsigned flags ); + /* Update size bytes at offset inside an existing buffer. A size of 0 is a no-op that returns 0; otherwise returns 0 on success, -1 if card memory could not be allocated. */ +int bmBufferSubData(struct intel_context *, + struct buffer *buf, + unsigned offset, + unsigned size, + const void *data ); + + /* As bmBufferData / bmBufferSubData, with an AUB type and subtype used when the upload is also written to the AUB file. */ +int bmBufferDataAUB(struct intel_context *, + struct buffer *buf, + unsigned size, + const void *data, + unsigned flags, + unsigned aubtype, + unsigned aubsubtype ); + +int bmBufferSubDataAUB(struct intel_context *, + struct buffer *buf, + unsigned offset, + unsigned size, + const void *data, + unsigned aubtype, + unsigned aubsubtype ); + + +/* In this version, taking the offset will provoke an upload on + * buffers not already resident in AGP: + */ +unsigned bmBufferOffset(struct intel_context *, + struct buffer *buf); + + +/* Extract data from the buffer: + */ +void bmBufferGetSubData(struct intel_context *, + struct buffer *buf, + unsigned offset, + unsigned size, + void *data ); + +void *bmMapBuffer( struct intel_context *, + struct buffer *buf, + unsigned access ); + +void bmUnmapBuffer( struct intel_context *, + struct buffer *buf ); + +void bmUnmapBufferAUB( struct intel_context *, + struct buffer *buf, + unsigned aubtype, + unsigned aubsubtype ); + + +/* Pertains to all buffers who's offset has been taken since the last + * fence or 
release. + */ +int bmValidateBuffers( struct intel_context * ); +void bmReleaseBuffers( struct intel_context * ); + + +GLboolean bmError( struct intel_context * ); +void bmEvictAll( struct intel_context * ); + +void *bmFindVirtual( struct intel_context *intel, + unsigned int offset, + size_t sz ); + +/* This functionality is used by the buffer manager, not really sure + * if we need to be exposing it in this way, probably libdrm will + * offer equivalent calls. + * + * For now they can stay, but will likely change/move before final: + */ +unsigned bmSetFence( struct intel_context * ); +unsigned bmLockAndFence( struct intel_context *intel ); +int bmTestFence( struct intel_context *, unsigned fence ); +void bmFinishFence( struct intel_context *, unsigned fence ); + +void bm_fake_NotifyContendedLockTake( struct intel_context * ); + +extern int INTEL_DEBUG; +#define DEBUG_BUFMGR 0x10000000 + +#define DBG(...) do { if (INTEL_DEBUG & DEBUG_BUFMGR) _mesa_printf(__VA_ARGS__); } while(0) + +#endif diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c new file mode 100644 index 00000000000..8f182f3d877 --- /dev/null +++ b/src/mesa/drivers/dri/i965/bufmgr_fake.c @@ -0,0 +1,1434 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Originally a fake version of the buffer manager so that we can + * prototype the changes in a driver fairly quickly, has been fleshed + * out to a fully functional interim solution. + * + * Basically wraps the old style memory management in the new + * programming interface, but is more expressive and avoids many of + * the bugs in the old texture manager. 
+ */ +#include "bufmgr.h" + +#include "intel_context.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" + +#include "simple_list.h" +#include "mm.h" +#include "imports.h" + +#define BM_POOL_MAX 8 + +/* Internal flags: + */ +#define BM_NO_BACKING_STORE 0x2000 +#define BM_NO_FENCE_SUBDATA 0x4000 + + +static int check_fenced( struct intel_context *intel ); + +static int nr_attach = 0; + +/* Wrapper around mm.c's mem_block, which understands that you must + * wait for fences to expire before memory can be freed. This is + * specific to our use of memcpy for uploads - an upload that was + * processed through the command queue wouldn't need to care about + * fences. + */ +struct block { + struct block *next, *prev; + struct pool *pool; /* BM_MEM_AGP */ + struct mem_block *mem; /* BM_MEM_AGP */ + + unsigned referenced:1; + unsigned on_hardware:1; + unsigned fenced:1; + + + unsigned fence; /* BM_MEM_AGP, Split to read_fence, write_fence */ + + struct buffer *buf; + void *virtual; +}; + + +struct buffer { + unsigned id; /* debug only */ + const char *name; + unsigned size; + + unsigned mapped:1; + unsigned dirty:1; + unsigned aub_dirty:1; + unsigned alignment:13; + unsigned flags:16; + + struct block *block; + void *backing_store; + void (*invalidate_cb)( struct intel_context *, void * ); + void *invalidate_ptr; +}; + +struct pool { + unsigned size; + unsigned low_offset; + struct buffer *static_buffer; + unsigned flags; + struct mem_block *heap; + void *virtual; + struct block lru; /* only allocated, non-fence-pending blocks here */ +}; + +struct bufmgr { + _glthread_Mutex mutex; /**< for thread safety */ + struct pool pool[BM_POOL_MAX]; + unsigned nr_pools; + + unsigned buf_nr; /* for generating ids */ + + struct block referenced; /* after bmBufferOffset */ + struct block on_hardware; /* after bmValidateBuffers */ + struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ + /* then to pool->lru or free() */ + + unsigned last_fence; + 
unsigned free_on_hardware; + + unsigned fail:1; + unsigned need_fence:1; +}; + +#define MAXFENCE 0x7fffffff + +static GLboolean FENCE_LTE( unsigned a, unsigned b ) +{ + if (a == b) + return GL_TRUE; + + if (a < b && b - a < (1<<24)) + return GL_TRUE; + + if (a > b && MAXFENCE - a + b < (1<<24)) + return GL_TRUE; + + return GL_FALSE; +} + +int bmTestFence( struct intel_context *intel, unsigned fence ) +{ + /* Slight problem with wrap-around: + */ + return fence == 0 || FENCE_LTE(fence, intel->sarea->last_dispatch); +} + +#define LOCK(bm) \ + int dolock = nr_attach > 1; \ + if (dolock) _glthread_LOCK_MUTEX(bm->mutex) + +#define UNLOCK(bm) \ + if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex) + + + +static GLboolean alloc_from_pool( struct intel_context *intel, + unsigned pool_nr, + struct buffer *buf ) +{ + struct bufmgr *bm = intel->bm; + struct pool *pool = &bm->pool[pool_nr]; + struct block *block = (struct block *)calloc(sizeof *block, 1); + GLuint sz, align = (1<<buf->alignment); + + if (!block) + return GL_FALSE; + + sz = (buf->size + align-1) & ~(align-1); + + block->mem = mmAllocMem(pool->heap, + sz, + buf->alignment, 0); + if (!block->mem) { + free(block); + return GL_FALSE; + } + + make_empty_list(block); + + /* Insert at head or at tail??? 
+ */ + insert_at_tail(&pool->lru, block); + + block->pool = pool; + block->virtual = pool->virtual + block->mem->ofs; + block->buf = buf; + + buf->block = block; + + return GL_TRUE; +} + + + + + + + + +/* Release the card storage associated with buf: + */ +static void free_block( struct intel_context *intel, struct block *block ) +{ + DBG("free block %p\n", block); + + if (!block) + return; + + check_fenced(intel); + + if (block->referenced) { + _mesa_printf("tried to free block on referenced list\n"); + assert(0); + } + else if (block->on_hardware) { + block->buf = NULL; + intel->bm->free_on_hardware += block->mem->size; + } + else if (block->fenced) { + block->buf = NULL; + } + else { + DBG(" - free immediately\n"); + remove_from_list(block); + + mmFreeMem(block->mem); + free(block); + } +} + + +static void alloc_backing_store( struct intel_context *intel, struct buffer *buf ) +{ + assert(!buf->backing_store); + assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); + + buf->backing_store = ALIGN_MALLOC(buf->size, 64); +} + +static void free_backing_store( struct intel_context *intel, struct buffer *buf ) +{ + assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); + + if (buf->backing_store) { + ALIGN_FREE(buf->backing_store); + buf->backing_store = NULL; + } +} + + + + + + +static void set_dirty( struct intel_context *intel, + struct buffer *buf ) +{ + if (buf->flags & BM_NO_BACKING_STORE) + buf->invalidate_cb(intel, buf->invalidate_ptr); + + assert(!(buf->flags & BM_NO_EVICT)); + + DBG("set_dirty - buf %d\n", buf->id); + buf->dirty = 1; +} + + +static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool ) +{ + struct bufmgr *bm = intel->bm; + struct block *block, *tmp; + int i; + + DBG("%s\n", __FUNCTION__); + + for (i = 0; i < bm->nr_pools; i++) { + if (!(bm->pool[i].flags & BM_NO_EVICT)) { + foreach_s(block, tmp, &bm->pool[i].lru) { + + if (block->buf && + (block->buf->flags & BM_NO_FENCE_SUBDATA)) + continue; + + if 
(block->fence && max_fence && + !FENCE_LTE(block->fence, max_fence)) + return 0; + + set_dirty(intel, block->buf); + block->buf->block = NULL; + + free_block(intel, block); + *pool = i; + return 1; + } + } + } + + + return 0; +} + + +#define foreach_s_rev(ptr, t, list) \ + for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) + +static int evict_mru( struct intel_context *intel, GLuint *pool ) +{ + struct bufmgr *bm = intel->bm; + struct block *block, *tmp; + int i; + + DBG("%s\n", __FUNCTION__); + + for (i = 0; i < bm->nr_pools; i++) { + if (!(bm->pool[i].flags & BM_NO_EVICT)) { + foreach_s_rev(block, tmp, &bm->pool[i].lru) { + + if (block->buf && + (block->buf->flags & BM_NO_FENCE_SUBDATA)) + continue; + + set_dirty(intel, block->buf); + block->buf->block = NULL; + + free_block(intel, block); + *pool = i; + return 1; + } + } + } + + + return 0; +} + + + +static int check_fenced( struct intel_context *intel ) +{ + struct bufmgr *bm = intel->bm; + struct block *block, *tmp; + int ret = 0; + + foreach_s(block, tmp, &bm->fenced ) { + assert(block->fenced); + + if (bmTestFence(intel, block->fence)) { + + block->fenced = 0; + + if (!block->buf) { + DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size); + remove_from_list(block); + mmFreeMem(block->mem); + free(block); + } + else { + DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size); + move_to_tail(&block->pool->lru, block); + } + + ret = 1; + } + else { + /* Blocks are ordered by fence, so if one fails, all from + * here will fail also: + */ + break; + } + } + + /* Also check the referenced list: + */ + foreach_s(block, tmp, &bm->referenced ) { + if (block->fenced && + bmTestFence(intel, block->fence)) { + block->fenced = 0; + } + } + + + DBG("%s: %d\n", __FUNCTION__, ret); + return ret; +} + + + +static void fence_blocks( struct intel_context *intel, + unsigned fence ) +{ + struct bufmgr *bm = intel->bm; + struct block *block, *tmp; + + foreach_s (block, tmp, 
&bm->on_hardware) { + DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, + block->mem->size, block->buf, fence); + block->fence = fence; + + block->on_hardware = 0; + block->fenced = 1; + + /* Move to tail of pending list here + */ + move_to_tail(&bm->fenced, block); + } + + /* Also check the referenced list: + */ + foreach_s (block, tmp, &bm->referenced) { + if (block->on_hardware) { + DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, + block->mem->size, block->buf, fence); + + block->fence = fence; + block->on_hardware = 0; + block->fenced = 1; + } + } + + + bm->last_fence = fence; + assert(is_empty_list(&bm->on_hardware)); +} + + + + +static GLboolean alloc_block( struct intel_context *intel, + struct buffer *buf ) +{ + struct bufmgr *bm = intel->bm; + int i; + + assert(intel->locked); + + DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name); + + for (i = 0; i < bm->nr_pools; i++) { + if (!(bm->pool[i].flags & BM_NO_ALLOC) && + alloc_from_pool(intel, i, buf)) { + + DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__, + buf->block->mem->ofs, buf->block->mem->size); + + return GL_TRUE; + } + } + + DBG("%s --> fail\n", __FUNCTION__); + return GL_FALSE; +} + + +static GLboolean evict_and_alloc_block( struct intel_context *intel, + struct buffer *buf ) +{ + GLuint pool; + struct bufmgr *bm = intel->bm; + + assert(buf->block == NULL); + + /* Put a cap on the amount of free memory we'll allow to accumulate + * before emitting a fence. 
+ */ + if (bm->free_on_hardware > 1 * 1024 * 1024) { + DBG("fence for free space: %x\n", bm->free_on_hardware); + bmSetFence(intel); + } + + /* Search for already free memory: + */ + if (alloc_block(intel, buf)) + return GL_TRUE; + + /* Look for memory that may have become free: + */ + if (check_fenced(intel) && + alloc_block(intel, buf)) + return GL_TRUE; + + /* Look for memory blocks not used for >1 frame: + */ + while (evict_lru(intel, intel->second_last_swap_fence, &pool)) + if (alloc_from_pool(intel, pool, buf)) + return GL_TRUE; + + /* If we're not thrashing, allow lru eviction to dig deeper into + * recently used textures. We'll probably be thrashing soon: + */ + if (!intel->thrashing) { + while (evict_lru(intel, 0, &pool)) + if (alloc_from_pool(intel, pool, buf)) + return GL_TRUE; + } + + /* Keep thrashing counter alive? + */ + if (intel->thrashing) + intel->thrashing = 20; + + /* Wait on any already pending fences - here we are waiting for any + * freed memory that has been submitted to hardware and fenced to + * become available: + */ + while (!is_empty_list(&bm->fenced)) { + GLuint fence = bm->fenced.next->fence; + bmFinishFence(intel, fence); + + if (alloc_block(intel, buf)) + return GL_TRUE; + } + + + /* + */ + if (!is_empty_list(&bm->on_hardware)) { + bmSetFence(intel); + + while (!is_empty_list(&bm->fenced)) { + GLuint fence = bm->fenced.next->fence; + bmFinishFence(intel, fence); + } + + if (!intel->thrashing) { + DBG("thrashing\n"); + } + intel->thrashing = 20; + + if (alloc_block(intel, buf)) + return GL_TRUE; + } + + while (evict_mru(intel, &pool)) + if (alloc_from_pool(intel, pool, buf)) + return GL_TRUE; + + DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size); + + assert(is_empty_list(&bm->on_hardware)); + assert(is_empty_list(&bm->fenced)); + + return GL_FALSE; +} + + + + + + + + + + +/*********************************************************************** + * Public functions + */ + + +/* The initialization functions are skewed in the fake 
implementation. + * This call would be to attach to an existing manager, rather than to + * create a local one. + */ +struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ) +{ + _glthread_DECLARE_STATIC_MUTEX(initMutex); + static struct bufmgr bm; + + /* This function needs a mutex of its own... + */ + _glthread_LOCK_MUTEX(initMutex); + + if (nr_attach == 0) { + _glthread_INIT_MUTEX(bm.mutex); + + make_empty_list(&bm.referenced); + make_empty_list(&bm.fenced); + make_empty_list(&bm.on_hardware); + } + + nr_attach++; + + _glthread_UNLOCK_MUTEX(initMutex); + + return &bm; +} + + + +/* The virtual pointer would go away in a true implementation. + */ +int bmInitPool( struct intel_context *intel, + unsigned long low_offset, + void *low_virtual, + unsigned long size, + unsigned flags) +{ + struct bufmgr *bm = intel->bm; + int retval = 0; + + LOCK(bm); + { + GLuint i; + + for (i = 0; i < bm->nr_pools; i++) { + if (bm->pool[i].low_offset == low_offset && + bm->pool[i].size == size) { + retval = i; + goto out; + } + } + + + if (bm->nr_pools >= BM_POOL_MAX) + retval = -1; + else { + i = bm->nr_pools++; + + DBG("bmInitPool %d low_offset %x sz %x\n", + i, low_offset, size); + + bm->pool[i].low_offset = low_offset; + bm->pool[i].size = size; + bm->pool[i].heap = mmInit( low_offset, size ); + bm->pool[i].virtual = low_virtual - low_offset; + bm->pool[i].flags = flags; + + make_empty_list(&bm->pool[i].lru); + + retval = i; + } + } + out: + UNLOCK(bm); + return retval; +} + +static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align) +{ + struct bufmgr *bm = intel->bm; + struct buffer *buf = calloc(sizeof(*buf), 1); + + buf->id = ++bm->buf_nr; + buf->name = name; + buf->alignment = align; + buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL; + + return buf; +} + + +void *bmFindVirtual( struct intel_context *intel, + unsigned int offset, + size_t sz ) +{ + struct bufmgr *bm = intel->bm; + int i; + + for (i = 0; i < bm->nr_pools; i++) + if 
(offset >= bm->pool[i].low_offset && + offset + sz <= bm->pool[i].low_offset + bm->pool[i].size) + return bm->pool[i].virtual + offset; + + return NULL; +} + + +void bmGenBuffers(struct intel_context *intel, + const char *name, unsigned n, + struct buffer **buffers, + int align ) +{ + struct bufmgr *bm = intel->bm; + LOCK(bm); + { + int i; + + for (i = 0; i < n; i++) + buffers[i] = do_GenBuffer(intel, name, align); + } + UNLOCK(bm); +} + + +void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers) +{ + struct bufmgr *bm = intel->bm; + + LOCK(bm); + { + unsigned i; + + for (i = 0; i < n; i++) { + struct buffer *buf = buffers[i]; + + if (buf && buf->block) + free_block(intel, buf->block); + + if (buf) + free(buf); + } + } + UNLOCK(bm); +} + + + + +/* Hook to inform faked buffer manager about fixed-position + * front,depth,back buffers. These may move to a fully memory-managed + * scheme, or they may continue to be managed as is. It will probably + * be useful to pass a fixed offset here one day. 
+ */ +struct buffer *bmGenBufferStatic(struct intel_context *intel, + unsigned pool ) +{ + struct bufmgr *bm = intel->bm; + struct buffer *buf; + LOCK(bm); + { + assert(bm->pool[pool].flags & BM_NO_EVICT); + assert(bm->pool[pool].flags & BM_NO_MOVE); + + if (bm->pool[pool].static_buffer) + buf = bm->pool[pool].static_buffer; + else { + buf = do_GenBuffer(intel, "static", 12); + + bm->pool[pool].static_buffer = buf; + assert(!buf->block); + + buf->size = bm->pool[pool].size; + buf->flags = bm->pool[pool].flags; + buf->alignment = 12; + + if (!alloc_from_pool(intel, pool, buf)) + assert(0); + } + } + UNLOCK(bm); + return buf; +} + + +static void wait_quiescent(struct intel_context *intel, + struct block *block) +{ + if (block->on_hardware) { + assert(intel->bm->need_fence); + bmSetFence(intel); + assert(!block->on_hardware); + } + + + if (block->fenced) { + bmFinishFence(intel, block->fence); + } + + assert(!block->on_hardware); + assert(!block->fenced); +} + + + +/* If buffer size changes, free and reallocate. Otherwise update in + * place. 
+ */
+int bmBufferData(struct intel_context *intel,
+                 struct buffer *buf,
+                 unsigned size,
+                 const void *data,
+                 unsigned flags )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+   /* Returns 0, or -1 (with bm->fail set) when no block could be
+    * allocated for the upload.  'flags' is not read in this function.
+    */
+   LOCK(bm);
+   {
+      DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data);
+
+      assert(!buf->mapped);
+
+      if (buf->block) {
+         struct block *block = buf->block;
+
+         /* Optimistic check to see if we can reuse the block -- not
+          * required for correctness:
+          */
+         if (block->fenced)
+            check_fenced(intel);
+         /* Give the block up (and mark the buffer dirty) when it is
+          * still on_hardware or fenced, when a non-zero size changes,
+          * or when the caller supplied no data.
+          */
+         if (block->on_hardware ||
+             block->fenced ||
+             (buf->size && buf->size != size) ||
+             (data == NULL)) {
+
+            assert(!block->referenced);
+
+            free_block(intel, block);
+            buf->block = NULL;
+            buf->dirty = 1;
+         }
+      }
+
+      buf->size = size;
+      if (buf->block) {
+         assert (buf->block->mem->size >= size);
+      }
+      /* No-backing-store / no-evict buffers are written straight into
+       * their block; every other buffer gets a fresh backing-store copy
+       * and is marked dirty.
+       */
+      if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
+
+         assert(intel->locked || data == NULL);
+
+         if (data != NULL) {
+            if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+               bm->fail = 1;
+               retval = -1;
+               goto out;
+            }
+
+            wait_quiescent(intel, buf->block);
+
+            DBG("bmBufferData %d offset 0x%x sz 0x%x\n",
+                buf->id, buf->block->mem->ofs, size);
+
+            assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs);
+
+            do_memcpy(buf->block->virtual, data, size);
+         }
+         buf->dirty = 0;
+      }
+      else {
+         DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+         set_dirty(intel, buf);
+         free_backing_store(intel, buf);
+
+         if (data != NULL) {
+            alloc_backing_store(intel, buf);
+            do_memcpy(buf->backing_store, data, size);
+         }
+      }
+   }
+ out:
+   UNLOCK(bm);
+   return retval;
+}
+
+
+/* Update the buffer in place, in whatever space it is currently resident:
+ * the block for no-evict / no-backing-store buffers, the backing store
+ * (allocated on demand, buffer marked dirty) otherwise.  A zero 'size'
+ * returns 0 without taking the lock.  Returns -1 and sets bm->fail when
+ * a block is needed but cannot be allocated.
+ */
+int bmBufferSubData(struct intel_context *intel,
+                    struct buffer *buf,
+                    unsigned offset,
+                    unsigned size,
+                    const void *data )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+
+   if (size == 0)
+      return 0;
+
+   LOCK(bm);
+   {
+      DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
+
+      assert(offset+size <= buf->size);
+
+      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
+
+         assert(intel->locked);
+
+         if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+            bm->fail = 1;
+            retval = -1;
+            goto out;
+         }
+
+         if (!(buf->flags & BM_NO_FENCE_SUBDATA))
+            wait_quiescent(intel, buf->block);
+
+         buf->dirty = 0;
+
+         do_memcpy(buf->block->virtual + offset, data, size);
+      }
+      else {
+         DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+         set_dirty(intel, buf);
+
+         if (buf->backing_store == NULL)
+            alloc_backing_store(intel, buf);
+
+         do_memcpy(buf->backing_store + offset, data, size);
+      }
+   }
+ out:
+   UNLOCK(bm);
+   return retval;
+}
+
+
+/* bmBufferData() followed by aub-file logging: when the upload
+ * succeeded, data was supplied and an aub file is open, the block
+ * contents are passed to vtbl.aub_gtt_data() and aub_dirty is cleared.
+ * Returns bmBufferData()'s result.
+ */
+int bmBufferDataAUB(struct intel_context *intel,
+                    struct buffer *buf,
+                    unsigned size,
+                    const void *data,
+                    unsigned flags,
+                    unsigned aubtype,
+                    unsigned aubsubtype )
+{
+   int retval = bmBufferData(intel, buf, size, data, flags);
+
+
+   /* This only works because in this version of the buffer manager we
+    * allocate all buffers statically in agp space and so can emit the
+    * uploads to the aub file with the correct offsets as they happen.
+    */
+   if (retval == 0 && data && intel->aub_file) {
+
+      if (buf->block && !buf->dirty) {
+         intel->vtbl.aub_gtt_data(intel,
+                                  buf->block->mem->ofs,
+                                  buf->block->virtual,
+                                  size,
+                                  aubtype,
+                                  aubsubtype);
+         buf->aub_dirty = 0;
+      }
+   }
+
+   return retval;
+}
+
+/* bmBufferSubData() followed by aub-file logging of the updated range
+ * (block offset + 'offset', 'size' bytes) when an aub file is open and
+ * the buffer is resident and clean.  Returns bmBufferSubData()'s result.
+ */
+int bmBufferSubDataAUB(struct intel_context *intel,
+                       struct buffer *buf,
+                       unsigned offset,
+                       unsigned size,
+                       const void *data,
+                       unsigned aubtype,
+                       unsigned aubsubtype )
+{
+   int retval = bmBufferSubData(intel, buf, offset, size, data);
+
+
+   /* This only works because in this version of the buffer manager we
+    * allocate all buffers statically in agp space and so can emit the
+    * uploads to the aub file with the correct offsets as they happen.
+    */
+   if (intel->aub_file) {
+      if (retval == 0 && buf->block && !buf->dirty)
+         intel->vtbl.aub_gtt_data(intel,
+                                  buf->block->mem->ofs + offset,
+                                  ((const char *)buf->block->virtual) + offset,
+                                  size,
+                                  aubtype,
+                                  aubsubtype);
+   }
+
+   return retval;
+}
+/* Unmap the buffer and, when an aub file is open, flag clean buffers
+ * resident in pool 0 as aub_dirty; bmValidateBuffers() writes such
+ * buffers to the aub file.  'aubtype' and 'aubsubtype' are not read here.
+ */
+void bmUnmapBufferAUB( struct intel_context *intel,
+                       struct buffer *buf,
+                       unsigned aubtype,
+                       unsigned aubsubtype )
+{
+   bmUnmapBuffer(intel, buf);
+
+   if (intel->aub_file) {
+      /* Hack - exclude the framebuffer mappings. If you removed
+       * this, you'd get very big aubfiles, but you *would* be able to
+       * see fallback rendering.
+       */
+      if (buf->block && !buf->dirty && buf->block->pool == &intel->bm->pool[0]) {
+         buf->aub_dirty = 1;
+      }
+   }
+}
+/* Return the buffer's offset (block->mem->ofs), allocating a block
+ * first if it has none, and move that block to the tail of the
+ * referenced list.  On allocation failure bm->fail is set and ~0 is
+ * returned.
+ */
+unsigned bmBufferOffset(struct intel_context *intel,
+                        struct buffer *buf)
+{
+   struct bufmgr *bm = intel->bm;
+   unsigned retval = 0;
+
+   LOCK(bm);
+   {
+      assert(intel->locked);
+
+      if (!buf->block &&
+          !evict_and_alloc_block(intel, buf)) {
+         bm->fail = 1;
+         retval = ~0;
+      }
+      else {
+         assert(buf->block);
+         assert(buf->block->buf == buf);
+
+         DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, buf->block,
+             buf->dirty);
+
+         move_to_tail(&bm->referenced, buf->block);
+         buf->block->referenced = 1;
+
+         retval = buf->block->mem->ofs;
+      }
+   }
+   UNLOCK(bm);
+
+   return retval;
+}
+
+
+
+/* Extract data from the buffer:
+ * no-evict / no-backing-store buffers are read from their block after
+ * wait_quiescent(), all others from the backing store.  Nothing is
+ * copied when 'size' is 0 or the relevant storage does not exist.
+ * Note that the debug line below is labelled "bmBufferSubdata".
+ */
+void bmBufferGetSubData(struct intel_context *intel,
+                        struct buffer *buf,
+                        unsigned offset,
+                        unsigned size,
+                        void *data )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
+
+      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
+         if (buf->block && size) {
+            wait_quiescent(intel, buf->block);
+            do_memcpy(data, buf->block->virtual + offset, size);
+         }
+      }
+      else {
+         if (buf->backing_store && size) {
+            do_memcpy(data, buf->backing_store + offset, size);
+         }
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* Return a pointer to whatever space the buffer is currently resident in:
+ 
*/
+void *bmMapBuffer( struct intel_context *intel,
+                   struct buffer *buf,
+                   unsigned flags )
+{
+   struct bufmgr *bm = intel->bm;
+   void *retval = NULL;
+   /* Returns NULL when the buffer is already mapped or when no block
+    * could be allocated (bm->fail is set in the latter case).  'flags'
+    * is not read in this function.
+    */
+   LOCK(bm);
+   {
+      DBG("bmMapBuffer %d\n", buf->id);
+
+      if (buf->mapped) {
+         _mesa_printf("%s: already mapped\n", __FUNCTION__);
+         retval = NULL;
+      }
+      else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
+
+         assert(intel->locked);
+
+         if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+            DBG("%s: alloc failed\n", __FUNCTION__);
+            bm->fail = 1;
+            retval = NULL;
+         }
+         else {
+            assert(buf->block);
+            buf->dirty = 0;
+
+            if (!(buf->flags & BM_NO_FENCE_SUBDATA))
+               wait_quiescent(intel, buf->block);
+
+            buf->mapped = 1;
+            retval = buf->block->virtual;
+         }
+      }
+      else {
+         DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+         set_dirty(intel, buf);
+
+         if (buf->backing_store == 0)
+            alloc_backing_store(intel, buf);
+
+         buf->mapped = 1;
+         retval = buf->backing_store;
+      }
+   }
+   UNLOCK(bm);
+   return retval;
+}
+/* Clear the mapped flag set by bmMapBuffer(); nothing else is undone
+ * here.
+ */
+void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      DBG("bmUnmapBuffer %d\n", buf->id);
+      buf->mapped = 0;
+   }
+   UNLOCK(bm);
+}
+
+
+
+
+/* This is the big hack that turns on BM_NO_BACKING_STORE. Basically
+ * says that an external party will maintain the backing store, eg
+ * Mesa's local copy of texture data.
+ *
+ * Any existing backing store is freed, BM_NO_FENCE_SUBDATA is added
+ * when 'dont_fence_subdata' is set, the buffer is marked dirty and
+ * invalidate_cb(intel, ptr) is invoked once before returning.
+ */
+void bmBufferSetInvalidateCB(struct intel_context *intel,
+                             struct buffer *buf,
+                             void (*invalidate_cb)( struct intel_context *, void *ptr ),
+                             void *ptr,
+                             GLboolean dont_fence_subdata)
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      if (buf->backing_store)
+         free_backing_store(intel, buf);
+
+      buf->flags |= BM_NO_BACKING_STORE;
+
+      if (dont_fence_subdata)
+         buf->flags |= BM_NO_FENCE_SUBDATA;
+
+      DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id);
+      buf->dirty = 1;
+      buf->invalidate_cb = invalidate_cb;
+      buf->invalidate_ptr = ptr;
+
+      /* Note that it is invalid right from the start. Also note
+       * invalidate_cb is called with the bufmgr locked, so cannot
+       * itself make bufmgr calls.
+       */
+      invalidate_cb( intel, ptr );
+   }
+   UNLOCK(bm);
+}
+
+
+
+
+
+
+
+/* This is only protected against thread interactions by the DRI lock
+ * and the policy of ensuring that all dma is flushed prior to
+ * releasing that lock. Otherwise you might have two threads building
+ * up a list of buffers to validate at once.
+ *
+ * Walks the referenced list: dirty buffers are copied from their
+ * backing store into their block, then every block is moved to the
+ * on_hardware list and need_fence is set.  Returns 0, or -1 when
+ * bm->fail is set (in which case the list is not walked at all).
+ */
+int bmValidateBuffers( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+
+   LOCK(bm);
+   {
+      DBG("%s fail %d\n", __FUNCTION__, bm->fail);
+      assert(intel->locked);
+
+      if (!bm->fail) {
+         struct block *block, *tmp;
+
+         foreach_s(block, tmp, &bm->referenced) {
+            struct buffer *buf = block->buf;
+
+            DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty);
+
+            /* Upload the buffer contents if necessary:
+             */
+            if (buf->dirty) {
+               DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id,
+                   buf->name, buf->size, block->mem->ofs);
+
+               assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)));
+
+               wait_quiescent(intel, buf->block);
+
+               do_memcpy(buf->block->virtual,
+                         buf->backing_store,
+                         buf->size);
+
+               if (intel->aub_file) {
+                  intel->vtbl.aub_gtt_data(intel,
+                                           buf->block->mem->ofs,
+                                           buf->backing_store,
+                                           buf->size,
+                                           0,
+                                           0);
+               }
+
+               buf->dirty = 0;
+               buf->aub_dirty = 0;
+            }
+            else if (buf->aub_dirty) {
+               intel->vtbl.aub_gtt_data(intel,
+                                        buf->block->mem->ofs,
+                                        buf->block->virtual,
+                                        buf->size,
+                                        0,
+                                        0);
+               buf->aub_dirty = 0;
+            }
+
+            block->referenced = 0;
+            block->on_hardware = 1;
+            move_to_tail(&bm->on_hardware, block);
+         }
+
+         bm->need_fence = 1;
+      }
+
+      retval = bm->fail ? -1 : 0;
+   }
+   UNLOCK(bm);
+
+
+   if (retval != 0)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+
+
+/* Empty the referenced list without validating it: each block goes
+ * back to the on_hardware list, into the fenced list in fence order,
+ * or onto its pool's lru list, and its referenced flag is cleared.
+ */
+void bmReleaseBuffers( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      struct block *block, *tmp;
+      assert(intel->locked);
+
+      foreach_s (block, tmp, &bm->referenced) {
+
+         DBG("remove block %p from referenced list\n", block);
+
+         if (block->on_hardware) {
+            /* Return to the on-hardware list.
+             */
+            move_to_tail(&bm->on_hardware, block);
+         }
+         else if (block->fenced) {
+            struct block *s;
+
+            /* Hmm - have to scan the fenced list to insert the
+             * buffers in order. This is O(nm), but rare and the
+             * numbers are low.
+             */
+            foreach (s, &bm->fenced) {
+               if (FENCE_LTE(block->fence, s->fence))
+                  break;
+            }
+
+            move_to_tail(s, block);
+         }
+         else {
+            /* Return to the lru list:
+             */
+            move_to_tail(&block->pool->lru, block);
+         }
+
+         block->referenced = 0;
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* This functionality is used by the buffer manager, not really sure
+ * if we need to be exposing it in this way, probably libdrm will
+ * offer equivalent calls.
+ *
+ * For now they can stay, but will likely change/move before final:
+ *
+ * bmSetFence() returns bm->last_fence, first emitting a flush and a
+ * new irq fence when need_fence is set.
+ */
+unsigned bmSetFence( struct intel_context *intel )
+{
+   assert(intel->locked);
+
+   /* Emit MI_FLUSH here:
+    */
+   if (intel->bm->need_fence) {
+
+      /* Emit a flush without using a batchbuffer. Can't rely on the
+       * batchbuffer at this level really. Would really prefer that
+       * the IRQ ioctl emitted the flush at the same time.
+       */
+      GLuint dword[2];
+      dword[0] = intel->vtbl.flush_cmd();
+      dword[1] = 0;
+      intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword), GL_TRUE);
+
+      intel->bm->last_fence = intelEmitIrqLocked( intel );
+
+      fence_blocks(intel, intel->bm->last_fence);
+
+      intel->vtbl.note_fence(intel, intel->bm->last_fence);
+      intel->bm->need_fence = 0;
+
+      if (intel->thrashing) {
+         intel->thrashing--;
+         if (!intel->thrashing)
+            DBG("not thrashing\n");
+      }
+
+      intel->bm->free_on_hardware = 0;
+   }
+
+   return intel->bm->last_fence;
+}
+/* bmSetFence() for callers that do not hold the hardware lock: the
+ * lock is taken around the call, but only when a fence is pending.
+ * Returns bm->last_fence.
+ */
+unsigned bmLockAndFence( struct intel_context *intel )
+{
+   if (intel->bm->need_fence) {
+      LOCK_HARDWARE(intel);
+      bmSetFence(intel);
+      UNLOCK_HARDWARE(intel);
+   }
+
+   return intel->bm->last_fence;
+}
+
+/* Wait with intelWaitIrq() unless bmTestFence() already reports
+ * 'fence' as passed, then run check_fenced().
+ */
+void bmFinishFence( struct intel_context *intel, unsigned fence )
+{
+   if (!bmTestFence(intel, fence)) {
+      DBG("...wait on fence %d\n", fence);
+      intelWaitIrq( intel, fence );
+   }
+   assert(bmTestFence(intel, fence));
+   check_fenced(intel);
+}
+
+
+
+
+/* Specifically ignore texture memory sharing.
+ * -- just evict everything + * -- and wait for idle + */ +void bm_fake_NotifyContendedLockTake( struct intel_context *intel ) +{ + struct bufmgr *bm = intel->bm; + + LOCK(bm); + { + struct block *block, *tmp; + GLuint i; + + assert(is_empty_list(&bm->referenced)); + + bm->need_fence = 1; + bm->fail = 0; + bmFinishFence(intel, bmSetFence(intel)); + + assert(is_empty_list(&bm->fenced)); + assert(is_empty_list(&bm->on_hardware)); + + for (i = 0; i < bm->nr_pools; i++) { + if (!(bm->pool[i].flags & BM_NO_EVICT)) { + foreach_s(block, tmp, &bm->pool[i].lru) { + assert(bmTestFence(intel, block->fence)); + set_dirty(intel, block->buf); + } + } + } + } + UNLOCK(bm); +} + + + +void bmEvictAll( struct intel_context *intel ) +{ + struct bufmgr *bm = intel->bm; + + LOCK(bm); + { + struct block *block, *tmp; + GLuint i; + + DBG("%s\n", __FUNCTION__); + + assert(is_empty_list(&bm->referenced)); + + bm->need_fence = 1; + bm->fail = 0; + bmFinishFence(intel, bmSetFence(intel)); + + assert(is_empty_list(&bm->fenced)); + assert(is_empty_list(&bm->on_hardware)); + + for (i = 0; i < bm->nr_pools; i++) { + if (!(bm->pool[i].flags & BM_NO_EVICT)) { + foreach_s(block, tmp, &bm->pool[i].lru) { + assert(bmTestFence(intel, block->fence)); + set_dirty(intel, block->buf); + block->buf->block = NULL; + + free_block(intel, block); + } + } + } + } + UNLOCK(bm); +} + + +GLboolean bmError( struct intel_context *intel ) +{ + struct bufmgr *bm = intel->bm; + GLboolean retval; + + LOCK(bm); + { + retval = bm->fail; + } + UNLOCK(bm); + + return retval; +} diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c new file mode 100644 index 00000000000..598ce08735d --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -0,0 +1,261 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "imports.h" +#include "intel_batchbuffer.h" +#include "intel_ioctl.h" +#include "bufmgr.h" + + +static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) +{ + assert(batch->map == NULL); + + batch->offset = (unsigned long)batch->ptr; + batch->offset = (batch->offset + 63) & ~63; + batch->ptr = (unsigned char *) batch->offset; + + if (BATCH_SZ - batch->offset < BATCH_REFILL) { + bmBufferData(batch->intel, + batch->buffer, + BATCH_SZ, + NULL, + 0); + batch->offset = 0; + batch->ptr = NULL; + } + + batch->flags = 0; +} + +static void intel_batchbuffer_reset_cb( struct intel_context *intel, + void *ptr ) +{ + struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr; + assert(batch->map == NULL); + batch->flags = 0; + batch->offset = 0; + batch->ptr = NULL; +} + +GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ) +{ + if (!batch->map) { + batch->map = bmMapBuffer(batch->intel, batch->buffer, + BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE); + batch->ptr += (unsigned long)batch->map; + } + + return batch->map; +} + +void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ) +{ + if (batch->map) { + batch->ptr -= (unsigned long)batch->map; + batch->map = NULL; + bmUnmapBuffer(batch->intel, batch->buffer); + } +} + + + +/*====================================================================== + * Public functions + */ +struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ) +{ + struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); + + batch->intel = intel; + + bmGenBuffers(intel, "batch", 1, &batch->buffer, 12); + + bmBufferSetInvalidateCB(intel, batch->buffer, + intel_batchbuffer_reset_cb, + batch, + GL_TRUE); + + bmBufferData(batch->intel, + batch->buffer, + BATCH_SZ, + NULL, + 0); + + + return batch; +} + +void intel_batchbuffer_free( struct intel_batchbuffer *batch ) +{ + if (batch->map) + 
bmUnmapBuffer(batch->intel, batch->buffer); + + bmDeleteBuffers(batch->intel, 1, &batch->buffer); + free(batch); +} + + +#define MI_BATCH_BUFFER_END (0xA<<23) + + +GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ) +{ + struct intel_context *intel = batch->intel; + GLuint used = batch->ptr - (batch->map + batch->offset); + GLuint offset; + GLboolean ignore_cliprects = (batch->flags & INTEL_BATCH_CLIPRECTS) ? GL_FALSE : GL_TRUE; + GLint retval = GL_TRUE; + + assert(intel->locked); + + if (used == 0) { + bmReleaseBuffers( batch->intel ); + return GL_TRUE; + } + + /* Throw away non-effective packets. + */ + if (intel->numClipRects == 0 && !ignore_cliprects) { + batch->ptr = batch->map + batch->offset; + bmReleaseBuffers( batch->intel ); + intel->vtbl.lost_hardware(intel); + batch->flags = 0; + + UNLOCK_HARDWARE(intel); + sched_yield(); + LOCK_HARDWARE(intel); + + return GL_TRUE; + } + + + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ + if (used & 4) { + ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END; + batch->ptr += 4; + used += 4; + } + else { + ((int *)batch->ptr)[0] = 0; + ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END; + + batch->ptr += 8; + used += 8; + } + + intel_batchbuffer_unmap(batch); + + /* Get the batch buffer offset: Must call bmBufferOffset() before + * bmValidateBuffers(), otherwise the buffer won't be on the inuse + * list. + */ + offset = bmBufferOffset(batch->intel, batch->buffer); + + if (bmValidateBuffers( batch->intel ) != 0) { + assert(intel->locked); + bmReleaseBuffers( batch->intel ); + retval = GL_FALSE; + goto out; + } + + + if (intel->aub_file) { + /* Send buffered commands to aubfile as a single packet. 
+ */ + intel_batchbuffer_map(batch); + ((int *)batch->ptr)[-1] = intel->vtbl.flush_cmd(); + intel->vtbl.aub_commands(intel, + offset, /* Fulsim wierdness - don't adjust */ + batch->map + batch->offset, + used); + ((int *)batch->ptr)[-1] = MI_BATCH_BUFFER_END; + intel_batchbuffer_unmap(batch); + } + + + /* Fire the batch buffer, which was uploaded above: + */ + intel_batch_ioctl(batch->intel, + offset + batch->offset, + used, + ignore_cliprects); + + if (intel->aub_file && + intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) + intel->vtbl.aub_dump_bmp( intel, 0 ); + + /* Reset the buffer: + */ + out: + intel_batchbuffer_reset( batch ); + intel_batchbuffer_map( batch ); + + if (!retval) + DBG("%s failed\n", __FUNCTION__); + + return retval; +} + + + + + + + +void intel_batchbuffer_align( struct intel_batchbuffer *batch, + GLuint align, + GLuint sz ) +{ + unsigned long ptr = (unsigned long) batch->ptr; + unsigned long aptr = (ptr + align) & ~((unsigned long)align-1); + GLuint fixup = aptr - ptr; + + if (intel_batchbuffer_space(batch) < fixup + sz) + intel_batchbuffer_flush(batch); + else { + memset(batch->ptr, 0, fixup); + batch->ptr += fixup; + } +} + + + + +void intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, + GLuint bytes, + GLuint flags) +{ + assert((bytes & 3) == 0); + intel_batchbuffer_require_space(batch, bytes, flags); + __memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; +} + diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h new file mode 100644 index 00000000000..7a9ead3e373 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -0,0 +1,127 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "mtypes.h" +#include "bufmgr.h" + +struct intel_context; + +#define BATCH_SZ (16 * 1024) +#define BATCH_REFILL 4096 +#define BATCH_RESERVED 16 + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +struct intel_batchbuffer { + struct intel_context *intel; + + struct buffer *buffer; + + GLuint flags; + GLuint offset; + + GLubyte *map; + GLubyte *ptr; +}; + +struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ); + +void intel_batchbuffer_free( struct intel_batchbuffer *batch ); + + +GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ); + +void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ); +GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +void intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, + GLuint bytes, + GLuint flags); + +void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, + GLuint bytes); + + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... 
+ */ +static inline GLuint +intel_batchbuffer_space( struct intel_batchbuffer *batch ) +{ + return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset)); +} + + +static inline void +intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, + GLuint dword) +{ + assert(batch->map); + assert(intel_batchbuffer_space(batch) >= 4); + *(GLuint *)(batch->ptr) = dword; + batch->ptr += 4; +} + +static inline void +intel_batchbuffer_require_space(struct intel_batchbuffer *batch, + GLuint sz, + GLuint flags) +{ + assert(sz < BATCH_SZ - 8); + if (intel_batchbuffer_space(batch) < sz || + (batch->flags != 0 && flags != 0 && batch->flags != flags)) + intel_batchbuffer_flush(batch); + + batch->flags |= flags; +} + +void intel_batchbuffer_align( struct intel_batchbuffer *batch, + GLuint align, + GLuint sz ); + + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS +#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) +#define ADVANCE_BATCH() do { } while(0) + + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c new file mode 100644 index 00000000000..2191dd585e1 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -0,0 +1,494 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "mtypes.h" +#include "context.h" +#include "enums.h" +#include "vblank.h" + +#include "intel_reg.h" +#include "intel_batchbuffer.h" +#include "intel_context.h" +#include "intel_blit.h" +#include "intel_regions.h" + +#include "bufmgr.h" + + + + +/* + * Copy the back buffer to the front buffer. 
+ */ +void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, + const drm_clip_rect_t *rect ) +{ + struct intel_context *intel; + GLboolean missed_target; + int64_t ust; + + DBG("%s\n", __FUNCTION__); + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; + intelFlush( &intel->ctx ); + + + bmFinishFence(intel, intel->last_swap_fence); + + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets + * should work regardless. + */ + LOCK_HARDWARE( intel ); + + if (!rect) + { + /* This is a really crappy way to do wait-for-vblank. I guess + * it sortof works in the single-application case. + */ + UNLOCK_HARDWARE( intel ); + driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target ); + LOCK_HARDWARE( intel ); + } + + { + intelScreenPrivate *intelScreen = intel->intelScreen; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int nbox = dPriv->numClipRects; + drm_clip_rect_t *pbox = dPriv->pClipRects; + int cpp = intelScreen->cpp; + struct intel_region *src, *dst; + int BR13, CMD; + int i; + int src_pitch, dst_pitch; + + if (intel->sarea->pf_current_page == 0) { + dst = intel->front_region; + src = intel->back_region; + } + else { + assert(0); + src = intel->front_region; + dst = intel->back_region; + } + + src_pitch = src->pitch * src->cpp; + dst_pitch = dst->pitch * dst->cpp; + + if (cpp == 2) { + BR13 = (0xCC << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + } + else { + BR13 = (0xCC << 16) | (1<<24) | (1<<25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + } + + if (src->tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst->tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + for (i = 0 ; i < nbox; i++, pbox++) + { + drm_clip_rect_t tmp = *pbox; + + if (rect) { + if (!intel_intersect_cliprects(&tmp, &tmp, rect)) + continue; + } + + + if (tmp.x1 > tmp.x2 
|| + tmp.y1 > tmp.y2 || + tmp.x2 > intelScreen->width || + tmp.y2 > intelScreen->height) + continue; + + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); + OUT_BATCH( (tmp.y2 << 16) | tmp.x2 ); + OUT_BATCH( bmBufferOffset(intel, dst->buffer) ); + OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); + OUT_BATCH( src_pitch ); + OUT_BATCH( bmBufferOffset(intel, src->buffer) ); + ADVANCE_BATCH(); + } + } + + intel_batchbuffer_flush( intel->batch ); + intel->second_last_swap_fence = intel->last_swap_fence; + intel->last_swap_fence = bmSetFence( intel ); + UNLOCK_HARDWARE( intel ); + + if (!rect) + { + intel->swap_count++; + (*dri_interface->getUST)(&ust); + if (missed_target) { + intel->swap_missed_count++; + intel->swap_missed_ust = ust - intel->swap_ust; + } + + intel->swap_ust = ust; + } + +} + + + + +void intelEmitFillBlit( struct intel_context *intel, + GLuint cpp, + GLshort dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLuint color ) +{ + GLuint BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1<<24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (y << 16) | x ); + OUT_BATCH( ((y+h) << 16) | (x+w) ); + OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); + OUT_BATCH( color ); + ADVANCE_BATCH(); +} + + +/* Copy BitBlt + */ +void intelEmitCopyBlit( struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + struct buffer *src_buffer, + GLuint src_offset, + GLboolean src_tiled, + GLshort 
dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort src_x, GLshort src_y, + GLshort dst_x, GLshort dst_y, + GLshort w, GLshort h ) +{ + GLuint CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_x, src_y, + dst_buffer, dst_pitch, dst_x, dst_y, + w,h); + + src_pitch *= cpp; + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xCC << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (0xCC << 16) | (1<<24) | (1<<25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + dst_pitch &= 0xffff; + src_pitch &= 0xffff; + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. 
+ */ + if (0) { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (dst_y << 16) | dst_x ); + OUT_BATCH( (dst_y2 << 16) | dst_x2 ); + OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); + OUT_BATCH( (src_y << 16) | src_x ); + OUT_BATCH( src_pitch ); + OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset ); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); + OUT_BATCH( (0 << 16) | dst_x ); + OUT_BATCH( (h << 16) | dst_x2 ); + OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset + dst_y * dst_pitch ); + OUT_BATCH( (0 << 16) | src_x ); + OUT_BATCH( (src_pitch & 0xffff) ); + OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset + src_y * src_pitch ); + ADVANCE_BATCH(); + } +} + + + +/** + * Clear the buffers selected by 'flags' (front, back, depth and/or + * stencil) by emitting XY_COLOR_BLT commands, one per buffer per cliprect. + * + * The cx1/cy1/cw/ch values passed in are not used: the clear bounds are + * re-read from ctx->DrawBuffer once the hardware lock is held. When 'all' + * is set each cliprect is cleared whole, otherwise it is first intersected + * with those bounds. Only cpp 2 and 4 are handled; any other value + * returns without clearing anything. + */ +void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all, + GLint cx1, GLint cy1, GLint cw, GLint ch) +{ + struct intel_context *intel = intel_context( ctx ); + intelScreenPrivate *intelScreen = intel->intelScreen; + GLuint clear_depth, clear_color; + GLint cx, cy; + GLint cpp = intelScreen->cpp; + GLint i; + struct intel_region *front = intel->front_region; + struct intel_region *back = intel->back_region; + struct intel_region *depth = intel->depth_region; + GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD; + GLuint front_pitch; + GLuint back_pitch; + GLuint depth_pitch; + BATCH_LOCALS; + + /* Kept in the signature for the callers' sake only; see below. */ + (void) cx1; + (void) cy1; + + clear_color = intel->ClearColor; + clear_depth = 0; + + if (flags & BUFFER_BIT_DEPTH) { + clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); + } + + if (flags & BUFFER_BIT_STENCIL) { + clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; + } + + switch(cpp) { + case 2: + BR13 = (0xF0 << 16) | (1<<24); + BACK_CMD = FRONT_CMD = XY_COLOR_BLT_CMD; + DEPTH_CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + BACK_CMD = FRONT_CMD = (XY_COLOR_BLT_CMD | + XY_COLOR_BLT_WRITE_ALPHA | + 
XY_COLOR_BLT_WRITE_RGB); + DEPTH_CMD = XY_COLOR_BLT_CMD; + if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_COLOR_BLT_WRITE_RGB; + if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_COLOR_BLT_WRITE_ALPHA; + break; + default: + return; + } + + + + intelFlush( &intel->ctx ); + LOCK_HARDWARE( intel ); + { + /* Refresh the cx/y/w/h values as they may have been invalidated + * by a new window position or size picked up when we did + * LOCK_HARDWARE above. The values passed by mesa are not + * reliable. + */ + { + cx = ctx->DrawBuffer->_Xmin; + cy = ctx->DrawBuffer->_Ymin; + ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; + cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; + } + + /* flip top to bottom, starting from the bounds refreshed above + * rather than the cx1/cy1 handed in by the caller + */ + cy = intel->driDrawable->h - cy - ch; + cx = cx + intel->drawX; + cy += intel->drawY; + + /* adjust for page flipping */ + if ( intel->sarea->pf_current_page == 0 ) { + front = intel->front_region; + back = intel->back_region; + } + else { + back = intel->front_region; + front = intel->back_region; + } + + front_pitch = front->pitch * front->cpp; + back_pitch = back->pitch * back->cpp; + depth_pitch = depth->pitch * depth->cpp; + + if (front->tiled) { + FRONT_CMD |= XY_DST_TILED; + front_pitch /= 4; + } + + if (back->tiled) { + BACK_CMD |= XY_DST_TILED; + back_pitch /= 4; + } + + if (depth->tiled) { + DEPTH_CMD |= XY_DST_TILED; + depth_pitch /= 4; + } + + for (i = 0 ; i < intel->numClipRects ; i++) + { + drm_clip_rect_t *box = &intel->pClipRects[i]; + drm_clip_rect_t b; + + if (!all) { + GLint x = box->x1; + GLint y = box->y1; + GLint w = box->x2 - x; + GLint h = box->y2 - y; + + if (x < cx) w -= cx - x, x = cx; + if (y < cy) h -= cy - y, y = cy; + if (x + w > cx + cw) w = cx + cw - x; + if (y + h > cy + ch) h = cy + ch - y; + if (w <= 0) continue; + if (h <= 0) continue; + + b.x1 = x; + b.y1 = y; + b.x2 = x + w; + b.y2 = y + h; + } else { + b = *box; + } + + + if (b.x1 > b.x2 || + b.y1 > b.y2 || + b.x2 > intelScreen->width || + b.y2 > intelScreen->height) + 
continue; + + if ( flags & BUFFER_BIT_FRONT_LEFT ) { + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( FRONT_CMD ); + OUT_BATCH( front_pitch | BR13 ); + OUT_BATCH( (b.y1 << 16) | b.x1 ); + OUT_BATCH( (b.y2 << 16) | b.x2 ); + OUT_BATCH( bmBufferOffset(intel, front->buffer) ); + OUT_BATCH( clear_color ); + ADVANCE_BATCH(); + } + + if ( flags & BUFFER_BIT_BACK_LEFT ) { + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( BACK_CMD ); + OUT_BATCH( back_pitch | BR13 ); + OUT_BATCH( (b.y1 << 16) | b.x1 ); + OUT_BATCH( (b.y2 << 16) | b.x2 ); + OUT_BATCH( bmBufferOffset(intel, back->buffer) ); + OUT_BATCH( clear_color ); + ADVANCE_BATCH(); + } + + if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) { + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( DEPTH_CMD ); + OUT_BATCH( depth_pitch | BR13 ); + OUT_BATCH( (b.y1 << 16) | b.x1 ); + OUT_BATCH( (b.y2 << 16) | b.x2 ); + OUT_BATCH( bmBufferOffset(intel, depth->buffer) ); + OUT_BATCH( clear_depth ); + ADVANCE_BATCH(); + } + } + } + intel_batchbuffer_flush( intel->batch ); + UNLOCK_HARDWARE( intel ); +} + + diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h new file mode 100644 index 00000000000..357ceb4c512 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -0,0 +1,66 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_BLIT_H +#define INTEL_BLIT_H + +#include "intel_context.h" +#include "intel_ioctl.h" + +struct buffer; + +extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv, + const drm_clip_rect_t *rect ); +extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all, + GLint cx1, GLint cy1, GLint cw, GLint ch); + +extern void intelEmitCopyBlit( struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + struct buffer *src_buffer, + GLuint src_offset, + GLboolean src_tiled, + GLshort dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort srcx, GLshort srcy, + GLshort dstx, GLshort dsty, + GLshort w, GLshort h ); + +extern void intelEmitFillBlit( struct intel_context *intel, + GLuint cpp, + GLshort dst_pitch, + struct buffer *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLuint color ); + + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c new file mode 100644 index 00000000000..015e433fd7a --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "imports.h" +#include "mtypes.h" +#include "bufferobj.h" + +#include "intel_context.h" +#include "intel_buffer_objects.h" +#include "bufmgr.h" + + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * look up an opaque structure. It would be nice if the handles and + * internal structure were somehow shared. 
+ */ +/** + * Allocate a driver buffer object around a Mesa gl_buffer_object and ask + * the buffer manager (bmGenBuffers) for the handle that backs it. + * + * Returns a pointer to the embedded gl_buffer_object, or NULL if the + * structure itself could not be allocated. + */ +static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, + GLuint name, + GLenum target ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object); + + /* Allocation can fail: report it to the caller instead of + * dereferencing a null pointer in the initialization below. + */ + if (!obj) + return NULL; + + _mesa_initialize_buffer_object(&obj->Base, name, target); + + /* XXX: We generate our own handle, which is different to 'name' above. + */ + bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6); + assert(obj->buffer); + + return &obj->Base; +} + + +/** + * Deallocate/free a vertex/pixel buffer object. + * Releases the buffer-manager handle, if any, then the structure itself. + * Called via glDeleteBuffersARB(). + */ +static void intel_bufferobj_free( GLcontext *ctx, + struct gl_buffer_object *obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + if (intel_obj->buffer) + bmDeleteBuffers( intel, 1, &intel_obj->buffer ); + + _mesa_free(intel_obj); +} + + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Records size and usage on the Mesa object; 'target' is unused here. + * Called via glBufferDataARB(). + */ +static void intel_bufferobj_data( GLcontext *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid *data, + GLenum usage, + struct gl_buffer_object *obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* XXX: do something useful with 'usage' (eg. populate flags + * argument below) + */ + assert(intel_obj); + + obj->Size = size; + obj->Usage = usage; + + bmBufferDataAUB(intel, intel_obj->buffer, size, data, 0, + 0, 0); +} + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). 
+ */ +static void intel_bufferobj_subdata( GLcontext *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, + struct gl_buffer_object * obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* 'target' is unused; offset, size and data are handed to the buffer + * manager exactly as received. + */ + assert(intel_obj); + bmBufferSubDataAUB(intel, intel_obj->buffer, offset, size, data, 0, 0); +} + + +/** + * Forward a read-back request (offset, size, destination pointer) to + * bmBufferGetSubData() for the buffer behind 'obj'. 'target' is unused. + * Called via glGetBufferSubDataARB(). + */ +static void intel_bufferobj_get_subdata( GLcontext *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, + struct gl_buffer_object * obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data); +} + + + +/** + * Map the buffer through bmMapBuffer(), remember the result in + * obj->Pointer and return it. 'target' and 'access' are currently + * ignored (the flags argument is passed as 0). + * Called via glMapBufferARB(). + */ +static void *intel_bufferobj_map( GLcontext *ctx, + GLenum target, + GLenum access, + struct gl_buffer_object *obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* XXX: Translate access to flags arg below: + */ + assert(intel_obj); + assert(intel_obj->buffer); + obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0); + return obj->Pointer; +} + + +/** + * Called via glUnmapBufferARB(). 
+ */ +static GLboolean intel_bufferobj_unmap( GLcontext *ctx, + GLenum target, + struct gl_buffer_object *obj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* The object must have a buffer and a recorded mapping. */ + assert(intel_obj); + assert(intel_obj->buffer); + assert(obj->Pointer); + bmUnmapBufferAUB(intel, intel_obj->buffer, 0, 0); + /* Reset the pointer that intel_bufferobj_map() stored. */ + obj->Pointer = NULL; + return GL_TRUE; +} + +/** + * Return the buffer-manager handle behind a driver buffer object. + * Asserts that the object has a non-zero name and a buffer. + */ +struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) +{ + assert(intel_obj->Base.Name); + assert(intel_obj->buffer); + return intel_obj->buffer; +} + +/** + * Install this file's buffer-object callbacks in the context's driver + * function table. + */ +void intel_bufferobj_init( struct intel_context *intel ) +{ + GLcontext *ctx = &intel->ctx; + + ctx->Driver.NewBufferObject = intel_bufferobj_alloc; + ctx->Driver.DeleteBuffer = intel_bufferobj_free; + ctx->Driver.BufferData = intel_bufferobj_data; + ctx->Driver.BufferSubData = intel_bufferobj_subdata; + ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; + ctx->Driver.MapBuffer = intel_bufferobj_map; + ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; +} diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h new file mode 100644 index 00000000000..4b38803e576 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h @@ -0,0 +1,70 @@ + /************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BUFFEROBJ_H +#define INTEL_BUFFEROBJ_H + +#include "mtypes.h" + +struct intel_context; +struct gl_buffer_object; + + +/** + * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. + */ +struct intel_buffer_object { + struct gl_buffer_object Base; + struct buffer *buffer; /* the low-level buffer manager's buffer handle */ +}; + + +/* Get the bm buffer associated with a GL bufferobject: + */ +struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); + +/* Hook the bufferobject implementation into mesa: + */ +void intel_bufferobj_init( struct intel_context *intel ); + + + +/* Are the obj->Name tests necessary? 
Unfortunately yes, mesa + * allocates a couple of gl_buffer_object structs statically, and + * the Name == 0 test is the only way to identify them and avoid + * casting them erroneously to our structs. + */ +static inline struct intel_buffer_object * +intel_buffer_object( struct gl_buffer_object *obj ) +{ + if (obj->Name) + return (struct intel_buffer_object *)obj; + else + return NULL; +} + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c new file mode 100644 index 00000000000..853956671ce --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_buffers.c @@ -0,0 +1,555 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_blit.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "context.h" +#include "framebuffer.h" +#include "macros.h" +#include "swrast/swrast.h" + +GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst, + const drm_clip_rect_t *a, + const drm_clip_rect_t *b ) +{ + dst->x1 = MAX2(a->x1, b->x1); + dst->x2 = MIN2(a->x2, b->x2); + dst->y1 = MAX2(a->y1, b->y1); + dst->y2 = MIN2(a->y2, b->y2); + + return (dst->x1 <= dst->x2 && + dst->y1 <= dst->y2); +} + +struct intel_region *intel_drawbuf_region( struct intel_context *intel ) +{ + switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { + case BUFFER_BIT_FRONT_LEFT: + return intel->front_region; + case BUFFER_BIT_BACK_LEFT: + return intel->back_region; + default: + /* Not necessary to fallback - could handle either NONE or + * FRONT_AND_BACK cases below. + */ + return NULL; + } +} + +struct intel_region *intel_readbuf_region( struct intel_context *intel ) +{ + GLcontext *ctx = &intel->ctx; + + /* This will have to change to support EXT_fbo's, but is correct + * for now: + */ + switch (ctx->ReadBuffer->_ColorReadBufferIndex) { + case BUFFER_FRONT_LEFT: + return intel->front_region; + case BUFFER_BACK_LEFT: + return intel->back_region; + default: + assert(0); + return NULL; + } +} + + + +static void intelBufferSize(GLframebuffer *buffer, + GLuint *width, + GLuint *height) +{ + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); + /* Need to lock to make sure the driDrawable is uptodate. This + * information is used to resize Mesa's software buffers, so it has + * to be correct. 
+ */ + LOCK_HARDWARE(intel); + if (intel->driDrawable) { + *width = intel->driDrawable->w; + *height = intel->driDrawable->h; + } + else { + *width = 0; + *height = 0; + } + UNLOCK_HARDWARE(intel); +} + + +static void intelSetFrontClipRects( struct intel_context *intel ) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + if (!dPriv) return; + + intel->numClipRects = dPriv->numClipRects; + intel->pClipRects = dPriv->pClipRects; + intel->drawX = dPriv->x; + intel->drawY = dPriv->y; +} + + +static void intelSetBackClipRects( struct intel_context *intel ) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + if (!dPriv) return; + + if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) { + intel->numClipRects = dPriv->numClipRects; + intel->pClipRects = dPriv->pClipRects; + intel->drawX = dPriv->x; + intel->drawY = dPriv->y; + } else { + intel->numClipRects = dPriv->numBackClipRects; + intel->pClipRects = dPriv->pBackClipRects; + intel->drawX = dPriv->backX; + intel->drawY = dPriv->backY; + + if (dPriv->numBackClipRects == 1 && + dPriv->x == dPriv->backX && + dPriv->y == dPriv->backY) { + + /* Repeat the calculation of the back cliprect dimensions here + * as early versions of dri.a in the Xserver are incorrect. Try + * very hard not to restrict future versions of dri.a which + * might eg. allocate truly private back buffers. 
+ */ + int x1, y1; + int x2, y2; + + x1 = dPriv->x; + y1 = dPriv->y; + x2 = dPriv->x + dPriv->w; + y2 = dPriv->y + dPriv->h; + + if (x1 < 0) x1 = 0; + if (y1 < 0) y1 = 0; + if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; + if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; + + if (x1 == dPriv->pBackClipRects[0].x1 && + y1 == dPriv->pBackClipRects[0].y1) { + + dPriv->pBackClipRects[0].x2 = x2; + dPriv->pBackClipRects[0].y2 = y2; + } + } + } +} + + +void intelWindowMoved( struct intel_context *intel ) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + if (!intel->ctx.DrawBuffer) { + intelSetFrontClipRects( intel ); + } + else { + switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { + case BUFFER_BIT_FRONT_LEFT: + intelSetFrontClipRects( intel ); + break; + case BUFFER_BIT_BACK_LEFT: + intelSetBackClipRects( intel ); + break; + default: + /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */ + intelSetFrontClipRects( intel ); + } + } + + _mesa_resize_framebuffer(&intel->ctx, + (GLframebuffer*)dPriv->driverPrivate, + dPriv->w, dPriv->h); + + /* Set state we know depends on drawable parameters: + */ + { + GLcontext *ctx = &intel->ctx; + + if (ctx->Driver.Scissor) + ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height ); + + if (ctx->Driver.DepthRange) + ctx->Driver.DepthRange( ctx, + ctx->Viewport.Near, + ctx->Viewport.Far ); + + intel->NewGLState |= _NEW_SCISSOR; + } +} + + + +/* A true meta version of this would be very simple and additionally + * machine independent. Maybe we'll get there one day. 
+ */ +static void intelClearWithTris(struct intel_context *intel, + GLbitfield mask, + GLboolean all, + GLint cx, GLint cy, + GLint cw, GLint ch) +{ + drm_clip_rect_t clear; + + if (INTEL_DEBUG & DEBUG_DRI) + _mesa_printf("%s %x\n", __FUNCTION__, mask); + + { + + intel->vtbl.install_meta_state(intel); + + /* Refresh the cx/y/w/h values as they may have been invalidated + * by a new window position or size picked up when we did + * LOCK_HARDWARE above. The values passed by mesa are not + * reliable. + */ + { + GLcontext *ctx = &intel->ctx; + cx = ctx->DrawBuffer->_Xmin; + cy = ctx->DrawBuffer->_Ymin; + ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; + cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; + } + + clear.x1 = cx; + clear.y1 = cy; + clear.x2 = cx + cw; + clear.y2 = cy + ch; + + /* Back and stencil cliprects are the same. Try and do both + * buffers at once: + */ + if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { + intel->vtbl.meta_draw_region(intel, + intel->back_region, + intel->depth_region ); + + if (mask & BUFFER_BIT_BACK_LEFT) + intel->vtbl.meta_color_mask(intel, GL_TRUE ); + else + intel->vtbl.meta_color_mask(intel, GL_FALSE ); + + if (mask & BUFFER_BIT_STENCIL) + intel->vtbl.meta_stencil_replace( intel, + intel->ctx.Stencil.WriteMask[0], + intel->ctx.Stencil.Clear); + else + intel->vtbl.meta_no_stencil_write(intel); + + if (mask & BUFFER_BIT_DEPTH) + intel->vtbl.meta_depth_replace( intel ); + else + intel->vtbl.meta_no_depth_write(intel); + + /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the + * drawing origin may not be correctly emitted. 
+ */ + intel->vtbl.meta_draw_quad(intel, + clear.x1, clear.x2, + clear.y1, clear.y2, + intel->ctx.Depth.Clear, + intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2], + intel->clear_chan[3], + 0, 0, 0, 0); + } + + /* Front may have different cliprects: + */ + if (mask & BUFFER_BIT_FRONT_LEFT) { + intel->vtbl.meta_no_depth_write(intel); + intel->vtbl.meta_no_stencil_write(intel); + intel->vtbl.meta_color_mask(intel, GL_TRUE ); + intel->vtbl.meta_draw_region(intel, + intel->front_region, + intel->depth_region); + + /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the + * drawing origin may not be correctly emitted. + */ + intel->vtbl.meta_draw_quad(intel, + clear.x1, clear.x2, + clear.y1, clear.y2, + 0, + intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2], + intel->clear_chan[3], + 0, 0, 0, 0); + } + + intel->vtbl.leave_meta_state( intel ); + } +} + + + + + +static void intelClear(GLcontext *ctx, + GLbitfield mask, + GLboolean all, + GLint cx, GLint cy, + GLint cw, GLint ch) +{ + struct intel_context *intel = intel_context( ctx ); + const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask); + GLbitfield tri_mask = 0; + GLbitfield blit_mask = 0; + GLbitfield swrast_mask = 0; + + if (INTEL_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s %x all %d dims %d,%d %dx%d\n", __FUNCTION__, + mask, all, cx, cy, cw, ch); + + + if (mask & BUFFER_BIT_FRONT_LEFT) { + if (colorMask == ~0) { + blit_mask |= BUFFER_BIT_FRONT_LEFT; + } + else { + tri_mask |= BUFFER_BIT_FRONT_LEFT; + } + } + + if (mask & BUFFER_BIT_BACK_LEFT) { + if (colorMask == ~0) { + blit_mask |= BUFFER_BIT_BACK_LEFT; + } + else { + tri_mask |= BUFFER_BIT_BACK_LEFT; + } + } + + + if (mask & BUFFER_BIT_STENCIL) { + if (!intel->hw_stencil) { + swrast_mask |= BUFFER_BIT_STENCIL; + } + else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff || + intel->depth_region->tiled) { + tri_mask |= BUFFER_BIT_STENCIL; + } + else { + blit_mask |= BUFFER_BIT_STENCIL; + } + } + + /* Do depth with 
stencil if possible to avoid 2nd pass over the + * same buffer. + */ + if (mask & BUFFER_BIT_DEPTH) { + if ((tri_mask & BUFFER_BIT_STENCIL) || + intel->depth_region->tiled) + tri_mask |= BUFFER_BIT_DEPTH; + else + blit_mask |= BUFFER_BIT_DEPTH; + } + + swrast_mask |= (mask & BUFFER_BIT_ACCUM); + + intelFlush( ctx ); + + if (blit_mask) + intelClearWithBlit( ctx, blit_mask, all, cx, cy, cw, ch ); + + if (tri_mask) + intelClearWithTris( intel, tri_mask, all, cx, cy, cw, ch); + + if (swrast_mask) + _swrast_Clear( ctx, swrast_mask, all, cx, cy, cw, ch ); +} + + + + + + + +/* Flip the front & back buffers + */ +static void intelPageFlip( const __DRIdrawablePrivate *dPriv ) +{ +#if 0 + struct intel_context *intel; + int tmp, ret; + + if (INTEL_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; + + intelFlush( &intel->ctx ); + LOCK_HARDWARE( intel ); + + if (dPriv->pClipRects) { + *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0]; + intel->sarea->nbox = 1; + } + + ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); + if (ret) { + fprintf(stderr, "%s: %d\n", __FUNCTION__, ret); + UNLOCK_HARDWARE( intel ); + exit(1); + } + + tmp = intel->sarea->last_enqueue; + intelRefillBatchLocked( intel ); + UNLOCK_HARDWARE( intel ); + + + intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer ); +#endif +} + + +void intelSwapBuffers( __DRIdrawablePrivate *dPriv ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + struct intel_context *intel; + GLcontext *ctx; + intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; + ctx = &intel->ctx; + if (ctx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ + if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! 
*/ + intelPageFlip( dPriv ); + } else { + intelCopyBuffer( dPriv, NULL ); + } + if (intel->aub_file) { + intelFlush(ctx); + intel->vtbl.aub_dump_bmp( intel, 1 ); + + intel->aub_wrap = 1; + } + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); + } +} + +void intelCopySubBuffer( __DRIdrawablePrivate *dPriv, + int x, int y, int w, int h ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + struct intel_context *intel = dPriv->driContextPriv->driverPrivate; + GLcontext *ctx = &intel->ctx; + + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ + intelCopyBuffer( dPriv, &rect ); + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); + } +} + + +static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) +{ + struct intel_context *intel = intel_context(ctx); + int front = 0; + + if (!ctx->DrawBuffer) + return; + + switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { + case BUFFER_BIT_FRONT_LEFT: + front = 1; + FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); + break; + case BUFFER_BIT_BACK_LEFT: + front = 0; + FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); + break; + default: + FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE ); + return; + } + + if ( intel->sarea->pf_current_page == 1 ) + front ^= 1; + + intelSetFrontClipRects( intel ); + + + if (front) { + if (intel->draw_region != intel->front_region) { + intel_region_release(intel, &intel->draw_region); + intel_region_reference(&intel->draw_region, intel->front_region); + } + } else { + if (intel->draw_region != intel->back_region) { + intel_region_release(intel, &intel->draw_region); 
+ intel_region_reference(&intel->draw_region, intel->back_region); + } + } + + intel->vtbl.set_draw_region( intel, + intel->draw_region, + intel->depth_region); +} + +static void intelReadBuffer( GLcontext *ctx, GLenum mode ) +{ + /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ +} + + + +void intelInitBufferFuncs( struct dd_function_table *functions ) +{ + functions->Clear = intelClear; + functions->GetBufferSize = intelBufferSize; + functions->ResizeBuffers = _mesa_resize_framebuffer; + functions->DrawBuffer = intelDrawBuffer; + functions->ReadBuffer = intelReadBuffer; +} diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c new file mode 100644 index 00000000000..59fc8073eee --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -0,0 +1,656 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "glheader.h" +#include "context.h" +#include "matrix.h" +#include "simple_list.h" +#include "extensions.h" +#include "framebuffer.h" +#include "imports.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" +#include "array_cache/acache.h" + +#include "tnl/t_pipeline.h" +#include "tnl/t_vertex.h" + +#include "drivers/common/driverfuncs.h" + +#include "intel_screen.h" + +#include "i830_dri.h" +#include "i830_common.h" + +#include "intel_tex.h" +#include "intel_span.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_regions.h" +#include "intel_buffer_objects.h" + +#include "bufmgr.h" + +#include "utils.h" +#ifndef INTEL_DEBUG +int INTEL_DEBUG = (0); +#endif + +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ARB_window_pos +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#include "extension_helper.h" + +#ifndef VERBOSE +int VERBOSE = 0; +#endif + +#if DEBUG_LOCKING +char *prevLockFile; +int prevLockLine; +#endif + +/*************************************** + * Mesa's Driver Functions + ***************************************/ + +#define DRIVER_VERSION "4.1.3002" + +static const GLubyte *intelGetString( GLcontext 
*ctx, GLenum name ) +{ + const char * chipset; + static char buffer[128]; + + switch (name) { + case GL_VENDOR: + return (GLubyte *)"Tungsten Graphics, Inc"; + break; + + case GL_RENDERER: + switch (intel_context(ctx)->intelScreen->deviceID) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; break; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; break; + break; + case PCI_CHIP_I946_GZ: + chipset = "Intel(R) 946GZ"; break; + break; + default: + chipset = "Unknown Intel Chipset"; break; + } + + (void) driGetRendererString( buffer, chipset, DRIVER_VERSION, 0 ); + return (GLubyte *) buffer; + + default: + return NULL; + } +} + + +/** + * Extension strings exported by the intel driver. + * + * \note + * It appears that ARB_texture_env_crossbar has "disappeared" compared to the + * old i830-specific driver. + */ +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_texture_border_clamp", NULL }, + { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_texture_cube_map", NULL }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, + { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, + { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, + { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, + { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, + { "GL_EXT_blend_logic_op", NULL }, + { "GL_EXT_blend_subtract", NULL }, + { 
"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_stencil_wrap", NULL }, + { "GL_EXT_texture_edge_clamp", NULL }, + { "GL_EXT_texture_env_combine", NULL }, + { "GL_EXT_texture_env_dot3", NULL }, + { "GL_EXT_texture_filter_anisotropic", NULL }, + { "GL_EXT_texture_lod_bias", NULL }, + { "GL_3DFX_texture_compression_FXT1", NULL }, + { "GL_APPLE_client_storage", NULL }, + { "GL_MESA_pack_invert", NULL }, + { "GL_MESA_ycbcr_texture", NULL }, + { "GL_NV_blend_square", NULL }, + { "GL_SGIS_generate_mipmap", NULL }, + { NULL, NULL } +}; + + + +static const struct dri_debug_control debug_control[] = +{ + { "fall", DEBUG_FALLBACKS }, + { "tex", DEBUG_TEXTURE }, + { "ioctl", DEBUG_IOCTL }, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "state", DEBUG_STATE }, + { "verb", DEBUG_VERBOSE }, + { "dri", DEBUG_DRI }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sync", DEBUG_SYNC }, + { "sleep", DEBUG_SLEEP }, + { "pix", DEBUG_PIXEL }, + { "buf", DEBUG_BUFMGR }, + { "stats", DEBUG_STATS }, + { "tile", DEBUG_TILE }, + { "sing", DEBUG_SINGLE_THREAD }, + { "thre", DEBUG_SINGLE_THREAD }, + { "wm", DEBUG_WM }, + { "vs", DEBUG_VS }, + { NULL, 0 } +}; + + +static void intelInvalidateState( GLcontext *ctx, GLuint new_state ) +{ + struct intel_context *intel = intel_context(ctx); + + _swrast_InvalidateState( ctx, new_state ); + _swsetup_InvalidateState( ctx, new_state ); + _ac_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); + _tnl_invalidate_vertex_state( ctx, new_state ); + + intel->NewGLState |= new_state; + + if (intel->vtbl.invalidate_state) + intel->vtbl.invalidate_state( intel, new_state ); +} + + +void intelFlush( GLcontext *ctx ) +{ + struct intel_context *intel = intel_context( ctx ); + + bmLockAndFence(intel); +} + +void 
intelFinish( GLcontext *ctx ) +{ + struct intel_context *intel = intel_context( ctx ); + + bmFinishFence(intel, bmLockAndFence(intel)); +} + + +void intelInitDriverFunctions( struct dd_function_table *functions ) +{ + _mesa_init_driver_functions( functions ); + + functions->Flush = intelFlush; + functions->Finish = intelFinish; + functions->GetString = intelGetString; + functions->UpdateState = intelInvalidateState; + functions->CopyColorTable = _swrast_CopyColorTable; + functions->CopyColorSubTable = _swrast_CopyColorSubTable; + functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; + functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; + + /* Pixel path fallbacks. + */ + functions->Accum = _swrast_Accum; + functions->Bitmap = _swrast_Bitmap; + functions->CopyPixels = _swrast_CopyPixels; + functions->ReadPixels = _swrast_ReadPixels; + functions->DrawPixels = _swrast_DrawPixels; + + intelInitTextureFuncs( functions ); + intelInitStateFuncs( functions ); + intelInitBufferFuncs( functions ); +} + + + +GLboolean intelInitContext( struct intel_context *intel, + const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions ) +{ + GLcontext *ctx = &intel->ctx; + GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; + volatile drmI830Sarea *saPriv = (volatile drmI830Sarea *) + (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); + + if (!_mesa_initialize_context(&intel->ctx, + mesaVis, shareCtx, + functions, + (void*) intel)) { + _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__); + return GL_FALSE; + } + + driContextPriv->driverPrivate = intel; + intel->intelScreen = intelScreen; + intel->driScreen = sPriv; + intel->sarea = saPriv; + + + ctx->Const.MaxTextureMaxAnisotropy = 2.0; + + if 
(getenv("INTEL_STRICT_CONFORMANCE")) { + intel->strict_conformance = 1; + } + + if (intel->strict_conformance) { + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 1.0; + ctx->Const.MaxLineWidthAA = 1.0; + ctx->Const.LineWidthGranularity = 1.0; + } + else { + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 5.0; + ctx->Const.MaxLineWidthAA = 5.0; + ctx->Const.LineWidthGranularity = 0.5; + } + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = 255.0; + ctx->Const.MaxPointSizeAA = 3.0; + ctx->Const.PointSizeGranularity = 1.0; + + /* Initialize the software rasterizer and helper modules. */ + _swrast_CreateContext( ctx ); + _ac_CreateContext( ctx ); + _tnl_CreateContext( ctx ); + _swsetup_CreateContext( ctx ); + + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + /* Configure swrast to match hardware characteristics: */ + _swrast_allow_pixel_fog( ctx, GL_FALSE ); + _swrast_allow_vertex_fog( ctx, GL_TRUE ); + + /* Dri stuff */ + intel->hHWContext = driContextPriv->hHWContext; + intel->driFd = sPriv->fd; + intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock; + + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; + intel->hw_stipple = 1; + + switch(mesaVis->depthBits) { + case 0: /* what to do in this case? 
*/ + case 16: + intel->depth_scale = 1.0/0xffff; + intel->polygon_offset_scale = 1.0/0xffff; + intel->depth_clear_mask = ~0; + intel->ClearDepth = 0xffff; + break; + case 24: + intel->depth_scale = 1.0/0xffffff; + intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */ + intel->depth_clear_mask = 0x00ffffff; + intel->stencil_clear_mask = 0xff000000; + intel->ClearDepth = 0x00ffffff; + break; + default: + assert(0); + break; + } + + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs( ctx ); + + intel->no_hw = getenv("INTEL_NO_HW") != NULL; + + if (!intel->intelScreen->irq_active) { + _mesa_printf("IRQs not active. Exiting\n"); + exit(1); + } + + _math_matrix_ctr (&intel->ViewportMatrix); + + driInitExtensions( ctx, card_extensions, + GL_TRUE ); + + INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), + debug_control ); + + + /* Buffer manager: + */ + intel->bm = bm_fake_intel_Attach( intel ); + + + bmInitPool(intel, + intel->intelScreen->tex.offset, /* low offset */ + intel->intelScreen->tex.map, /* low virtual */ + intel->intelScreen->tex.size, + BM_MEM_AGP); + + /* These are still static, but create regions for them. + */ + intel->front_region = + intel_region_create_static(intel, + BM_MEM_AGP, + intelScreen->front.offset, + intelScreen->front.map, + intelScreen->cpp, + intelScreen->front.pitch / intelScreen->cpp, + intelScreen->height, + GL_FALSE); + + + intel->back_region = + intel_region_create_static(intel, + BM_MEM_AGP, + intelScreen->back.offset, + intelScreen->back.map, + intelScreen->cpp, + intelScreen->back.pitch / intelScreen->cpp, + intelScreen->height, + (INTEL_DEBUG & DEBUG_TILE) ? 0 : 1); + + /* Still assuming front.cpp == depth.cpp + * + * XXX: Setting tiling to false because Depth tiling only supports + * YMAJOR but the blitter only supports XMAJOR tiling. Have to + * resolve later. 
+ */ + intel->depth_region = + intel_region_create_static(intel, + BM_MEM_AGP, + intelScreen->depth.offset, + intelScreen->depth.map, + intelScreen->cpp, + intelScreen->depth.pitch / intelScreen->cpp, + intelScreen->height, + (INTEL_DEBUG & DEBUG_TILE) ? 0 : 1); + + intel_bufferobj_init( intel ); + intel->batch = intel_batchbuffer_alloc( intel ); + + if (intel->ctx.Mesa_DXTn) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + _mesa_enable_extension( ctx, "GL_S3_s3tc" ); + } + else if (driQueryOptionb (&intelScreen->optionCache, "force_s3tc_enable")) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + } + +/* driInitTextureObjects( ctx, & intel->swapped, */ +/* DRI_TEXMGR_DO_TEXTURE_1D | */ +/* DRI_TEXMGR_DO_TEXTURE_2D | */ +/* DRI_TEXMGR_DO_TEXTURE_RECT ); */ + + + intel->prim.primitive = ~0; + + if (getenv("INTEL_NO_RAST")) { + fprintf(stderr, "disabling 3D rasterization\n"); + intel->no_rast = 1; + } + + + return GL_TRUE; +} + +void intelDestroyContext(__DRIcontextPrivate *driContextPriv) +{ + struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; + + assert(intel); /* should never be null */ + if (intel) { + GLboolean release_texture_heaps; + + + intel->vtbl.destroy( intel ); + + release_texture_heaps = (intel->ctx.Shared->RefCount == 1); + _swsetup_DestroyContext (&intel->ctx); + _tnl_DestroyContext (&intel->ctx); + _ac_DestroyContext (&intel->ctx); + + _swrast_DestroyContext (&intel->ctx); + intel->Fallback = 0; /* don't call _swrast_Flush later */ + intel_batchbuffer_free(intel->batch); + intel->batch = NULL; + + + if ( release_texture_heaps ) { + /* This share group is about to go away, free our private + * texture object data. + */ + + /* XXX: destroy the shared bufmgr struct here? 
+ */ + } + + /* Free the regions created to describe front/back/depth + * buffers: + */ +#if 0 + intel_region_release(intel, &intel->front_region); + intel_region_release(intel, &intel->back_region); + intel_region_release(intel, &intel->depth_region); + intel_region_release(intel, &intel->draw_region); +#endif + + /* free the Mesa context */ + _mesa_destroy_context(&intel->ctx); + } + + driContextPriv->driverPrivate = NULL; +} + +GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv) +{ + return GL_TRUE; +} + +GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + + if (driContextPriv) { + struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; + + if ( intel->driDrawable != driDrawPriv ) { + /* Shouldn't the readbuffer be stored also? */ + intel->driDrawable = driDrawPriv; + intelWindowMoved( intel ); + } + + _mesa_make_current(&intel->ctx, + (GLframebuffer *) driDrawPriv->driverPrivate, + (GLframebuffer *) driReadPriv->driverPrivate); + + intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] ); + } else { + _mesa_make_current(NULL, NULL, NULL); + } + + return GL_TRUE; +} + + +static void lost_hardware( struct intel_context *intel ) +{ + bm_fake_NotifyContendedLockTake( intel ); + intel->vtbl.lost_hardware( intel ); +} + +static void intelContendedLock( struct intel_context *intel, GLuint flags ) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + __DRIscreenPrivate *sPriv = intel->driScreen; + volatile drmI830Sarea * sarea = intel->sarea; + int me = intel->hHWContext; + + drmGetLock(intel->driFd, intel->hHWContext, flags); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); + + + intel->locked = 1; + + /* Lost context? 
/* File-scope mutex taken at the top of LOCK_HARDWARE(). */
_glthread_DECLARE_STATIC_MUTEX(lockMutex);

/* Lock the hardware and validate our state.
 *
 * Sequence, in order:
 *   1. take lockMutex and assert this context is not already marked locked;
 *   2. try to take the DRM hardware lock with DRM_CAS, falling back to
 *      intelContendedLock() when __ret comes back non-zero;
 *   3. handle a pending aub_wrap request and any buffer-manager error;
 *   4. map the batchbuffer, evicting everything and retrying once on failure.
 *
 * The function returns with lockMutex still held.
 */
void LOCK_HARDWARE( struct intel_context *intel )
{
    char __ret=0;

    _glthread_LOCK_MUTEX(lockMutex);
    assert(!intel->locked);


    /* NOTE(review): presumably __ret != 0 means the compare-and-swap lost
     * the race for the lock -- confirm against the DRM_CAS definition.
     */
    DRM_CAS(intel->driHwLock, intel->hHWContext,
        (DRM_LOCK_HELD|intel->hHWContext), __ret);
    if (__ret)
        intelContendedLock( intel, 0 );

   /* intelContendedLock() also sets this; set it here for the fast path. */
   intel->locked = 1;

   /* Same two calls as lost_hardware(), followed by the aub_wrap hook;
    * the request flag is cleared afterwards.
    */
   if (intel->aub_wrap) {
      bm_fake_NotifyContendedLockTake( intel );
      intel->vtbl.lost_hardware( intel );
      intel->vtbl.aub_wrap(intel);
      intel->aub_wrap = 0;
   }

   /* Buffer manager reported an error: evict everything and tell the
    * hardware-specific layer that its state is gone.
    */
   if (bmError(intel)) {
      bmEvictAll(intel);
      intel->vtbl.lost_hardware( intel );
   }

   /* Make sure nothing has been emitted prior to getting the lock:
    */
   assert(intel->batch->map == 0);

   /* XXX: postpone, may not be needed:
    */
   if (!intel_batchbuffer_map(intel->batch)) {
      bmEvictAll(intel);
      intel->vtbl.lost_hardware( intel );

      /* This could only fail if the batchbuffer was greater in size
       * than the available texture memory:
       */
      if (!intel_batchbuffer_map(intel->batch)) {
	 _mesa_printf("double failure to map batchbuffer\n");
	 assert(0);
      }
   }
}
intel->hHWContext); + _glthread_UNLOCK_MUTEX(lockMutex); +} + diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h new file mode 100644 index 00000000000..0328cb900a8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -0,0 +1,524 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTELCONTEXT_INC +#define INTELCONTEXT_INC + + + +#include "mtypes.h" +#include "drm.h" +#include "texmem.h" + +#include "intel_screen.h" +#include "i830_common.h" +#include "tnl/t_vertex.h" + +#define TAG(x) intel##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +#define DV_PF_555 (1<<8) +#define DV_PF_565 (2<<8) +#define DV_PF_8888 (3<<8) + +struct intel_region; +struct intel_context; + +typedef void (*intel_tri_func)(struct intel_context *, intelVertex *, intelVertex *, + intelVertex *); +typedef void (*intel_line_func)(struct intel_context *, intelVertex *, intelVertex *); +typedef void (*intel_point_func)(struct intel_context *, intelVertex *); + +#define INTEL_FALLBACK_DRAW_BUFFER 0x1 +#define INTEL_FALLBACK_READ_BUFFER 0x2 +#define INTEL_FALLBACK_USER 0x4 +#define INTEL_FALLBACK_RENDERMODE 0x8 +#define INTEL_FALLBACK_TEXTURE 0x10 + +extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ); +#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) + + + +struct intel_texture_object +{ + struct gl_texture_object base; /* The "parent" object */ + + /* The mipmap tree must include at least these levels once + * validated: + */ + GLuint firstLevel; + GLuint lastLevel; + + GLuint dirty_images[6]; + GLuint dirty; + + /* On validation any active images held in main memory or in other + * regions will be copied to this region and the old storage freed. 
+ */ + struct intel_mipmap_tree *mt; +}; + + + + +struct intel_context +{ + GLcontext ctx; /* the parent class */ + + struct { + void (*destroy)( struct intel_context *intel ); + void (*emit_state)( struct intel_context *intel ); + void (*emit_invarient_state)( struct intel_context *intel ); + void (*lost_hardware)( struct intel_context *intel ); + void (*note_fence)( struct intel_context *intel, GLuint fence ); + void (*note_unlock)( struct intel_context *intel ); + void (*update_texture_state)( struct intel_context *intel ); + + void (*render_start)( struct intel_context *intel ); + void (*set_draw_region)( struct intel_context *intel, + struct intel_region *draw_region, + struct intel_region *depth_region ); + + GLuint (*flush_cmd)( void ); + + void (*emit_flush)( struct intel_context *intel, + GLuint unused ); + + void (*aub_commands)( struct intel_context *intel, + GLuint offset, + const void *buf, + GLuint sz ); + void (*aub_dump_bmp)( struct intel_context *intel, GLuint buffer ); + void (*aub_wrap)( struct intel_context *intel ); + void (*aub_gtt_data)( struct intel_context *intel, + GLuint offset, + const void *src, + GLuint size, + GLuint aubtype, + GLuint aubsubtype); + + + void (*reduced_primitive_state)( struct intel_context *intel, GLenum rprim ); + + GLboolean (*check_vertex_size)( struct intel_context *intel, GLuint expected ); + + void (*invalidate_state)( struct intel_context *intel, GLuint new_state ); + + /* Metaops: + */ + void (*install_meta_state)( struct intel_context *intel ); + void (*leave_meta_state)( struct intel_context *intel ); + + void (*meta_draw_region)( struct intel_context *intel, + struct intel_region *draw_region, + struct intel_region *depth_region ); + + void (*meta_color_mask)( struct intel_context *intel, + GLboolean ); + + void (*meta_stencil_replace)( struct intel_context *intel, + GLuint mask, + GLuint clear ); + + void (*meta_depth_replace)( struct intel_context *intel ); + + void (*meta_no_stencil_write)( struct 
intel_context *intel ); + void (*meta_no_depth_write)( struct intel_context *intel ); + void (*meta_no_texture)( struct intel_context *intel ); + + void (*meta_draw_quad)(struct intel_context *intel, + GLfloat x0, GLfloat x1, + GLfloat y0, GLfloat y1, + GLfloat z, + GLubyte red, GLubyte green, + GLubyte blue, GLubyte alpha, + GLfloat s0, GLfloat s1, + GLfloat t0, GLfloat t1); + + + + } vtbl; + + GLint refcount; + GLuint Fallback; + GLuint NewGLState; + + GLuint last_swap_fence; + GLuint second_last_swap_fence; + + GLboolean aub_wrap; + + struct intel_batchbuffer *batch; + + struct { + GLuint id; + GLuint primitive; + GLubyte *start_ptr; + void (*flush)( struct intel_context * ); + } prim; + + GLboolean locked; + GLboolean strict_conformance; + + GLubyte clear_chan[4]; + GLuint ClearColor; + GLuint ClearDepth; + + GLfloat depth_scale; + GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ + GLuint depth_clear_mask; + GLuint stencil_clear_mask; + + GLboolean hw_stencil; + GLboolean hw_stipple; + GLboolean depth_buffer_is_float; + GLboolean no_hw; + GLboolean no_rast; + GLboolean thrashing; + + + /* AGP memory buffer manager: + */ + struct bufmgr *bm; + + + /* State for intelvb.c and inteltris.c. + */ + GLuint RenderIndex; + GLmatrix ViewportMatrix; + GLenum render_primitive; + GLenum reduced_primitive; + GLuint vertex_size; + GLubyte *verts; /* points to tnl->clipspace.vertex_buf */ + + + struct intel_region *front_region; + struct intel_region *back_region; + struct intel_region *draw_region; + struct intel_region *depth_region; + + + /* Fallback rasterization functions + */ + intel_point_func draw_point; + intel_line_func draw_line; + intel_tri_func draw_tri; + + /* These refer to the current draw (front vs. 
/* ================================================================
 * Color packing:
 *
 * Each macro packs 8-bit-per-channel r/g/b(/a) values into the named
 * pixel layout.  Every argument is parenthesised so that expression
 * arguments (e.g. `x | y`) pack correctly.  The 8888 variant shifts in
 * unsigned arithmetic: an alpha of 0x80 or more shifted left by 24 does
 * not fit in a signed int.
 */

#define INTEL_PACKCOLOR4444(r,g,b,a) \
  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))

#define INTEL_PACKCOLOR1555(r,g,b,a) \
  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
    ((a) ? 0x8000 : 0))

#define INTEL_PACKCOLOR565(r,g,b) \
  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))

#define INTEL_PACKCOLOR8888(r,g,b,a) \
  (((unsigned)(a) << 24) | ((unsigned)(r) << 16) | \
   ((unsigned)(g) << 8) | (unsigned)(b))


/* Pack according to a DV_PF_* format code; yields 0 for unknown formats. */
#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
((format) == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
 ((format) == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
  ((format) == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
   0)))
/**
 * Copy \p n bytes from \p src to \p dest, choosing the copy routine by
 * alignment.
 *
 * When either pointer is not 64-byte aligned, the driver's own __memcpy
 * is used; otherwise the system memcpy is used.  See the note on
 * write-combined AGP memory that precedes this helper.
 *
 * The alignment test goes through size_t rather than unsigned, so the
 * pointer-to-integer conversion is not narrowed on 64-bit builds.
 *
 * \return \p dest, as returned by the copy routine that was used.
 */
static inline void *do_memcpy( void *dest, const void *src, size_t n )
{
   const size_t misaligned = (((size_t)src) | ((size_t)dest)) & 63;

   if (misaligned) {
      return __memcpy(dest, src, n);
   }
   else
      return memcpy(dest, src, n);
}
COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + + +extern int intel_translate_compare_func( GLenum func ); +extern int intel_translate_stencil_op( GLenum op ); +extern int intel_translate_blend_factor( GLenum factor ); +extern int intel_translate_logic_op( GLenum opcode ); + + +/* ================================================================ + * intel_buffers.c: + */ +void intelInitBufferFuncs( struct dd_function_table *functions ); + +struct intel_region *intel_readbuf_region( struct intel_context *intel ); +struct intel_region *intel_drawbuf_region( struct 
intel_context *intel ); + +extern void intelWindowMoved( struct intel_context *intel ); + +extern GLboolean intel_intersect_cliprects( drm_clip_rect_t *dest, + const drm_clip_rect_t *a, + const drm_clip_rect_t *b ); + + + +#define _NEW_WINDOW_POS 0x40000000 + + +/*====================================================================== + * Inline conversion functions. + * These are better-typed than the macros used previously: + */ +static inline struct intel_context *intel_context( GLcontext *ctx ) +{ + return (struct intel_context *)ctx; +} + +static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj ) +{ + return (struct intel_texture_object *)obj; +} + +static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img ) +{ + return (struct intel_texture_image *)img; +} + + +#endif + diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.c b/src/mesa/drivers/dri/i965/intel_ioctl.c new file mode 100644 index 00000000000..9297543f82b --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_ioctl.c @@ -0,0 +1,182 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "drm.h"
+#include "bufmgr.h"
+
+
+/* Issue DRM_I830_IRQ_EMIT and return the sequence number.
+ *
+ * The hardware lock must be held by this context (asserted).  'seq' is
+ * handed to the ioctl by address, so presumably the kernel fills it in.
+ * With intel->no_hw set no ioctl is made and 1 is returned.  A failing
+ * ioctl is reported on stderr and terminates the process.
+ */
+int intelEmitIrqLocked( struct intel_context *intel )
+{
+   int seq = 1;
+   drmI830IrqEmit emit;
+   int err;
+
+   if (intel->no_hw)
+      return seq;
+
+   assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) ==
+          (DRM_LOCK_HELD|intel->hHWContext));
+
+   emit.irq_seq = &seq;
+
+   err = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT,
+                              &emit, sizeof(emit) );
+   if (err != 0) {
+      fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, err );
+      exit(1);
+   }
+
+   if (0)
+      fprintf(stderr, "%s --> %d\n", __FUNCTION__, seq );
+
+   return seq;
+}
+
+/* Wait for the interrupt with sequence number 'seq'.
+ *
+ * The wait ioctl is re-issued while it reports -EAGAIN or -EINTR, and
+ * also while it succeeds but sarea->last_dispatch has not reached 'seq'
+ * yet.  Any other result is fatal; when an AUB file is open the current
+ * buffer is dumped first.  Nothing happens when intel->no_hw is set.
+ */
+void intelWaitIrq( struct intel_context *intel, int seq )
+{
+   drmI830IrqWait wait;
+   int rc;
+   int retry;
+
+   if (intel->no_hw)
+      return;
+
+   if (0)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, seq );
+
+   wait.irq_seq = seq;
+
+   do {
+      rc = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &wait, sizeof(wait) );
+
+      /* The ioctl quite often seems to come back before it should,
+       * hence the extra check against last_dispatch.
+       */
+      retry = (rc == -EAGAIN ||
+               rc == -EINTR ||
+               (rc == 0 && seq > intel->sarea->last_dispatch));
+   } while (retry);
+
+   if (rc == 0)
+      return;
+
+   fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, rc );
+
+   if (intel->aub_file) {
+      intel->vtbl.aub_dump_bmp( intel, intel->ctx.Visual.doubleBufferMode ? 1 : 0 );
+   }
+
+   exit(1);
+}
+
+
+/* Submit a batch buffer through DRM_I830_BATCHBUFFER.
+ *
+ * start_offset / used describe the buffer range; with ignore_cliprects
+ * set, zero cliprects are passed instead of the context's current list.
+ * The caller must hold the hardware lock (asserted) and 'used' must be
+ * non-zero.  On ioctl failure the lock is dropped and the process exits.
+ */
+void intel_batch_ioctl( struct intel_context *intel,
+                        GLuint start_offset,
+                        GLuint used,
+                        GLboolean ignore_cliprects)
+{
+   drmI830BatchBuffer batch;
+
+   assert(intel->locked);
+   assert(used);
+
+   if (0)
+      fprintf(stderr, "%s used %d offset %x..%x ignore_cliprects %d\n",
+              __FUNCTION__,
+              used,
+              start_offset,
+              start_offset + used,
+              ignore_cliprects);
+
+   batch.start = start_offset;
+   batch.used = used;
+   batch.cliprects = intel->pClipRects;
+   batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   batch.DR1 = 0;
+   /* drawX in the low 16 bits, drawY in the high 16 bits */
+   batch.DR4 = ((((GLuint)intel->drawX) & 0xffff) |
+                (((GLuint)intel->drawY) << 16));
+
+   if (INTEL_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: 0x%x..0x%x DR4: %x cliprects: %d\n",
+              __FUNCTION__,
+              batch.start,
+              batch.start + batch.used * 4,
+              batch.DR4, batch.num_cliprects);
+
+   if (intel->no_hw)
+      return;
+
+   if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch,
+                        sizeof(batch)) != 0) {
+      fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n", -errno);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+}
+
+/* Submit a command buffer held in client memory through
+ * DRM_I830_CMDBUFFER.  Same locking, cliprect and failure rules as
+ * intel_batch_ioctl().
+ */
+void intel_cmd_ioctl( struct intel_context *intel,
+                      char *buf,
+                      GLuint used,
+                      GLboolean ignore_cliprects)
+{
+   drmI830CmdBuffer cmd;
+
+   assert(intel->locked);
+   assert(used);
+
+   cmd.buf = buf;
+   cmd.sz = used;
+   cmd.cliprects = intel->pClipRects;
+   cmd.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   cmd.DR1 = 0;
+   /* drawX in the low 16 bits, drawY in the high 16 bits */
+   cmd.DR4 = ((((GLuint)intel->drawX) & 0xffff) |
+              (((GLuint)intel->drawY) << 16));
+
+   if (INTEL_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: 0x%x..0x%x DR4: %x cliprects: %d\n",
+              __FUNCTION__,
+              0,
+              0 + cmd.sz,
+              cmd.DR4, cmd.num_cliprects);
+
+   if (intel->no_hw)
+      return;
+
+   if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd,
+                        sizeof(cmd)) != 0) {
+      fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n", -errno);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.h b/src/mesa/drivers/dri/i965/intel_ioctl.h
new file mode 100644
index 00000000000..dcebcb06d1d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_ioctl.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_IOCTL_H +#define INTEL_IOCTL_H + +#include "intel_context.h" + +void intelWaitIrq( struct intel_context *intel, int seq ); +int intelEmitIrqLocked( struct intel_context *intel ); + +void intel_batch_ioctl( struct intel_context *intel, + GLuint start_offset, + GLuint used, + GLboolean ignore_cliprects); + +void intel_cmd_ioctl( struct intel_context *intel, + char *buf, + GLuint used, + GLboolean ignore_cliprects); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c new file mode 100644 index 00000000000..8486086b274 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -0,0 +1,247 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "bufmgr.h"
+#include "enums.h"
+#include "imports.h"
+
+/* Fold the six cube-map face targets into GL_TEXTURE_CUBE_MAP_ARB; any
+ * other target is returned unchanged.
+ */
+static GLenum target_to_target( GLenum target )
+{
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+      return GL_TEXTURE_CUBE_MAP_ARB;
+   default:
+      return target;
+   }
+}
+
+/* Create a mipmap tree: record the key parameters, let the layout code
+ * compute pitch / total_height and the per-level offsets, then allocate
+ * a region big enough to hold every image.
+ *
+ * For compressed formats the stored cpp is forced to 2.
+ *
+ * Returns NULL when memory cannot be allocated, when the layout is
+ * rejected, or when the region cannot be allocated.  Nothing is leaked
+ * on those paths.
+ */
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+                                                GLenum target,
+                                                GLenum internal_format,
+                                                GLuint first_level,
+                                                GLuint last_level,
+                                                GLuint width0,
+                                                GLuint height0,
+                                                GLuint depth0,
+                                                GLuint cpp,
+                                                GLboolean compressed)
+{
+   GLboolean ok;
+   struct intel_mipmap_tree *mt = calloc(1, sizeof(*mt));
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__,
+                   _mesa_lookup_enum_by_nr(target),
+                   _mesa_lookup_enum_by_nr(internal_format),
+                   first_level,
+                   last_level);
+
+   /* Out of memory: nothing to fill in, nothing to release. */
+   if (!mt)
+      return NULL;
+
+   mt->target = target_to_target(target);
+   mt->internal_format = internal_format;
+   mt->first_level = first_level;
+   mt->last_level = last_level;
+   mt->width0 = width0;
+   mt->height0 = height0;
+   mt->depth0 = depth0;
+   mt->cpp = compressed ? 2 : cpp;
+   mt->compressed = compressed;
+
+   switch (intel->intelScreen->deviceID) {
+#if 0
+   case PCI_CHIP_I945_G:
+      ok = i945_miptree_layout( mt );
+      break;
+   case PCI_CHIP_I915_G:
+   case PCI_CHIP_I915_GM:
+      ok = i915_miptree_layout( mt );
+      break;
+#endif
+   default:
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+         _mesa_printf("assuming BRW texture layouts\n");
+      ok = brw_miptree_layout( mt );
+      break;
+   }
+
+   if (ok)
+      mt->region = intel_region_alloc( intel,
+                                       mt->cpp,
+                                       mt->pitch,
+                                       mt->total_height );
+
+   if (!mt->region) {
+      /* The layout pass may already have allocated per-level
+       * image_offset tables; intel_miptree_destroy() frees those as
+       * well as the tree itself (and copes with a NULL region).
+       */
+      intel_miptree_destroy(intel, mt);
+      return NULL;
+   }
+
+   return mt;
+}
+
+
+
+/* Release the tree's region reference, free every per-level offset
+ * table and then the tree itself.  A NULL tree is ignored.
+ */
+void intel_miptree_destroy( struct intel_context *intel,
+                            struct intel_mipmap_tree *mt )
+{
+   if (mt) {
+      GLuint i;
+
+      intel_region_release(intel, &(mt->region));
+
+      /* free(NULL) is a no-op, so unused levels need no test */
+      for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
+         free(mt->level[i].image_offset);
+
+      free(mt);
+   }
+}
+
+
+
+
+/* Record size and position of one mipmap level.
+ *
+ * x, y are in pixels relative to the start of the tree; the stored
+ * level_offset is the byte offset (x + y * pitch) * cpp.  When the
+ * level holds more than one image an offset table with nr_images
+ * entries is allocated, all entries starting out as 0; entries 1..n-1
+ * are meant to be filled in with intel_miptree_set_image_offset().
+ */
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+                                  GLuint level,
+                                  GLuint nr_images,
+                                  GLuint x, GLuint y,
+                                  GLuint w, GLuint h, GLuint d)
+{
+   mt->level[level].width = w;
+   mt->level[level].height = h;
+   mt->level[level].depth = d;
+   mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
+   mt->level[level].nr_images = nr_images;
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h,
+                   mt->level[level].level_offset);
+
+   /* Not sure when this would happen, but anyway:
+    */
+   if (mt->level[level].image_offset) {
+      free(mt->level[level].image_offset);
+      mt->level[level].image_offset = NULL;
+   }
+
+   if (nr_images > 1) {
+      /* calloc checks nr_images * sizeof(GLuint) for overflow and
+       * leaves image_offset[0] == 0 as before.
+       */
+      mt->level[level].image_offset = calloc(nr_images, sizeof(GLuint));
+      assert(mt->level[level].image_offset);
+   }
+}
+
+
+
+/* Store the byte offset of image 'img' relative to the start of its
+ * level.  Image 0 always sits at the level origin, so only x == 0,
+ * y == 0 is accepted for it and nothing is stored.
+ */
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+                                    GLuint level,
+                                    GLuint img,
+                                    GLuint x, GLuint y)
+{
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y);
+
+   if (img == 0)
+      assert(x == 0 && y == 0);
+
+   if (img > 0)
+      mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
+}
+
+
+/* Although we use the image_offset[] array to store relative offsets
+ * to cube faces, Mesa doesn't know anything about this and expects
+ * each cube face to be treated as a separate image.
+ *
+ * These functions present that view to mesa:
+ */
+
+/* Offsets of the depth slices of a level.  Anything that is not a 3D
+ * texture with more than one image gets a pointer to a single static 0.
+ */
+const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
+                                          GLuint level)
+{
+   static const GLuint zero = 0;
+
+   if (mt->target != GL_TEXTURE_3D ||
+       mt->level[level].nr_images == 1)
+      return &zero;
+   else
+      return mt->level[level].image_offset;
+}
+
+
+/* Byte offset of an image from the start of the tree.  For cube maps
+ * the offset of the requested face is added to the level offset.
+ */
+GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+                                  GLuint face,
+                                  GLuint level)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+      return (mt->level[level].level_offset +
+              mt->level[level].image_offset[face]);
+   else
+      return mt->level[level].level_offset;
+}
+
+
+
+
+
+
+/* Upload data for a particular image.
+ *
+ * One 2D slice is uploaded per depth value; the source pointer moves
+ * forward by src_image_pitch bytes between slices.  Returns GL_FALSE as
+ * soon as one slice upload fails.
+ */
+GLboolean intel_miptree_image_data(struct intel_context *intel,
+                                   struct intel_mipmap_tree *dst,
+                                   GLuint face,
+                                   GLuint level,
+                                   const void *src,
+                                   GLuint src_row_pitch,
+                                   GLuint src_image_pitch)
+{
+   GLuint depth = dst->level[level].depth;
+   GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
+   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
+   /* Byte cursor: arithmetic on 'const void *' is a GNU extension. */
+   const GLubyte *src_bytes = src;
+   GLuint i;
+
+   DBG("%s\n", __FUNCTION__);
+   for (i = 0; i < depth; i++) {
+      if (!intel_region_data(intel,
+                             dst->region,
+                             dst_offset + dst_depth_offset[i],
+                             0,
+                             0,
+                             src_bytes,
+                             src_row_pitch,
+                             0, 0,  /* source x,y */
+                             dst->level[level].width,
+                             dst->level[level].height))
+         return GL_FALSE;
+      src_bytes += src_image_pitch;
+   }
+   return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
new file mode 100644
index 00000000000..dbd7167b778
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -0,0 +1,166 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef INTEL_MIPMAP_TREE_H
+#define INTEL_MIPMAP_TREE_H
+
+#include "intel_regions.h"
+
+/* A layer on top of the intel_regions code which adds:
+ *
+ * - Code to size and layout a region to hold a set of mipmaps.
+ * - Query to determine if a new image fits in an existing tree.
+ *
+ * The fixed mipmap layout of intel hardware where one offset
+ * specifies the position of all images in a mipmap hierarchy
+ * complicates the implementation of GL texture image commands,
+ * compared to hardware where each image is specified with an
+ * independent offset.
+ *
+ * In an ideal world, each texture object would be associated with a
+ * single bufmgr buffer or 2d intel_region, and all the images within
+ * the texture object would slot into the tree as they arrive.  The
+ * reality can be a little messier, as images can arrive from the user
+ * with sizes that don't fit in the existing tree, or in an order
+ * where the tree layout cannot be guessed immediately.
+ *
+ * This structure encodes an idealized mipmap tree.  The GL image
+ * commands build these where possible, otherwise store the images in
+ * temporary system buffers.
+ */
+
+
+struct intel_mipmap_level {
+   GLuint level_offset;   /* byte offset of the level from the start of
+                           * the tree: (x + y * pitch) * cpp */
+   GLuint width;
+   GLuint height;
+   GLuint depth;
+   GLuint nr_images;      /* number of entries in image_offset[] when
+                           * greater than one */
+
+   /* Explicitly store the offset of each image for each cube face or
+    * depth value.  Pretty much have to accept that hardware formats
+    * are going to be so diverse that there is no unified way to
+    * compute the offsets of depth/cube images within a mipmap level,
+    * so have to store them as a lookup table:
+    *
+    * The table is only allocated when nr_images > 1; otherwise the
+    * pointer stays NULL.  Entries are byte offsets relative to
+    * level_offset and entry 0 is always 0.
+    */
+   GLuint *image_offset;
+};
+
+struct intel_mipmap_tree {
+   /* Effectively the key:
+    *
+    * 'target' has the six cube face targets folded into
+    * GL_TEXTURE_CUBE_MAP_ARB.
+    */
+   GLenum target;
+   GLenum internal_format;
+
+   GLuint first_level;
+   GLuint last_level;
+
+   GLuint width0, height0, depth0;
+   GLuint cpp;            /* forced to 2 by intel_miptree_create() when
+                           * 'compressed' is set */
+   GLboolean compressed;
+
+   /* Derived from the above:
+    */
+   GLuint pitch;
+   GLuint depth_pitch;		/* per-image on i945? */
+   GLuint total_height;
+
+   /* Includes image offset tables:
+    */
+   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
+
+   /* The data is held here:
+    */
+   struct intel_region *region;
+
+   /* These are also refcounted:
+    */
+   GLuint refcount;
+};
+
+
+/* Build a tree for the given key and allocate its region
+ * (cpp x pitch x total_height).  Returns NULL if the region could not
+ * be allocated.
+ */
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+						GLenum target,
+						GLenum internal_format,
+						GLuint first_level,
+						GLuint last_level,
+						GLuint width0,
+						GLuint height0,
+						GLuint depth0,
+						GLuint cpp,
+						GLboolean compressed);
+
+void intel_miptree_destroy( struct intel_context *intel,
+			    struct intel_mipmap_tree *mt );
+
+
+/* Return the linear offset of an image relative to the start of the
+ * tree:
+ *
+ * For cube maps this is level_offset plus the stored offset of 'face';
+ * for every other target it is just level_offset.
+ */
+GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt,
+				   GLuint face,
+				   GLuint level );
+
+/* Return pointers to each 2d slice within an image.  Indexed by depth
+ * value.
+ *
+ * The values are byte offsets, not addresses.  For anything other than
+ * a 3D texture with more than one image the result points at a single
+ * zero.
+ */
+const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
+					  GLuint level);
+
+/* Layout helpers: record the size / position of a level (x, y in
+ * pixels) and of the individual images inside a level.
+ */
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+				  GLuint level,
+				  GLuint nr_images,
+				  GLuint x, GLuint y,
+				  GLuint w, GLuint h, GLuint d);
+
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+				    GLuint level,
+				    GLuint img,
+				    GLuint x, GLuint y);
+
+
+/* Upload an image into a tree
+ *
+ * One 2D slice is uploaded per depth value of the level; 'src' is
+ * advanced by src_image_pitch between slices.  Returns GL_FALSE if any
+ * slice upload fails.
+ */
+GLboolean intel_miptree_image_data(struct intel_context *intel,
+				   struct intel_mipmap_tree *dst,
+				   GLuint face,
+				   GLuint level,
+				   const void *src,
+				   GLuint src_row_pitch,
+				   GLuint src_image_pitch);
+
+/* i915_mipmap_tree.c:
+ *
+ * NOTE(review): intel_miptree_create() in this driver only ever calls
+ * brw_miptree_layout(); the i915/i945 calls are compiled out there.
+ */
+GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt );
+GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt );
+GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt );
+
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
new file mode 100644
index 00000000000..3c448b3559a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -0,0 +1,91 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef _INTEL_REG_H_
+#define _INTEL_REG_H_
+
+
+
+#define CMD_3D (0x3<<29)
+
+
+#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24))
+#define PRIM_INDIRECT            (1<<23)
+#define PRIM_INLINE              (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS       (1<<17)
+
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+/* Colour packing helpers.  The 4444 / 1555 / 565 variants take
+ * components in the 0..255 range and keep only their top bits.  Every
+ * argument is parenthesized, so expressions are safe to pass, but each
+ * argument is still expanded textually (once).
+ */
+
+/* a:15..12  r:11..8  g:7..4  b:3..0 */
+#define I915PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+/* a:15 (any non-zero alpha)  r:14..10  g:9..5  b:4..0 */
+#define I915PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+   ((a) ? 0x8000 : 0))
+
+/* r:15..11  g:10..5  b:4..0 */
+#define I915PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+/* a:31..24  r:23..16  g:15..8  b:7..0.  The components are not masked,
+ * callers must pass values that fit in 8 bits.  Arguments are
+ * parenthesized so that e.g. I915PACKCOLOR8888(x | y, ...) shifts the
+ * whole expression instead of only its right-hand operand.
+ */
+#define I915PACKCOLOR8888(r,g,b,a) \
+  (((a)<<24) | ((r)<<16) | ((g)<<8) | (b))
+
+
+
+
+#define BR00_BITBLT_CLIENT   0x40000000
+#define BR00_OP_COLOR_BLT    0x10000000
+#define BR00_OP_SRC_COPY_BLT 0x10C00000
+#define BR13_SOLID_PATTERN   0x80000000
+
+#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4)
+#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
+#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
+
+#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)
+
+#define XY_SRC_TILED (1<<15)
+#define XY_DST_TILED (1<<11)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c
new file mode 100644
index 00000000000..53f05612377
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_regions.c
@@ -0,0 +1,294 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Provide additional functionality on top of bufmgr buffers:
+ *   - 2d semantics and blit operations
+ *   - refcounting of buffers for multiple images in a buffer.
+ *   - refcounting of buffer mappings.
+ *   - some logic for moving the buffers to the best memory pools for
+ *     given operations.
+ *
+ * Most of this is to make it easier to implement the fixed-layout
+ * mipmap tree required by intel hardware in the face of GL's
+ * programming interface where each image can be specified in random
+ * order and it isn't clear what layout the tree should have until the
+ * last moment.
+ */
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_blit.h"
+#include "bufmgr.h"
+#include "imports.h"
+
+/* Map the region's buffer, or hand back the existing mapping when it is
+ * already mapped.  Each successful call must be paired with
+ * intel_region_unmap().  Returns NULL (and takes no map reference) when
+ * the first mapping attempt fails.
+ *
+ * XXX: Thread safety?
+ */
+GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!region->map_refcount++) {
+      region->map = bmMapBuffer(intel, region->buffer, 0);
+      if (!region->map)
+         region->map_refcount--;
+   }
+
+   return region->map;
+}
+
+/* Drop one map reference; the buffer is unmapped when the last one goes.
+ */
+void intel_region_unmap(struct intel_context *intel,
+                        struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!--region->map_refcount) {
+      bmUnmapBufferAUB(intel, region->buffer, 0, 0);
+      region->map = NULL;
+   }
+}
+
+/* Allocate a region of pitch * cpp * height bytes backed by a fresh
+ * bufmgr buffer.  The region starts with a refcount of 1.  Returns NULL
+ * if the region structure itself cannot be allocated.
+ */
+struct intel_region *intel_region_alloc( struct intel_context *intel,
+                                         GLuint cpp,
+                                         GLuint pitch,
+                                         GLuint height )
+{
+   struct intel_region *region = calloc(1, sizeof(*region));
+
+   DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__,
+       cpp, pitch, height, cpp*pitch*height);
+
+   if (!region)
+      return NULL;
+
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height;     /* needed? */
+   region->refcount = 1;
+
+   bmGenBuffers(intel, "tex", 1, &region->buffer, 6);
+   bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0);
+
+   return region;
+}
+
+/* Take a new reference on 'src' and store it in '*dst', which must be
+ * NULL on entry.
+ */
+void intel_region_reference( struct intel_region **dst,
+                             struct intel_region *src)
+{
+   src->refcount++;
+   assert(*dst == NULL);
+   *dst = src;
+}
+
+/* Drop the reference held in '*region' and clear the pointer.  When the
+ * last reference goes the bufmgr buffer is deleted and the region is
+ * freed; it must not be mapped at that point.  A NULL '*region' is
+ * ignored.
+ */
+void intel_region_release( struct intel_context *intel,
+                           struct intel_region **region )
+{
+   if (!*region)
+      return;
+
+   DBG("%s %d\n", __FUNCTION__, (*region)->refcount-1);
+
+   if (--(*region)->refcount == 0) {
+      assert((*region)->map_refcount == 0);
+      bmDeleteBuffers(intel, 1, &(*region)->buffer);
+      free(*region);
+   }
+   *region = NULL;
+}
+
+
+/* Wrap an already existing piece of memory (offset / virtual address,
+ * cpp * pitch * height bytes) in a region.  'mem_type' is currently not
+ * used.  Returns NULL if the region structure cannot be allocated; a
+ * failure to set up the static pool terminates the process.
+ */
+struct intel_region *intel_region_create_static( struct intel_context *intel,
+                                                 GLuint mem_type,
+                                                 GLuint offset,
+                                                 void *virtual,
+                                                 GLuint cpp,
+                                                 GLuint pitch,
+                                                 GLuint height,
+                                                 GLboolean tiled )
+{
+   struct intel_region *region = calloc(1, sizeof(*region));
+   GLuint size = cpp * pitch * height;
+   GLint pool;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (!region)
+      return NULL;
+
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height;     /* needed? */
+   region->refcount = 1;
+   region->tiled = tiled;
+
+   /* Recipe for creating a static buffer - create a static pool with
+    * the right offset and size, generate a buffer and use a special
+    * call to bind it to all of the memory in that pool.
+    */
+   pool = bmInitPool(intel, offset, virtual, size,
+                     (BM_MEM_AGP |
+                      BM_NO_UPLOAD |
+                      BM_NO_EVICT |
+                      BM_NO_MOVE));
+   if (pool < 0) {
+      _mesa_printf("bmInitPool failed for static region\n");
+      exit(1);
+   }
+
+   region->buffer = bmGenBufferStatic(intel, pool);
+
+   return region;
+}
+
+
+
+
+/* Copy a width x height rectangle of cpp-byte pixels from
+ * (src_x, src_y) in 'src' to (dst_x, dst_y) in 'dst'.  Pitches,
+ * positions and sizes are given in pixels.  When both images are
+ * exactly 'width' pixels wide the rectangle is copied in one go.
+ */
+void _mesa_copy_rect( GLubyte *dst,
+                      GLuint cpp,
+                      GLuint dst_pitch,
+                      GLuint dst_x,
+                      GLuint dst_y,
+                      GLuint width,
+                      GLuint height,
+                      const GLubyte *src,
+                      GLuint src_pitch,
+                      GLuint src_x,
+                      GLuint src_y )
+{
+   GLuint i;
+
+   /* From here on everything is in bytes */
+   dst_pitch *= cpp;
+   src_pitch *= cpp;
+   dst += dst_x * cpp;
+   src += src_x * cpp;
+   dst += dst_y * dst_pitch;
+   /* The source row offset has to use the source pitch */
+   src += src_y * src_pitch;
+   width *= cpp;
+
+   if (width == dst_pitch &&
+       width == src_pitch)
+      do_memcpy(dst, src, height * width);
+   else {
+      for (i = 0; i < height; i++) {
+         do_memcpy(dst, src, width);
+         dst += dst_pitch;
+         src += src_pitch;
+      }
+   }
+}
+
+
+/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+GLboolean intel_region_data(struct intel_context *intel,
+                            struct intel_region *dst,
+                            GLuint dst_offset,
+                            GLuint dstx, GLuint dsty,
+                            const void *src, GLuint src_pitch,
+                            GLuint srcx, GLuint srcy,
+                            GLuint width, GLuint height)
+{
+   GLubyte *map;
+
+   DBG("%s\n", __FUNCTION__);
+
+   /* The upload replaces the complete buffer contents: hand the data
+    * to the buffer manager in one call.
+    */
+   if (dst_offset == 0 &&
+       srcx == 0 &&
+       srcy == 0 &&
+       width == dst->pitch &&
+       width == src_pitch &&
+       height == dst->height)
+   {
+      return (bmBufferDataAUB(intel,
+                              dst->buffer,
+                              dst->cpp * width * dst->height,
+                              src, 0, 0, 0) == 0);
+   }
+
+   /* Otherwise map the destination and copy the rectangle by hand. */
+   map = intel_region_map(intel, dst);
+   if (!map)
+      return GL_FALSE;
+
+   assert (dst_offset + dstx + width +
+           (dsty + height - 1) * dst->pitch * dst->cpp <=
+           dst->pitch * dst->cpp * dst->height);
+
+   _mesa_copy_rect(map + dst_offset,
+                   dst->cpp,
+                   dst->pitch,
+                   dstx, dsty,
+                   width, height,
+                   src,
+                   src_pitch,
+                   srcx, srcy);
+
+   intel_region_unmap(intel, dst);
+   return GL_TRUE;
+}
+
+/* Copy rectangular sub-regions.  Source and destination must share the
+ * same cpp; the work is done by a copy blit.  Need better logic about
+ * when to push buffers into AGP - will currently do so whenever
+ * possible.
+ */
+void intel_region_copy( struct intel_context *intel,
+                        struct intel_region *dst,
+                        GLuint dst_offset,
+                        GLuint dstx, GLuint dsty,
+                        struct intel_region *src,
+                        GLuint src_offset,
+                        GLuint srcx, GLuint srcy,
+                        GLuint width, GLuint height )
+{
+   DBG("%s\n", __FUNCTION__);
+
+   assert(src->cpp == dst->cpp);
+
+   intelEmitCopyBlit(intel, dst->cpp,
+                     src->pitch, src->buffer, src_offset, src->tiled,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     srcx, srcy, dstx, dsty, width, height);
+}
+
+/* Fill a rectangular sub-region with 'color' using a fill blit.  Need
+ * better logic about when to push buffers into AGP - will currently do
+ * so whenever possible.
+ */
+void intel_region_fill( struct intel_context *intel,
+                        struct intel_region *dst,
+                        GLuint dst_offset,
+                        GLuint dstx, GLuint dsty,
+                        GLuint width, GLuint height,
+                        GLuint color )
+{
+   DBG("%s\n", __FUNCTION__);
+
+   intelEmitFillBlit(intel, dst->cpp,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     dstx, dsty, width, height,
+                     color );
+}
+
diff --git a/src/mesa/drivers/dri/i965/intel_regions.h b/src/mesa/drivers/dri/i965/intel_regions.h
new file mode 100644
index 00000000000..2413f0de33c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_regions.h
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef INTEL_REGIONS_H
+#define INTEL_REGIONS_H
+
+#include "mtypes.h"
+#include "bufmgr.h"		/* for DBG! */
+struct intel_context;
+
+/* A layer on top of the bufmgr buffers that adds a few useful things:
+ *
+ * - Refcounting for local buffer references.
+ * - Refcounting for buffer maps
+ * - Buffer dimensions - pitch and height.
+ * - Blitter commands for copying 2D regions between buffers.
+ */
+struct intel_region {
+   struct buffer *buffer;   /* backing bufmgr buffer */
+   GLuint refcount;         /* starts at 1; the region is freed when it
+                             * drops to 0 */
+   GLuint cpp;              /* bytes per pixel */
+   GLuint pitch;            /* in pixels: a row is pitch * cpp bytes */
+   GLuint height;
+   GLboolean tiled;         /* only ever set by
+                             * intel_region_create_static() */
+   GLubyte *map;            /* current mapping, NULL while unmapped */
+   GLuint map_refcount;     /* outstanding intel_region_map() calls */
+};
+
+/* Allocate a refcounted region.  Pointers to regions should only be
+ * copied by calling intel_region_reference().
+ *
+ * No support for dynamically allocating tiled regions at this point.
+ */
+struct intel_region *intel_region_alloc( struct intel_context *intel,
+					 GLuint cpp,
+					 GLuint pitch,
+					 GLuint height );
+
+void intel_region_reference( struct intel_region **dst,
+			     struct intel_region *src );
+
+void intel_region_release(struct intel_context *intel,
+			  struct intel_region **ib );
+
+/* Static regions may be tiled.  The assumption is that the X server
+ * has set up fence registers to define tiled zones in agp and these
+ * buffers are within those zones.  Tiling regions without fence
+ * registers is more work.
+ *
+ * NOTE(review): the implementation does not read 'mem_type'.
+ */
+struct intel_region *intel_region_create_static( struct intel_context *intel,
+						 GLuint mem_type,
+						 GLuint offset,
+						 void *virtual,
+						 GLuint cpp,
+						 GLuint pitch,
+						 GLuint height,
+						 GLboolean tiled );
+
+/* Map/unmap regions.  This is refcounted also:
+ *
+ * intel_region_map() returns NULL if the buffer cannot be mapped; in
+ * that case no matching unmap call is owed.
+ */
+GLubyte *intel_region_map(struct intel_context *intel,
+			  struct intel_region *ib);
+
+void intel_region_unmap(struct intel_context *intel,
+			struct intel_region *ib);
+
+
+/* Upload data to a rectangular sub-region
+ *
+ * Positions, sizes and src_stride are in pixels, dest_offset is in
+ * bytes.  Returns GL_FALSE if the destination could not be mapped or
+ * the buffer manager rejected the data.
+ */
+GLboolean intel_region_data(struct intel_context *intel,
+			    struct intel_region *dest,
+			    GLuint dest_offset,
+			    GLuint destx, GLuint desty,
+			    const void *src, GLuint src_stride,
+			    GLuint srcx, GLuint srcy,
+			    GLuint width, GLuint height);
+
+/* Copy rectangular sub-regions
+ *
+ * Both regions must have the same cpp.
+ */
+void intel_region_copy( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint dest_offset,
+			GLuint destx, GLuint desty,
+			struct intel_region *src,
+			GLuint src_offset,
+			GLuint srcx, GLuint srcy,
+			GLuint width, GLuint height );
+
+/* Fill a rectangular sub-region
+ */
+void intel_region_fill( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint dest_offset,
+			GLuint destx, GLuint desty,
+			GLuint width, GLuint height,
+			GLuint color );
+
+
+/***********************************************************************
+ * Misc utilities: move to somewhere generic
+ *
+ * _mesa_copy_rect() copies a width x height rectangle of cpp-byte
+ * pixels; pitches, positions and sizes are given in pixels.
+ */
+void _mesa_copy_rect( GLubyte *dst,
+		      GLuint cpp,
+		      GLuint dst_pitch,
+		      GLuint dst_x,
+		      GLuint dst_y,
+		      GLuint width,
+		      GLuint height,
+		      const GLubyte *src,
+		      GLuint src_pitch,
+		      GLuint src_x,
+		      GLuint src_y );
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
new file mode 100644
index 00000000000..14b461b1ee7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -0,0 +1,699 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "glheader.h" +#include "context.h" +#include "framebuffer.h" +#include "matrix.h" +#include "renderbuffer.h" +#include "simple_list.h" +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + + +#include "intel_screen.h" + +#include "intel_tex.h" +#include "intel_span.h" +#include "intel_ioctl.h" + +#include "i830_dri.h" + +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_SECTION_END +DRI_CONF_END; +const GLuint __driNConfigOptions = 4; + +#ifdef USE_NEW_INTERFACE +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; +#endif /*USE_NEW_INTERFACE*/ + +extern const struct dri_extension card_extensions[]; + +/** + * Map all the memory regions described by the screen. + * \return GL_TRUE if success, GL_FALSE if error. + */ +GLboolean +intelMapScreenRegions(__DRIscreenPrivate *sPriv) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; + + if (intelScreen->front.handle) { + if (drmMap(sPriv->fd, + intelScreen->front.handle, + intelScreen->front.size, + (drmAddress *)&intelScreen->front.map) != 0) { + _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); + return GL_FALSE; + } + } else { + /* Use the old static allocation method if the server isn't setting up + * a movable handle for us. Add in the front buffer offset from + * framebuffer start, as our span routines (unlike other drivers) expect + * the renderbuffer address to point to the beginning of the + * renderbuffer. 
+ */ + intelScreen->front.map = sPriv->pFB; + if (intelScreen->front.map == NULL) { + fprintf(stderr, "Failed to find framebuffer mapping\n"); + return GL_FALSE; + } + } + + if (drmMap(sPriv->fd, + intelScreen->back.handle, + intelScreen->back.size, + (drmAddress *)&intelScreen->back.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (drmMap(sPriv->fd, + intelScreen->depth.handle, + intelScreen->depth.size, + (drmAddress *)&intelScreen->depth.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (drmMap(sPriv->fd, + intelScreen->tex.handle, + intelScreen->tex.size, + (drmAddress *)&intelScreen->tex.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (0) + printf("Mappings: front: %p back: %p depth: %p tex: %p\n", + intelScreen->front.map, + intelScreen->back.map, + intelScreen->depth.map, + intelScreen->tex.map); + return GL_TRUE; +} + + +void +intelUnmapScreenRegions(intelScreenPrivate *intelScreen) +{ +#define REALLY_UNMAP 1 + /* If front.handle is present, we're doing the dynamic front buffer mapping, + * but if we've fallen back to static allocation then we shouldn't try to + * unmap here. 
+ */ + if (intelScreen->front.handle) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) + printf("drmUnmap front failed!\n"); +#endif + intelScreen->front.map = NULL; + } + if (intelScreen->back.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) + printf("drmUnmap back failed!\n"); +#endif + intelScreen->back.map = NULL; + } + if (intelScreen->depth.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->depth.map, intelScreen->depth.size); + intelScreen->depth.map = NULL; +#endif + } + if (intelScreen->tex.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->tex.map, intelScreen->tex.size); + intelScreen->tex.map = NULL; +#endif + } +} + + +static void +intelPrintDRIInfo(intelScreenPrivate *intelScreen, + __DRIscreenPrivate *sPriv, + I830DRIPtr gDRIPriv) +{ + fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->front.size, intelScreen->front.offset, + intelScreen->front.pitch); + fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->back.size, intelScreen->back.offset, + intelScreen->back.pitch); + fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->depth.size, intelScreen->depth.offset, + intelScreen->depth.pitch); + fprintf(stderr, "*** Rotated size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->rotated.size, intelScreen->rotated.offset, + intelScreen->rotated.pitch); + fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", + intelScreen->tex.size, intelScreen->tex.offset); + fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); +} + + +static void +intelPrintSAREA(volatile drmI830Sarea *sarea) +{ + fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, sarea->height); + fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); + fprintf(stderr, + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->front_offset, sarea->front_size, + (unsigned) sarea->front_handle); + fprintf(stderr, + 
"SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->back_offset, sarea->back_size, + (unsigned) sarea->back_handle); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->depth_offset, sarea->depth_size, + (unsigned) sarea->depth_handle); + fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->tex_offset, sarea->tex_size, + (unsigned) sarea->tex_handle); + fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); + fprintf(stderr, + "SAREA: rotated offset: 0x%08x size: 0x%x\n", + sarea->rotated_offset, sarea->rotated_size); + fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); +} + + +/** + * A number of the screen parameters are obtained/computed from + * information in the SAREA. This function updates those parameters. + */ +void +intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, + volatile drmI830Sarea *sarea) +{ + intelScreen->width = sarea->width; + intelScreen->height = sarea->height; + + intelScreen->front.offset = sarea->front_offset; + intelScreen->front.pitch = sarea->pitch * intelScreen->cpp; + intelScreen->front.handle = sarea->front_handle; + intelScreen->front.size = sarea->front_size; + + intelScreen->back.offset = sarea->back_offset; + intelScreen->back.pitch = sarea->pitch * intelScreen->cpp; + intelScreen->back.handle = sarea->back_handle; + intelScreen->back.size = sarea->back_size; + + intelScreen->depth.offset = sarea->depth_offset; + intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp; + intelScreen->depth.handle = sarea->depth_handle; + intelScreen->depth.size = sarea->depth_size; + + intelScreen->tex.offset = sarea->tex_offset; + intelScreen->logTextureGranularity = sarea->log_tex_granularity; + intelScreen->tex.handle = sarea->tex_handle; + intelScreen->tex.size = sarea->tex_size; + + intelScreen->rotated.offset = sarea->rotated_offset; + intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp; + intelScreen->rotated.size = 
sarea->rotated_size; + intelScreen->current_rotation = sarea->rotation; +#if 0 + matrix23Rotate(&intelScreen->rotMatrix, + sarea->width, sarea->height, sarea->rotation); +#endif + intelScreen->rotatedWidth = sarea->virtualX; + intelScreen->rotatedHeight = sarea->virtualY; + + if (0) + intelPrintSAREA(sarea); +} + + +static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) +{ + intelScreenPrivate *intelScreen; + I830DRIPtr gDRIPriv = (I830DRIPtr)sPriv->pDevPriv; + PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = + (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); + void * const psc = sPriv->psc->screenConfigs; + volatile drmI830Sarea *sarea; + + if (sPriv->devPrivSize != sizeof(I830DRIRec)) { + fprintf(stderr,"\nERROR! sizeof(I830DRIRec) (%d) does not match passed size from device driver (%d)\n", sizeof(I830DRIRec), sPriv->devPrivSize); + return GL_FALSE; + } + + /* Allocate the private area */ + intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate)); + if (!intelScreen) { + fprintf(stderr,"\nERROR! 
Allocating private area failed\n"); + return GL_FALSE; + } + /* parse information in __driConfigOptions */ + driParseOptionInfo (&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + intelScreen->driScrnPriv = sPriv; + sPriv->private = (void *)intelScreen; + intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; + sarea = (volatile drmI830Sarea *) + (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); + + intelScreen->deviceID = gDRIPriv->deviceID; + intelScreen->mem = gDRIPriv->mem; + intelScreen->cpp = gDRIPriv->cpp; + + switch (gDRIPriv->bitsPerPixel) { + case 15: intelScreen->fbFormat = DV_PF_555; break; + case 16: intelScreen->fbFormat = DV_PF_565; break; + case 32: intelScreen->fbFormat = DV_PF_8888; break; + } + + intelUpdateScreenFromSAREA(intelScreen, sarea); + + if (0) + intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); + + if (!intelMapScreenRegions(sPriv)) { + fprintf(stderr,"\nERROR! mapping regions\n"); + _mesa_free(intelScreen); + sPriv->private = NULL; + return GL_FALSE; + } + + intelScreen->drmMinor = sPriv->drmMinor; + + /* Determine if IRQs are active? 
*/ + { + int ret; + drmI830GetParam gp; + + gp.param = I830_PARAM_IRQ_ACTIVE; + gp.value = &intelScreen->irq_active; + + ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "drmI830GetParam: %d\n", ret); + return GL_FALSE; + } + } + + /* Determine if batchbuffers are allowed */ + { + int ret; + drmI830GetParam gp; + + gp.param = I830_PARAM_ALLOW_BATCHBUFFER; + gp.value = &intelScreen->allow_batchbuffer; + + ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret); + return GL_FALSE; + } + } + + if (glx_enable_extension != NULL) { + (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); + (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); + (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); + (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); + (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); + (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); + } + + return GL_TRUE; +} + + +static void intelDestroyScreen(__DRIscreenPrivate *sPriv) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; + + intelUnmapScreenRegions(intelScreen); + FREE(intelScreen); + sPriv->private = NULL; +} + +static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv, + __DRIdrawablePrivate *driDrawPriv, + const __GLcontextModes *mesaVis, + GLboolean isPixmap ) +{ + intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private; + + if (isPixmap) { + return GL_FALSE; /* not implemented */ + } else { + GLboolean swStencil = (mesaVis->stencilBits > 0 && + mesaVis->depthBits != 24); + + struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); + + { + driRenderbuffer *frontRb + = driNewRenderbuffer(GL_RGBA, + screen->front.map, + screen->cpp, + screen->front.offset, screen->front.pitch, + driDrawPriv); + intelSetSpanFunctions(frontRb, mesaVis); + 
/**
 * Destroy the framebuffer that intelCreateBuffer() stored in
 * driDrawPriv->driverPrivate.
 *
 * NOTE(review): driverPrivate is not cleared afterwards, so it keeps the
 * stale pointer -- confirm no caller reads it after this returns.
 */
static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
{
   _mesa_destroy_framebuffer((GLframebuffer *) (driDrawPriv->driverPrivate));
}
+ */ +static int +intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) +{ + struct intel_context *intel; + + if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) + || (dPriv->driContextPriv->driverPrivate == NULL) + || (sInfo == NULL) ) { + return -1; + } + + intel = dPriv->driContextPriv->driverPrivate; + sInfo->swap_count = intel->swap_count; + sInfo->swap_ust = intel->swap_ust; + sInfo->swap_missed_count = intel->swap_missed_count; + + sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) + ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust ) + : 0.0; + + return 0; +} + + +/* There are probably better ways to do this, such as an + * init-designated function to register chipids and createcontext + * functions. + */ +extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); + +extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); + +extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); + + + + +static GLboolean intelCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ +#if 0 + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; + switch (intelScreen->deviceID) { + case PCI_CHIP_845_G: + case PCI_CHIP_I830_M: + case PCI_CHIP_I855_GM: + case PCI_CHIP_I865_G: + return i830CreateContext( mesaVis, driContextPriv, + sharedContextPrivate ); + + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + return i915CreateContext( mesaVis, driContextPriv, + sharedContextPrivate ); + + default: + fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); + return GL_FALSE; + } +#else + return 
brwCreateContext( mesaVis, driContextPriv, + sharedContextPrivate ); +#endif +} + + +static const struct __DriverAPIRec intelAPI = { + .InitDriver = intelInitDriver, + .DestroyScreen = intelDestroyScreen, + .CreateContext = intelCreateContext, + .DestroyContext = intelDestroyContext, + .CreateBuffer = intelCreateBuffer, + .DestroyBuffer = intelDestroyBuffer, + .SwapBuffers = intelSwapBuffers, + .MakeCurrent = intelMakeCurrent, + .UnbindContext = intelUnbindContext, + .GetSwapInfo = intelGetSwapInfo, + .GetMSC = driGetMSC32, + .WaitForMSC = driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = intelCopySubBuffer +}; + + +static __GLcontextModes * +intelFillInModes( unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer ) +{ + __GLcontextModes * modes; + __GLcontextModes * m; + unsigned num_modes; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + + u_int8_t depth_bits_array[3]; + u_int8_t stencil_bits_array[3]; + + + depth_bits_array[0] = 0; + depth_bits_array[1] = depth_bits; + depth_bits_array[2] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = 0; + stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; + back_buffer_factor = (have_back_buffer) ? 
3 : 1; + + num_modes = depth_buffer_factor * back_buffer_factor * 4; + + if ( pixel_bits == 16 ) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); + m = modes; + if ( ! driFillInModes( & m, fb_format, fb_type, + depth_bits_array, stencil_bits_array, depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + GLX_TRUE_COLOR ) ) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + if ( ! driFillInModes( & m, fb_format, fb_type, + depth_bits_array, stencil_bits_array, depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + GLX_DIRECT_COLOR ) ) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + /* Mark the visual as slow if there are "fake" stencil bits. + */ + for ( m = modes ; m != NULL ; m = m->next ) { + if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { + m->visualRating = GLX_SLOW_CONFIG; + } + } + + return modes; +} + + +/** + * This is the bootstrap function for the driver. libGL supplies all of the + * requisite information about the system, and the driver initializes itself. + * This routine also fills in the linked list pointed to by \c driver_modes + * with the \c __GLcontextModes that the driver can support for windows or + * pbuffers. + * + * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on + * failure. 
+ */ +PUBLIC +void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc, + const __GLcontextModes * modes, + const __DRIversion * ddx_version, + const __DRIversion * dri_version, + const __DRIversion * drm_version, + const __DRIframebuffer * frame_buffer, + drmAddress pSAREA, int fd, + int internal_api_version, + const __DRIinterfaceMethods * interface, + __GLcontextModes ** driver_modes ) + +{ + __DRIscreenPrivate *psp; + static const __DRIversion ddx_expected = { 1, 6, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 3, 0 }; + + dri_interface = interface; + + if ( ! driCheckDriDdxDrmVersions2( "i915", + dri_version, & dri_expected, + ddx_version, & ddx_expected, + drm_version, & drm_expected ) ) { + return NULL; + } + + psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, + ddx_version, dri_version, drm_version, + frame_buffer, pSAREA, fd, + internal_api_version, &intelAPI); + if ( psp != NULL ) { + I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; + *driver_modes = intelFillInModes( dri_priv->cpp * 8, + (dri_priv->cpp == 2) ? 16 : 24, + (dri_priv->cpp == 2) ? 0 : 8, + GL_TRUE ); + + /* Calling driInitExtensions here, with a NULL context pointer, does not actually + * enable the extensions. It just makes sure that all the dispatch offsets for all + * the extensions that *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create is called, but we can't + * enable the extensions until we have a context pointer. + * + * Hello chicken. Hello egg. How are you two today? 
+ */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); + } + + return (void *) psp; +} diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h new file mode 100644 index 00000000000..094158afd85 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_INIT_H_ +#define _INTEL_INIT_H_ + +#include <sys/time.h> +#include "dri_util.h" +#include "xmlconfig.h" +#include "i830_common.h" + +/* XXX: change name or eliminate to avoid conflict with "struct + * intel_region"!!! 
/* XXX: change name or eliminate to avoid conflict with "struct
 * intel_region"!!!
 *
 * Describes one statically allocated screen memory area (front, back,
 * rotated, depth or texture) as published through the SAREA.
 */
typedef struct {
   drm_handle_t handle;     /* DRM handle passed to drmMap() */
   drmSize size;            /* region size in bytes */
   char *map;               /* memory map */
   int offset;              /* from start of video mem, in bytes */
   int pitch;               /* row stride.  NOTE(review): intelUpdateScreenFromSAREA()
                             * stores sarea->pitch * cpp here, which looks like
                             * bytes rather than pixels -- confirm */
} intelRegion;

/* Per-screen driver state, allocated in intelInitDriver() and stored in
 * __DRIscreenPrivate::private.
 */
typedef struct
{
   intelRegion front;
   intelRegion back;
   intelRegion rotated;
   intelRegion depth;
   intelRegion tex;

   int deviceID;
   int width;
   int height;
   int mem;                 /* copied from the DDX record in intelInitDriver();
                             * marked "unused" by the original author */

   int cpp;                 /* for front and back buffers; intelUpdateScreenFromSAREA()
                             * also uses it for the depth and rotated pitch */
   int fbFormat;

   int logTextureGranularity;

   __DRIscreenPrivate *driScrnPriv;
   unsigned int sarea_priv_offset;

   int drmMinor;

   int irq_active;          /* filled by the I830_PARAM_IRQ_ACTIVE query */
   int allow_batchbuffer;   /* filled by the I830_PARAM_ALLOW_BATCHBUFFER query */

/* struct matrix23 rotMatrix; */

   int current_rotation;    /* 0, 90, 180 or 270 */
   int rotatedWidth, rotatedHeight;

   /**
    * Configuration cache with default values for all contexts
    */
   driOptionCache optionCache;
} intelScreenPrivate;
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "glheader.h" +#include "macros.h" +#include "mtypes.h" +#include "colormac.h" + +#include "intel_screen.h" +#include "intel_regions.h" +#include "intel_span.h" +#include "intel_ioctl.h" +#include "intel_tex.h" +#include "swrast/swrast.h" + +#undef DBG +#define DBG 0 + +#define LOCAL_VARS \ + struct intel_context *intel = intel_context(ctx); \ + __DRIdrawablePrivate *dPriv = intel->driDrawable; \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + GLuint pitch = drb->pitch; \ + GLuint height = dPriv->h; \ + char *buf = (char *) drb->Base.Data + \ + dPriv->x * drb->cpp + \ + dPriv->y * pitch; \ + GLushort p; \ + (void) buf; (void) p + +#define LOCAL_DEPTH_VARS \ + struct intel_context *intel = intel_context(ctx); \ + __DRIdrawablePrivate *dPriv = intel->driDrawable; \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + GLuint pitch = drb->pitch; \ + GLuint height = dPriv->h; \ + char *buf = (char *) drb->Base.Data + \ + dPriv->x * drb->cpp + \ + dPriv->y * pitch + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +#define INIT_MONO_PIXEL(p,color)\ + p = INTEL_PACKCOLOR565(color[0],color[1],color[2]) + +#define Y_FLIP(_y) (height - _y - 1) + +#define HW_LOCK() + +#define HW_UNLOCK() + +/* 16 bit, 565 rgb color spanline and pixel functions + */ +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + *(GLushort *)(buf + _x*2 + _y*pitch) = ( (((int)r & 0xf8) << 8) | \ + (((int)g & 0xfc) << 3) | \ + (((int)b & 0xf8) >> 3)) +#define WRITE_PIXEL( _x, _y, p ) \ + *(GLushort *)(buf + _x*2 + _y*pitch) = p + +#define READ_RGBA( rgba, _x, _y ) \ +do { \ + GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ + rgba[0] = (((p >> 11) & 0x1f) * 255) / 31; \ + rgba[1] = (((p >> 5) & 0x3f) * 255) / 63; \ + rgba[2] = (((p >> 0) & 0x1f) * 255) / 31; \ + rgba[3] = 255; \ +} while(0) + +#define TAG(x) intel##x##_565 +#include "spantmp.h" + +/* 15 bit, 555 rgb color spanline and pixel functions + */ +#define 
WRITE_RGBA( _x, _y, r, g, b, a ) \ + *(GLushort *)(buf + _x*2 + _y*pitch) = (((r & 0xf8) << 7) | \ + ((g & 0xf8) << 3) | \ + ((b & 0xf8) >> 3)) + +#define WRITE_PIXEL( _x, _y, p ) \ + *(GLushort *)(buf + _x*2 + _y*pitch) = p + +#define READ_RGBA( rgba, _x, _y ) \ +do { \ + GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ + rgba[0] = (p >> 7) & 0xf8; \ + rgba[1] = (p >> 3) & 0xf8; \ + rgba[2] = (p << 3) & 0xf8; \ + rgba[3] = 255; \ +} while(0) + +#define TAG(x) intel##x##_555 +#include "spantmp.h" + +/* 16 bit depthbuffer functions. + */ +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch); + + +#define TAG(x) intel##x##_z16 +#include "depthtmp.h" + + +#undef LOCAL_VARS +#define LOCAL_VARS \ + struct intel_context *intel = intel_context(ctx); \ + __DRIdrawablePrivate *dPriv = intel->driDrawable; \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + GLuint pitch = drb->pitch; \ + GLuint height = dPriv->h; \ + char *buf = (char *)drb->Base.Data + \ + dPriv->x * drb->cpp + \ + dPriv->y * pitch; \ + GLuint p; \ + (void) buf; (void) p + +#undef INIT_MONO_PIXEL +#define INIT_MONO_PIXEL(p,color)\ + p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]) + +/* 32 bit, 8888 argb color spanline and pixel functions + */ +#define WRITE_RGBA(_x, _y, r, g, b, a) \ + *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) | \ + (g << 8) | \ + (b << 0) | \ + (a << 24) ) + +#define WRITE_PIXEL(_x, _y, p) \ + *(GLuint *)(buf + _x*4 + _y*pitch) = p + + +#define READ_RGBA(rgba, _x, _y) \ + do { \ + GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch); \ + rgba[0] = (p >> 16) & 0xff; \ + rgba[1] = (p >> 8) & 0xff; \ + rgba[2] = (p >> 0) & 0xff; \ + rgba[3] = (p >> 24) & 0xff; \ + } while (0) + +#define TAG(x) intel##x##_8888 +#include "spantmp.h" + + +/* 24/8 bit interleaved depth/stencil functions + */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = *(GLuint *)(buf + 
/* Move locking out to get reasonable span performance.
 *
 * Takes the hardware lock and maps the front, back and depth regions;
 * intelSpanRenderFinish() undoes both.
 */
void intelSpanRenderStart( GLcontext *ctx )
{
   struct intel_context *intel = intel_context(ctx);

   LOCK_HARDWARE(intel);

   /* Map the framebuffer regions (front, back, depth).  No texture mapping
    * happens in this function.  Bufmgr code will take care of waiting on
    * the necessary fences:
    */
   intel_region_map(intel, intel->front_region);
   intel_region_map(intel, intel->back_region);
   intel_region_map(intel, intel->depth_region);
}

/* Counterpart of intelSpanRenderStart(): flushes swrast, unmaps the three
 * framebuffer regions and drops the hardware lock.
 */
void intelSpanRenderFinish( GLcontext *ctx )
{
   struct intel_context *intel = intel_context( ctx );

   /* Flush before the mappings go away. */
   _swrast_flush( ctx );

   /* Now unmap the framebuffer:
    */
   intel_region_unmap(intel, intel->front_region);
   intel_region_unmap(intel, intel->back_region);
   intel_region_unmap(intel, intel->depth_region);

   UNLOCK_HARDWARE( intel );
}

/* Install the two hooks above as swrast's SpanRenderStart and
 * SpanRenderFinish callbacks for this context.
 */
void intelInitSpanFuncs( GLcontext *ctx )
{
   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
   swdd->SpanRenderStart = intelSpanRenderStart;
   swdd->SpanRenderFinish = intelSpanRenderFinish;
}
+ */ +void +intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) +{ + if (drb->Base.InternalFormat == GL_RGBA) { + if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) { + intelInitPointers_555(&drb->Base); + } + else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { + intelInitPointers_565(&drb->Base); + } + else { + assert(vis->redBits == 8); + assert(vis->greenBits == 8); + assert(vis->blueBits == 8); + intelInitPointers_8888(&drb->Base); + } + } + else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { + intelInitDepthPointers_z16(&drb->Base); + } + else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { + intelInitDepthPointers_z24_s8(&drb->Base); + } + else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + intelInitStencilPointers_z24_s8(&drb->Base); + } +} diff --git a/src/mesa/drivers/dri/i965/intel_span.h b/src/mesa/drivers/dri/i965/intel_span.h new file mode 100644 index 00000000000..2d4f8589d0f --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_span.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_SPAN_H +#define _INTEL_SPAN_H + +#include "drirenderbuffer.h" + +extern void intelInitSpanFuncs( GLcontext *ctx ); + +extern void intelSpanRenderFinish( GLcontext *ctx ); +extern void intelSpanRenderStart( GLcontext *ctx ); + +extern void +intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c new file mode 100644 index 00000000000..a471f67c510 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_state.c @@ -0,0 +1,330 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "glheader.h" +#include "context.h" +#include "macros.h" +#include "enums.h" +#include "colormac.h" +#include "dd.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_regions.h" +#include "swrast/swrast.h" + +int intel_translate_compare_func( GLenum func ) +{ + switch(func) { + case GL_NEVER: + return COMPAREFUNC_NEVER; + case GL_LESS: + return COMPAREFUNC_LESS; + case GL_LEQUAL: + return COMPAREFUNC_LEQUAL; + case GL_GREATER: + return COMPAREFUNC_GREATER; + case GL_GEQUAL: + return COMPAREFUNC_GEQUAL; + case GL_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_EQUAL: + return COMPAREFUNC_EQUAL; + case GL_ALWAYS: + return COMPAREFUNC_ALWAYS; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_ALWAYS; +} + +int intel_translate_stencil_op( GLenum op ) +{ + switch(op) { + case GL_KEEP: + return STENCILOP_KEEP; + case GL_ZERO: + return STENCILOP_ZERO; + case GL_REPLACE: + return STENCILOP_REPLACE; + case GL_INCR: + return STENCILOP_INCRSAT; + case GL_DECR: + return STENCILOP_DECRSAT; + case GL_INCR_WRAP: + return STENCILOP_INCR; + case GL_DECR_WRAP: + return STENCILOP_DECR; + case GL_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +int intel_translate_blend_factor( GLenum factor ) +{ + switch(factor) { + case GL_ZERO: + return BLENDFACT_ZERO; + case GL_SRC_ALPHA: + return 
BLENDFACT_SRC_ALPHA; + case GL_ONE: + return BLENDFACT_ONE; + case GL_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case GL_ONE_MINUS_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case GL_DST_COLOR: + return BLENDFACT_DST_COLR; + case GL_ONE_MINUS_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case GL_ONE_MINUS_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return BLENDFACT_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return BLENDFACT_CONST_ALPHA; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); + return BLENDFACT_ZERO; +} + +int intel_translate_logic_op( GLenum opcode ) +{ + switch(opcode) { + case GL_CLEAR: + return LOGICOP_CLEAR; + case GL_AND: + return LOGICOP_AND; + case GL_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case GL_COPY: + return LOGICOP_COPY; + case GL_COPY_INVERTED: + return LOGICOP_COPY_INV; + case GL_AND_INVERTED: + return LOGICOP_AND_INV; + case GL_NOOP: + return LOGICOP_NOOP; + case GL_XOR: + return LOGICOP_XOR; + case GL_OR: + return LOGICOP_OR; + case GL_OR_INVERTED: + return LOGICOP_OR_INV; + case GL_NOR: + return LOGICOP_NOR; + case GL_EQUIV: + return LOGICOP_EQUIV; + case GL_INVERT: + return LOGICOP_INV; + case GL_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case GL_NAND: + return LOGICOP_NAND; + case GL_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + +static void intelClearColor(GLcontext *ctx, const GLfloat color[4]) +{ + struct intel_context *intel = intel_context(ctx); + intelScreenPrivate *screen = intel->intelScreen; + + UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color); + + intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat, + 
intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2], + intel->clear_chan[3]); +} + + +static void intelCalcViewport( GLcontext *ctx ) +{ + struct intel_context *intel = intel_context(ctx); + const GLfloat *v = ctx->Viewport._WindowMap.m; + GLfloat *m = intel->ViewportMatrix.m; + GLint h = 0; + + if (intel->driDrawable) + h = intel->driDrawable->h + SUBPIXEL_Y; + + /* See also intel_translate_vertex. SUBPIXEL adjustments can be done + * via state vars, too. + */ + m[MAT_SX] = v[MAT_SX]; + m[MAT_TX] = v[MAT_TX] + SUBPIXEL_X; + m[MAT_SY] = - v[MAT_SY]; + m[MAT_TY] = - v[MAT_TY] + h; + m[MAT_SZ] = v[MAT_SZ] * intel->depth_scale; + m[MAT_TZ] = v[MAT_TZ] * intel->depth_scale; +} + +static void intelViewport( GLcontext *ctx, + GLint x, GLint y, + GLsizei width, GLsizei height ) +{ + intelCalcViewport( ctx ); +} + +static void intelDepthRange( GLcontext *ctx, + GLclampd nearval, GLclampd farval ) +{ + intelCalcViewport( ctx ); +} + +/* Fallback to swrast for select and feedback. 
+ */ +static void intelRenderMode( GLcontext *ctx, GLenum mode ) +{ + struct intel_context *intel = intel_context(ctx); + FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) ); +} + + +void intelInitStateFuncs( struct dd_function_table *functions ) +{ + functions->RenderMode = intelRenderMode; + functions->Viewport = intelViewport; + functions->DepthRange = intelDepthRange; + functions->ClearColor = intelClearColor; +} + + + + +void intelInitState( GLcontext *ctx ) +{ + /* Mesa should do this for us: + */ + ctx->Driver.AlphaFunc( ctx, + ctx->Color.AlphaFunc, + ctx->Color.AlphaRef); + + ctx->Driver.BlendColor( ctx, + ctx->Color.BlendColor ); + + ctx->Driver.BlendEquationSeparate( ctx, + ctx->Color.BlendEquationRGB, + ctx->Color.BlendEquationA); + + ctx->Driver.BlendFuncSeparate( ctx, + ctx->Color.BlendSrcRGB, + ctx->Color.BlendDstRGB, + ctx->Color.BlendSrcA, + ctx->Color.BlendDstA); + + ctx->Driver.ColorMask( ctx, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], + ctx->Color.ColorMask[ACOMP]); + + ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode ); + ctx->Driver.DepthFunc( ctx, ctx->Depth.Func ); + ctx->Driver.DepthMask( ctx, ctx->Depth.Mask ); + + ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled ); + ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled ); + ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled ); + ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled ); + ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag ); + ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test ); + ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag ); + ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled ); + ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled ); + ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag ); + ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag ); + ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, 
ctx->Scissor.Enabled ); + ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled ); + ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE ); + ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE ); + ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE ); + ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE ); + ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE ); + + ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); + ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 ); + ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); + ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); + ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); + + ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace ); + + { + GLfloat f = (GLfloat)ctx->Light.Model.ColorControl; + ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f ); + } + + ctx->Driver.LineWidth( ctx, ctx->Line.Width ); + ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp ); + ctx->Driver.PointSize( ctx, ctx->Point.Size ); + ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple ); + ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height ); + ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel ); + ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT, + ctx->Stencil.Function[0], + ctx->Stencil.Ref[0], + ctx->Stencil.ValueMask[0] ); + ctx->Driver.StencilFuncSeparate( ctx, GL_BACK, + ctx->Stencil.Function[1], + ctx->Stencil.Ref[1], + ctx->Stencil.ValueMask[1] ); + ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] ); + ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] ); + ctx->Driver.StencilOpSeparate( ctx, GL_FRONT, + ctx->Stencil.FailFunc[0], + ctx->Stencil.ZFailFunc[0], + ctx->Stencil.ZPassFunc[0]); + ctx->Driver.StencilOpSeparate( ctx, GL_BACK, + ctx->Stencil.FailFunc[1], + ctx->Stencil.ZFailFunc[1], + ctx->Stencil.ZPassFunc[1]); + + + ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] ); +} diff 
--git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c new file mode 100644 index 00000000000..4523969bfa5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -0,0 +1,315 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "mtypes.h" +#include "image.h" +#include "texstore.h" +#include "texformat.h" +#include "teximage.h" +#include "texobj.h" +#include "swrast/swrast.h" + + +#include "intel_context.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + + +static GLuint target_to_face( GLenum target ) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return ((GLuint) target - + (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); + default: + return 0; + } +} + +static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + _mesa_store_teximage1d( ctx, target, level, internalFormat, + width, border, format, type, + pixels, packing, texObj, texImage ); + + intelObj->dirty_images[0] |= (1 << level); + intelObj->dirty |= 1; +} + +static void intelTexSubImage1D( GLcontext *ctx, + GLenum target, + GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, + format, type, pixels, packing, texObj, + texImage); + + intelObj->dirty_images[0] |= (1 << level); + intelObj->dirty |= 1; +} + + +/* Handles 2D, CUBE, RECT: + */ +static void intelTexImage2D( GLcontext *ctx, GLenum 
target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint face = target_to_face(target); + + _mesa_store_teximage2d( ctx, target, level, internalFormat, + width, height, border, format, type, + pixels, packing, texObj, texImage ); + + intelObj->dirty_images[face] |= (1 << level); + intelObj->dirty |= 1 << face; +} + +static void intelTexSubImage2D( GLcontext *ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint face = target_to_face(target); + + _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, type, pixels, packing, texObj, + texImage); + + intelObj->dirty_images[face] |= (1 << level); + intelObj->dirty |= 1 << face; +} + +static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint face = target_to_face(target); + + _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, + height, border, imageSize, data, texObj, texImage); + + intelObj->dirty_images[face] |= (1 << level); + intelObj->dirty |= 1 << face; +} + + +static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei 
width, GLsizei height, + GLenum format, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint face = target_to_face(target); + + _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, imageSize, data, texObj, texImage); + + intelObj->dirty_images[face] |= (1 << level); + intelObj->dirty |= 1 << face; +} + + +static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + _mesa_store_teximage3d(ctx, target, level, internalFormat, + width, height, depth, border, + format, type, pixels, + &ctx->Unpack, texObj, texImage); + + intelObj->dirty_images[0] |= (1 << level); + intelObj->dirty |= 1 << 0; +} + + +static void +intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, texImage); + + intelObj->dirty_images[0] |= (1 << level); + intelObj->dirty |= 1 << 0; +} + + + + +static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, + GLuint name, + GLenum target ) +{ + struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object); + + 
_mesa_initialize_texture_object(&obj->base, name, target); + + return &obj->base; +} + +static GLboolean intelIsTextureResident(GLcontext *ctx, + struct gl_texture_object *texObj) +{ +#if 0 + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + return + intelObj->mt && + intelObj->mt->region && + intel_is_region_resident(intel, intelObj->mt->region); +#endif + return 1; +} + + + +static void intelTexParameter( GLcontext *ctx, + GLenum target, + struct gl_texture_object *texObj, + GLenum pname, + const GLfloat *params ) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + switch (pname) { + /* Anything which can affect the calculation of firstLevel and + * lastLevel, as changes to these may invalidate the miptree. + */ + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + intelObj->dirty |= 1; + break; + + default: + break; + } +} + + +static void +intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj ) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + if (intelObj->mt) + intel_miptree_destroy(intel, intelObj->mt); + + _mesa_delete_texture_object( ctx, texObj ); +} + +void intelInitTextureFuncs( struct dd_function_table *functions ) +{ + functions->NewTextureObject = intelNewTextureObject; + functions->TexImage1D = intelTexImage1D; + functions->TexImage2D = intelTexImage2D; + functions->TexImage3D = intelTexImage3D; + functions->TexSubImage1D = intelTexSubImage1D; + functions->TexSubImage2D = intelTexSubImage2D; + functions->TexSubImage3D = intelTexSubImage3D; + functions->CopyTexImage1D = _swrast_copy_teximage1d; + functions->CopyTexImage2D = _swrast_copy_teximage2d; + functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d; + 
functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; + functions->CopyTexSubImage3D = _swrast_copy_texsubimage3d; + functions->DeleteTexture = intel_delete_texture_object; + functions->UpdateTexturePalette = NULL; + functions->IsTextureResident = intelIsTextureResident; + functions->TestProxyTexImage = _mesa_test_proxy_teximage; + functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; + functions->TexParameter = intelTexParameter; +} + + + + + diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h new file mode 100644 index 00000000000..e389d521461 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELTEX_INC +#define INTELTEX_INC + +#include "mtypes.h" +#include "intel_context.h" + + +void intelInitTextureFuncs( struct dd_function_table *functions ); + + +GLuint intel_finalize_mipmap_tree( struct intel_context *intel, + struct gl_texture_object *tObj ); + + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c new file mode 100644 index 00000000000..5f65242458a --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -0,0 +1,250 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "mtypes.h" +#include "macros.h" + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_tex.h" +#include "bufmgr.h" + +/** + * Compute which mipmap levels that really need to be sent to the hardware. + * This depends on the base image size, GL_TEXTURE_MIN_LOD, + * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. + */ +static void intel_calculate_first_last_level( struct intel_texture_object *intelObj ) +{ + struct gl_texture_object *tObj = &intelObj->base; + const struct gl_texture_image * const baseImage = + tObj->Image[0][tObj->BaseLevel]; + + /* These must be signed values. MinLod and MaxLod can be negative numbers, + * and having firstLevel and lastLevel as signed prevents the need for + * extra sign checks. + */ + int firstLevel; + int lastLevel; + + /* Yes, this looks overly complicated, but it's all needed. + */ + switch (tObj->Target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + case GL_TEXTURE_CUBE_MAP: + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. + */ + firstLevel = lastLevel = tObj->BaseLevel; + } + else { + /* Currently not taking min/max lod into account here, those + * values are programmed as sampler state elsewhere and we + * upload the same mipmap levels regardless. 
Not sure if + * this makes sense as it means it isn't possible for the app + * to use min/max lod to reduce texture memory pressure: + */ + firstLevel = tObj->BaseLevel; + lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, + tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + } + break; + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_4D_SGIS: + firstLevel = lastLevel = 0; + break; + default: + return; + } + + /* save these values */ + intelObj->firstLevel = firstLevel; + intelObj->lastLevel = lastLevel; +} + +static GLboolean copy_image_data_to_tree( struct intel_context *intel, + struct intel_texture_object *intelObj, + struct gl_texture_image *texImage, + GLuint face, + GLuint level) +{ + return intel_miptree_image_data(intel, + intelObj->mt, + face, + level, + texImage->Data, + texImage->RowStride, + (texImage->RowStride * + texImage->Height * + texImage->TexFormat->TexelBytes)); +} + +static void intel_texture_invalidate( struct intel_texture_object *intelObj ) +{ + GLint nr_faces, face; + intelObj->dirty = ~0; + + nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + for (face = 0; face < nr_faces; face++) + intelObj->dirty_images[face] = ~0; +} + +static void intel_texture_invalidate_cb( struct intel_context *intel, + void *ptr ) +{ + intel_texture_invalidate( (struct intel_texture_object *) ptr ); +} + + +/* + */ +GLuint intel_finalize_mipmap_tree( struct intel_context *intel, + struct gl_texture_object *tObj ) +{ + struct intel_texture_object *intelObj = intel_texture_object(tObj); + GLuint face, i; + GLuint nr_faces = 0; + struct gl_texture_image *firstImage; + + /* We know/require this is true by now: + */ + assert(intelObj->base.Complete); + + /* What levels must the tree include at a minimum? 
+ */ + if (intelObj->dirty) { + intel_calculate_first_last_level( intelObj ); +/* intel_miptree_destroy(intel, intelObj->mt); */ +/* intelObj->mt = NULL; */ + } + + firstImage = intelObj->base.Image[0][intelObj->firstLevel]; + + /* Fallback case: + */ + if (firstImage->Border) { + if (intelObj->mt) { + intel_miptree_destroy(intel, intelObj->mt); + intelObj->mt = NULL; + /* Set all images dirty: + */ + intel_texture_invalidate(intelObj); + } + return GL_FALSE; + } + + + + /* Check tree can hold all active levels. Check tree matches + * target, imageFormat, etc. + */ + if (intelObj->mt && + (intelObj->mt->first_level != intelObj->firstLevel || + intelObj->mt->last_level != intelObj->lastLevel || + intelObj->mt->internal_format != firstImage->InternalFormat || + intelObj->mt->width0 != firstImage->Width || + intelObj->mt->height0 != firstImage->Height || + intelObj->mt->depth0 != firstImage->Depth)) + { + intel_miptree_destroy(intel, intelObj->mt); + intelObj->mt = NULL; + + /* Set all images dirty: + */ + intel_texture_invalidate(intelObj); + } + + + /* May need to create a new tree: + */ + if (!intelObj->mt) { + intelObj->mt = intel_miptree_create(intel, + intelObj->base.Target, + firstImage->InternalFormat, + intelObj->firstLevel, + intelObj->lastLevel, + firstImage->Width, + firstImage->Height, + firstImage->Depth, + firstImage->TexFormat->TexelBytes, + firstImage->IsCompressed); + + /* Tell the buffer manager that we will manage the backing + * store, but we still want it to do fencing for us. + */ + bmBufferSetInvalidateCB(intel, + intelObj->mt->region->buffer, + intel_texture_invalidate_cb, + intelObj, + GL_FALSE); + } + + /* Pull in any images not in the object's tree: + */ + if (intelObj->dirty) { + nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 
6 : 1; + for (face = 0; face < nr_faces; face++) { + if (intelObj->dirty_images[face]) { + for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { + struct gl_texture_image *texImage = intelObj->base.Image[face][i]; + + /* Need to import images in main memory or held in other trees. + */ + if (intelObj->dirty_images[face] & (1<<i) && + texImage) { + + if (INTEL_DEBUG & DEBUG_TEXTURE) + _mesa_printf("copy data from image %d (%p) into object miptree\n", + i, + texImage->Data); + + if (!copy_image_data_to_tree(intel, + intelObj, + texImage, + face, + i)) + return GL_FALSE; + + } + } + } + } + + /* Only clear the dirty flags if everything went ok: + */ + for (face = 0; face < nr_faces; face++) { + intelObj->dirty_images[face] = 0; + } + + intelObj->dirty = 0; + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/i965/server/i830_common.h b/src/mesa/drivers/dri/i965/server/i830_common.h new file mode 100644 index 00000000000..e3bbdc79070 --- /dev/null +++ b/src/mesa/drivers/dri/i965/server/i830_common.h @@ -0,0 +1,203 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */ + +#ifndef _I830_COMMON_H_ +#define _I830_COMMON_H_ + + +#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ +#define I830_LOG_MIN_TEX_REGION_SIZE 14 + + +/* Driver specific DRM command indices + * NOTE: these are not OS specific, but they are driver specific + */ +#define DRM_I830_INIT 0x00 +#define DRM_I830_FLUSH 0x01 +#define DRM_I830_FLIP 0x02 +#define DRM_I830_BATCHBUFFER 0x03 +#define DRM_I830_IRQ_EMIT 0x04 +#define DRM_I830_IRQ_WAIT 0x05 +#define DRM_I830_GETPARAM 0x06 +#define DRM_I830_SETPARAM 0x07 +#define DRM_I830_ALLOC 0x08 +#define DRM_I830_FREE 0x09 +#define DRM_I830_INIT_HEAP 0x0a +#define DRM_I830_CMDBUFFER 0x0b +#define DRM_I830_DESTROY_HEAP 0x0c + +/* Parameter block whose `func` field selects init, cleanup or resume of + * DMA; the remaining fields describe ring and buffer layout. + * NOTE(review): presumably the argument of DRM_I830_INIT -- confirm. + */ +typedef struct { + enum { + I830_INIT_DMA = 0x01, + I830_CLEANUP_DMA = 0x02, + I830_RESUME_DMA = 0x03 + } func; + unsigned int mmio_offset; + int sarea_priv_offset; + unsigned int ring_start; + unsigned int ring_end; + unsigned int ring_size; + unsigned int front_offset; + unsigned int back_offset; + unsigned int depth_offset; + unsigned int w; + unsigned int h; + unsigned int pitch; + unsigned int pitch_bits; + unsigned int back_pitch; + unsigned int depth_pitch; + unsigned int cpp; + unsigned int chipset; +} drmI830Init; + +/* Shared-area record: texture region list, upload/dispatch ages, + * page-flip state, screen size and the handle/offset/size of the front, + * back, depth and texture buffers, plus rotation and tiling fields. + */ +typedef struct { + drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; + int last_upload;
/* last time texture was uploaded */ + int last_enqueue; /* last time a buffer was enqueued */ + volatile int last_dispatch; /* age of the most recently dispatched buffer */ + int ctxOwner; /* last context to upload state */ + int texAge; + int pf_enabled; /* is pageflipping allowed? */ + int pf_active; + int pf_current_page; /* which buffer is being displayed? */ + int perf_boxes; /* performance boxes to be displayed */ + int width, height; /* screen size in pixels */ + + drm_handle_t front_handle; + int front_offset; + int front_size; + + drm_handle_t back_handle; + int back_offset; + int back_size; + + drm_handle_t depth_handle; + int depth_offset; + int depth_size; + + drm_handle_t tex_handle; + int tex_offset; + int tex_size; + int log_tex_granularity; + int pitch; + int rotation; /* 0, 90, 180 or 270 */ + int rotated_offset; + int rotated_size; + int rotated_pitch; + int virtualX, virtualY; + + unsigned int front_tiled; + unsigned int back_tiled; + unsigned int depth_tiled; + unsigned int rotated_tiled; + unsigned int rotated2_tiled; +} drmI830Sarea; + +/* Flags for perf_boxes + */ +#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ +#define I830_BOX_FLIP 0x2 /* populated by kernel */ +#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ +#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ +#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ + + +typedef struct { + int start; /* agp offset */ + int used; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ + int num_cliprects; /* multipass with multiple cliprects? */ + drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ +} drmI830BatchBuffer; + +typedef struct { + char *buf; /* NOTE(review): was "agp offset", apparently copied from drmI830BatchBuffer.start; buf is a char pointer, presumably to the command data -- confirm */ + int sz; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ + int num_cliprects; /* multipass with multiple cliprects? 
*/ + drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ +} drmI830CmdBuffer; + +typedef struct { + int *irq_seq; +} drmI830IrqEmit; + +typedef struct { + int irq_seq; +} drmI830IrqWait; + +typedef struct { + int param; + int *value; +} drmI830GetParam; + +#define I830_PARAM_IRQ_ACTIVE 1 +#define I830_PARAM_ALLOW_BATCHBUFFER 2 + +typedef struct { + int param; + int value; +} drmI830SetParam; + +#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 +#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 +#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 + + +/* A memory manager for regions of shared memory: + */ +#define I830_MEM_REGION_AGP 1 + +typedef struct { + int region; + int alignment; + int size; + int *region_offset; /* offset from start of fb or agp */ +} drmI830MemAlloc; + +typedef struct { + int region; + int region_offset; +} drmI830MemFree; + +typedef struct { + int region; + int size; + int start; +} drmI830MemInitHeap; + +typedef struct { + int region; +} drmI830MemDestroyHeap; + + +#endif /* _I830_COMMON_H_ */ diff --git a/src/mesa/drivers/dri/i965/server/i830_dri.h b/src/mesa/drivers/dri/i965/server/i830_dri.h new file mode 100644 index 00000000000..22951812ad3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/server/i830_dri.h @@ -0,0 +1,63 @@ +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */ + +#ifndef _I830_DRI_H +#define _I830_DRI_H + +#include "xf86drm.h" +#include "i830_common.h" + +#define I830_MAX_DRAWABLES 256 + +#define I830_MAJOR_VERSION 1 +#define I830_MINOR_VERSION 3 +#define I830_PATCHLEVEL 0 + +#define I830_REG_SIZE 0x80000 + +typedef struct _I830DRIRec { + drm_handle_t regs; + drmSize regsSize; + + drmSize unused1; /* backbufferSize */ + drm_handle_t unused2; /* backbuffer */ + + drmSize unused3; /* depthbufferSize */ + drm_handle_t unused4; /* depthbuffer */ + + drmSize unused5; /* rotatedSize /*/ + drm_handle_t unused6; /* rotatedbuffer */ + + drm_handle_t unused7; /* textures */ + int
unused8; /* textureSize */ + + drm_handle_t unused9; /* agp_buffers */ + drmSize unused10; /* agp_buf_size */ + + int deviceID; + int width; + int height; + int mem; + int cpp; + int bitsPerPixel; + + int unused11[8]; /* was front/back/depth/rotated offset/pitch */ + + int unused12; /* logTextureGranularity */ + int unused13; /* textureOffset */ + + int irq; + int sarea_priv_offset; +} I830DRIRec, *I830DRIPtr; + +typedef struct { + /* Nothing here yet */ + int dummy; +} I830ConfigPrivRec, *I830ConfigPrivPtr; + +typedef struct { + /* Nothing here yet */ + int dummy; +} I830DRIContextRec, *I830DRIContextPtr; + + +#endif diff --git a/src/mesa/drivers/dri/i965/server/intel.h b/src/mesa/drivers/dri/i965/server/intel.h new file mode 100644 index 00000000000..d7858a20c8d --- /dev/null +++ b/src/mesa/drivers/dri/i965/server/intel.h @@ -0,0 +1,328 @@ +#ifndef _INTEL_H_ +#define _INTEL_H_ + +#include "xf86drm.h" /* drm_handle_t, etc */ + +/* Intel */ +#ifndef PCI_CHIP_I810 +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 +#define PCI_CHIP_I810_BRIDGE 0x7120 +#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 +#define PCI_CHIP_I810_E_BRIDGE 0x7124 +#define PCI_CHIP_I815_BRIDGE 0x1130 +#endif + +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 + +#ifndef PCI_CHIP_I855_GM +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I855_GM_BRIDGE 0x3580 +#endif + +#ifndef PCI_CHIP_I865_G +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I865_G_BRIDGE 0x2570 +#endif + +#ifndef PCI_CHIP_I915_G +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_G_BRIDGE 0x2580 +#endif + +#ifndef PCI_CHIP_I915_GM +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I915_GM_BRIDGE 0x2590 +#endif + +#ifndef PCI_CHIP_E7221_G +#define PCI_CHIP_E7221_G 0x258A +/* Same as I915_G_BRIDGE */ +#define PCI_CHIP_E7221_G_BRIDGE 0x2580 +#endif + +#ifndef PCI_CHIP_I945_G +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_G_BRIDGE 
0x2770 +#endif + +#ifndef PCI_CHIP_I945_GM +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 +#endif + +#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \ + pI810->Chipset == PCI_CHIP_I810_DC100 || \ + pI810->Chipset == PCI_CHIP_I810_E) +#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815) +#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M) +#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G) +#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM) +#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME)) +#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME)) +#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G) + +#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G) +#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM) +#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G) +#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM) +#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810)) + +#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810)) + +#define I830_GMCH_CTRL 0x52 + + +#define I830_GMCH_GMS_MASK 0x70 +#define I830_GMCH_GMS_DISABLED 0x00 +#define I830_GMCH_GMS_LOCAL 0x10 +#define I830_GMCH_GMS_STOLEN_512 0x20 +#define I830_GMCH_GMS_STOLEN_1024 0x30 +#define I830_GMCH_GMS_STOLEN_8192 0x40 + +#define I855_GMCH_GMS_MASK (0x7 << 4) +#define I855_GMCH_GMS_DISABLED 0x00 +#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) +#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) +#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) +#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) +#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) +#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4) +#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4) + +typedef unsigned char Bool; +#define TRUE 
1 +#define FALSE 0 + +#define PIPE_NONE 0<<0 +#define PIPE_CRT 1<<0 +#define PIPE_TV 1<<1 +#define PIPE_DFP 1<<2 +#define PIPE_LFP 1<<3 +#define PIPE_CRT2 1<<4 +#define PIPE_TV2 1<<5 +#define PIPE_DFP2 1<<6 +#define PIPE_LFP2 1<<7 + +typedef struct _I830MemPool *I830MemPoolPtr; +typedef struct _I830MemRange *I830MemRangePtr; +typedef struct _I830MemRange { + long Start; + long End; + long Size; + unsigned long Physical; + unsigned long Offset; /* Offset of AGP-allocated portion */ + unsigned long Alignment; + drm_handle_t Key; + unsigned long Pitch; // add pitch + I830MemPoolPtr Pool; +} I830MemRange; + +typedef struct _I830MemPool { + I830MemRange Total; + I830MemRange Free; + I830MemRange Fixed; + I830MemRange Allocated; +} I830MemPool; + +typedef struct { + int tail_mask; + I830MemRange mem; + unsigned char *virtual_start; + int head; + int tail; + int space; +} I830RingBuffer; + +typedef struct _I830Rec { + unsigned char *MMIOBase; + unsigned char *FbBase; + int cpp; + + unsigned int bios_version; + + /* These are set in PreInit and never changed. */ + long FbMapSize; + long TotalVideoRam; + I830MemRange StolenMemory; /* pre-allocated memory */ + long BIOSMemorySize; /* min stolen pool size */ + int BIOSMemSizeLoc; + + /* These change according to what has been allocated. */ + long FreeMemory; + I830MemRange MemoryAperture; + I830MemPool StolenPool; + long allocatedMemory; + + /* Regions allocated either from the above pools, or from agpgart. 
*/ + /* for single and dual head configurations */ + I830MemRange FrontBuffer; + I830MemRange FrontBuffer2; + I830MemRange Scratch; + I830MemRange Scratch2; + + I830RingBuffer *LpRing; + + I830MemRange BackBuffer; + I830MemRange DepthBuffer; + I830MemRange TexMem; + int TexGranularity; + I830MemRange ContextMem; + int drmMinor; + Bool have3DWindows; + + Bool NeedRingBufferLow; + Bool allowPageFlip; + Bool disableTiling; + + int Chipset; + unsigned long LinearAddr; + unsigned long MMIOAddr; + + drmSize registerSize; /**< \brief MMIO register map size */ + drm_handle_t registerHandle; /**< \brief MMIO register map handle */ + // IOADDRESS ioBase; + int irq; /**< \brief IRQ number */ + int GttBound; + + drm_handle_t ring_map; + unsigned int Fence[8]; + +} I830Rec; + +/* + * 12288 is set as the maximum, chosen because it is enough for + * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare. + */ +#define I830_MAXIMUM_VBIOS_MEM 12288 +#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024) +#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024) + +/* Flags for memory allocation function */ +#define FROM_ANYWHERE 0x00000000 +#define FROM_POOL_ONLY 0x00000001 +#define FROM_NEW_ONLY 0x00000002 +#define FROM_MASK 0x0000000f + +#define ALLOCATE_AT_TOP 0x00000010 +#define ALLOCATE_AT_BOTTOM 0x00000020 +#define FORCE_GAPS 0x00000040 + +#define NEED_PHYSICAL_ADDR 0x00000100 +#define ALIGN_BOTH_ENDS 0x00000200 +#define FORCE_LOW 0x00000400 + +#define ALLOC_NO_TILING 0x00001000 +#define ALLOC_INITIAL 0x00002000 + +#define ALLOCATE_DRY_RUN 0x80000000 + +/* Chipset registers for VIDEO BIOS memory RW access */ +#define _855_DRAM_RW_CONTROL 0x58 +#define _845_DRAM_RW_CONTROL 0x90 +#define DRAM_WRITE 0x33330000 + +#define KB(x) ((x) * 1024) +#define MB(x) ((x) * KB(1024)) + +#define GTT_PAGE_SIZE KB(4) +#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y)) +#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y)) +#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE) +#define ROUND_TO_MB(x) 
ROUND_TO((x), MB(1)) +#define PRIMARY_RINGBUFFER_SIZE KB(128) + + +/* Ring buffer registers, p277, overview p19 + */ +#define LP_RING 0x2030 +#define HP_RING 0x2040 + +#define RING_TAIL 0x00 +#define TAIL_ADDR 0x000FFFF8 +#define I830_TAIL_MASK 0x001FFFF8 + +#define RING_HEAD 0x04 +#define HEAD_WRAP_COUNT 0xFFE00000 +#define HEAD_WRAP_ONE 0x00200000 +#define HEAD_ADDR 0x001FFFFC +#define I830_HEAD_MASK 0x001FFFFC + +#define RING_START 0x08 +#define START_ADDR 0x03FFFFF8 +#define I830_RING_START_MASK 0xFFFFF000 + +#define RING_LEN 0x0C +#define RING_NR_PAGES 0x001FF000 +#define I830_RING_NR_PAGES 0x001FF000 +#define RING_REPORT_MASK 0x00000006 +#define RING_REPORT_64K 0x00000002 +#define RING_REPORT_128K 0x00000004 +#define RING_NO_REPORT 0x00000000 +#define RING_VALID_MASK 0x00000001 +#define RING_VALID 0x00000001 +#define RING_INVALID 0x00000000 + + +/* Fence/Tiling ranges [0..7] + */ +#define FENCE 0x2000 +#define FENCE_NR 8 + +#define I915G_FENCE_START_MASK 0x0ff00000 + +#define I830_FENCE_START_MASK 0x07f80000 + +#define FENCE_START_MASK 0x03F80000 +#define FENCE_X_MAJOR 0x00000000 +#define FENCE_Y_MAJOR 0x00001000 +#define FENCE_SIZE_MASK 0x00000700 +#define FENCE_SIZE_512K 0x00000000 +#define FENCE_SIZE_1M 0x00000100 +#define FENCE_SIZE_2M 0x00000200 +#define FENCE_SIZE_4M 0x00000300 +#define FENCE_SIZE_8M 0x00000400 +#define FENCE_SIZE_16M 0x00000500 +#define FENCE_SIZE_32M 0x00000600 +#define FENCE_SIZE_64M 0x00000700 +#define I915G_FENCE_SIZE_1M 0x00000000 +#define I915G_FENCE_SIZE_2M 0x00000100 +#define I915G_FENCE_SIZE_4M 0x00000200 +#define I915G_FENCE_SIZE_8M 0x00000300 +#define I915G_FENCE_SIZE_16M 0x00000400 +#define I915G_FENCE_SIZE_32M 0x00000500 +#define I915G_FENCE_SIZE_64M 0x00000600 +#define I915G_FENCE_SIZE_128M 0x00000700 +#define FENCE_PITCH_1 0x00000000 +#define FENCE_PITCH_2 0x00000010 +#define FENCE_PITCH_4 0x00000020 +#define FENCE_PITCH_8 0x00000030 +#define FENCE_PITCH_16 0x00000040 +#define FENCE_PITCH_32 0x00000050 +#define 
FENCE_PITCH_64 0x00000060 +#define FENCE_VALID 0x00000001 + +#include <mmio.h> + +# define MMIO_IN8(base, offset) \ + *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) +# define MMIO_IN32(base, offset) \ + read_MMIO_LE32(base, offset) +# define MMIO_OUT8(base, offset, val) \ + *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) +# define MMIO_OUT32(base, offset, val) \ + *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) + + + /* Memory mapped register access macros */ +#define INREG8(addr) MMIO_IN8(MMIO, addr) +#define INREG(addr) MMIO_IN32(MMIO, addr) +#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val) +#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val) + +#define DSPABASE 0x70184 + +#endif diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c new file mode 100644 index 00000000000..169fdbece30 --- /dev/null +++ b/src/mesa/drivers/dri/i965/server/intel_dri.c @@ -0,0 +1,1282 @@ +/** + * \file server/intel_dri.c + * \brief File to perform the device-specific initialization tasks typically + * done in the X server. + * + * Here they are converted to run in the client (or perhaps a standalone + * process), and to work with the frame buffer device rather than the X + * server infrastructure. 
+ * + * Copyright (C) 2006 Dave Airlie ([email protected]) + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sub license, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#include "driver.h" +#include "drm.h" + +#include "intel.h" +#include "i830_dri.h" + +#include "memops.h" +#include "pciaccess.h" + +static size_t drm_page_size; +static int nextTile = 0; +#define xf86DrvMsg(...) 
do {} while(0) + +static const int pitches[] = { + 128 * 8, + 128 * 16, + 128 * 32, + 128 * 64, + 0 +}; + +static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea); + +static unsigned long +GetBestTileAlignment(unsigned long size) +{ + unsigned long i; + + for (i = KB(512); i < size; i <<= 1) + ; + + if (i > MB(64)) + i = MB(64); + + return i; +} + +static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830) +{ + int i; + unsigned char *MMIO = ctx->MMIOAddress; + + for (i = 0; i < 8; i++) { + OUTREG(FENCE + i * 4, pI830->Fence[i]); + // if (I810_DEBUG & DEBUG_VERBOSE_VGA) + fprintf(stderr,"Fence Register : %x\n", pI830->Fence[i]); + } +} + +/* Tiled memory is good... really, really good... + * + * Need to make it less likely that we miss out on this - probably + * need to move the frontbuffer away from the 'guarenteed' alignment + * of the first memory segment, or perhaps allocate a discontigous + * framebuffer to get more alignment 'sweet spots'. + */ +static void +SetFence(const DRIDriverContext *ctx, I830Rec *pI830, + int nr, unsigned int start, unsigned int pitch, + unsigned int size) +{ + unsigned int val; + unsigned int fence_mask = 0; + unsigned int fence_pitch; + + if (nr < 0 || nr > 7) { + fprintf(stderr, + "SetFence: fence %d out of range\n",nr); + return; + } + + pI830->Fence[nr] = 0; + + if (IS_I9XX(pI830)) + fence_mask = ~I915G_FENCE_START_MASK; + else + fence_mask = ~I830_FENCE_START_MASK; + + if (start & fence_mask) { + fprintf(stderr, + "SetFence: %d: start (0x%08x) is not %s aligned\n", + nr, start, (IS_I9XX(pI830)) ? 
"1MB" : "512k"); + return; + } + + if (start % size) { + fprintf(stderr, + "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n", + nr, start, size / 1024); + return; + } + + if (pitch & 127) { + fprintf(stderr, + "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n", + nr, pitch); + return; + } + + val = (start | FENCE_X_MAJOR | FENCE_VALID); + + if (IS_I9XX(pI830)) { + switch (size) { + case MB(1): + val |= I915G_FENCE_SIZE_1M; + break; + case MB(2): + val |= I915G_FENCE_SIZE_2M; + break; + case MB(4): + val |= I915G_FENCE_SIZE_4M; + break; + case MB(8): + val |= I915G_FENCE_SIZE_8M; + break; + case MB(16): + val |= I915G_FENCE_SIZE_16M; + break; + case MB(32): + val |= I915G_FENCE_SIZE_32M; + break; + case MB(64): + val |= I915G_FENCE_SIZE_64M; + break; + default: + fprintf(stderr, + "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); + return; + } + } else { + switch (size) { + case KB(512): + val |= FENCE_SIZE_512K; + break; + case MB(1): + val |= FENCE_SIZE_1M; + break; + case MB(2): + val |= FENCE_SIZE_2M; + break; + case MB(4): + val |= FENCE_SIZE_4M; + break; + case MB(8): + val |= FENCE_SIZE_8M; + break; + case MB(16): + val |= FENCE_SIZE_16M; + break; + case MB(32): + val |= FENCE_SIZE_32M; + break; + case MB(64): + val |= FENCE_SIZE_64M; + break; + default: + fprintf(stderr, + "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); + return; + } + } + + if (IS_I9XX(pI830)) + fence_pitch = pitch / 512; + else + fence_pitch = pitch / 128; + + switch (fence_pitch) { + case 1: + val |= FENCE_PITCH_1; + break; + case 2: + val |= FENCE_PITCH_2; + break; + case 4: + val |= FENCE_PITCH_4; + break; + case 8: + val |= FENCE_PITCH_8; + break; + case 16: + val |= FENCE_PITCH_16; + break; + case 32: + val |= FENCE_PITCH_32; + break; + case 64: + val |= FENCE_PITCH_64; + break; + default: + fprintf(stderr, + "SetFence: %d: illegal pitch (%d)\n", nr, pitch); + return; + } + + pI830->Fence[nr] = val; +} + +static Bool +MakeTiles(const 
DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem) +{ + int pitch, ntiles, i; + + pitch = pMem->Pitch * ctx->cpp; + /* + * Simply try to break the region up into at most four pieces of size + * equal to the alignment. + */ + ntiles = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment; + if (ntiles >= 4) { + return FALSE; + } + + for (i = 0; i < ntiles; i++, nextTile++) { + SetFence(ctx, pI830, nextTile, pMem->Start + i * pMem->Alignment, + pitch, pMem->Alignment); + } + return TRUE; +} + +static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830) +{ + int i; + + /* Clear out */ + for (i = 0; i < 8; i++) + pI830->Fence[i] = 0; + + nextTile = 0; + + if (pI830->BackBuffer.Alignment >= KB(512)) { + if (MakeTiles(ctx, pI830, &(pI830->BackBuffer))) { + fprintf(stderr, + "Activating tiled memory for the back buffer.\n"); + } else { + fprintf(stderr, + "MakeTiles failed for the back buffer.\n"); + pI830->allowPageFlip = FALSE; + } + } + + if (pI830->DepthBuffer.Alignment >= KB(512)) { + if (MakeTiles(ctx, pI830, &(pI830->DepthBuffer))) { + fprintf(stderr, + "Activating tiled memory for the depth buffer.\n"); + } else { + fprintf(stderr, + "MakeTiles failed for the depth buffer.\n"); + } + } + + return; +} + +static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830) +{ + struct pci_device host_bridge; + uint32_t gmch_ctrl; + int memsize = 0; + int range; + + memset(&host_bridge, 0, sizeof(host_bridge)); + + pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL); + + /* We need to reduce the stolen size, by the GTT and the popup. 
+ * The GTT varying according the the FbMapSize and the popup is 4KB */ + range = (ctx->shared.fbSize / (1024*1024)) + 4; + + if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) { + switch (gmch_ctrl & I830_GMCH_GMS_MASK) { + case I855_GMCH_GMS_STOLEN_1M: + memsize = MB(1) - KB(range); + break; + case I855_GMCH_GMS_STOLEN_4M: + memsize = MB(4) - KB(range); + break; + case I855_GMCH_GMS_STOLEN_8M: + memsize = MB(8) - KB(range); + break; + case I855_GMCH_GMS_STOLEN_16M: + memsize = MB(16) - KB(range); + break; + case I855_GMCH_GMS_STOLEN_32M: + memsize = MB(32) - KB(range); + break; + case I915G_GMCH_GMS_STOLEN_48M: + if (IS_I9XX(pI830)) + memsize = MB(48) - KB(range); + break; + case I915G_GMCH_GMS_STOLEN_64M: + if (IS_I9XX(pI830)) + memsize = MB(64) - KB(range); + break; + } + } else { + switch (gmch_ctrl & I830_GMCH_GMS_MASK) { + case I830_GMCH_GMS_STOLEN_512: + memsize = KB(512) - KB(range); + break; + case I830_GMCH_GMS_STOLEN_1024: + memsize = MB(1) - KB(range); + break; + case I830_GMCH_GMS_STOLEN_8192: + memsize = MB(8) - KB(range); + break; + case I830_GMCH_GMS_LOCAL: + memsize = 0; + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, + "Local memory found, but won't be used.\n"); + break; + } + } + if (memsize > 0) { + fprintf(stderr, + "detected %d kB stolen memory.\n", memsize / 1024); + } else { + fprintf(stderr, + "no video memory detected.\n"); + } + return memsize; +} + +static int AgpInit(const DRIDriverContext *ctx, I830Rec *info) +{ + unsigned long mode = 0x4; + + if (drmAgpAcquire(ctx->drmFD) < 0) { + fprintf(stderr, "[gart] AGP not available\n"); + return 0; + } + + if (drmAgpEnable(ctx->drmFD, mode) < 0) { + fprintf(stderr, "[gart] AGP not enabled\n"); + drmAgpRelease(ctx->drmFD); + return 0; + } + else + fprintf(stderr, "[gart] AGP enabled at %dx\n", ctx->agpmode); + + return 1; +} + +/* + * Allocate memory from the given pool. Grow the pool if needed and if + * possible. 
+ */ +static unsigned long +AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, + I830MemRange *result, I830MemPool *pool, + long size, unsigned long alignment, int flags) +{ + long needed, start, end; + + if (!result || !pool || !size) + return 0; + + /* Calculate how much space is needed. */ + if (alignment <= GTT_PAGE_SIZE) + needed = size; + else { + start = ROUND_TO(pool->Free.Start, alignment); + end = ROUND_TO(start + size, alignment); + needed = end - pool->Free.Start; + } + if (needed > pool->Free.Size) { + return 0; + } + + result->Start = ROUND_TO(pool->Free.Start, alignment); + pool->Free.Start += needed; + result->End = pool->Free.Start; + + pool->Free.Size = pool->Free.End - pool->Free.Start; + result->Size = result->End - result->Start; + result->Pool = pool; + result->Alignment = alignment; + return needed; +} + +static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange *result) +{ + unsigned long start, end; + unsigned long newApStart, newApEnd; + int ret; + if (!result || !size) + return 0; + + if (!alignment) + alignment = 4; + + start = ROUND_TO(pI830->MemoryAperture.Start, alignment); + end = ROUND_TO(start + size, alignment); + newApStart = end; + newApEnd = pI830->MemoryAperture.End; + + ret=drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key)); + + if (ret) + { + fprintf(stderr,"drmAgpAlloc failed %d\n", ret); + return 0; + } + pI830->allocatedMemory += size; + pI830->MemoryAperture.Start = newApStart; + pI830->MemoryAperture.End = newApEnd; + pI830->MemoryAperture.Size = newApEnd - newApStart; + // pI830->FreeMemory -= size; + result->Start = start; + result->End = start + size; + result->Size = size; + result->Offset = start; + result->Alignment = alignment; + result->Pool = NULL; + + return size; +} + +unsigned long +I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *result, I830MemPool *pool, long size, 
unsigned long alignment, int flags) +{ + int ret; + + if (!result) + return 0; + + /* Make sure these are initialised. */ + result->Size = 0; + result->Key = -1; + + if (!size) { + return 0; + } + + if (pool->Free.Size < size) + return AllocFromAGP(ctx, pI830, size, alignment, result); + else + { + ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags); + + if (ret==0) + return AllocFromAGP(ctx, pI830, size, alignment, result); + return ret; + } +} + +static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem) +{ + if (!mem) + return FALSE; + + if (mem->Key == -1) + return TRUE; + + return !drmAgpBind(ctx->drmFD, mem->Key, mem->Offset); +} + +/* simple memory allocation routines needed */ +/* put ring buffer in low memory */ +/* need to allocate front, back, depth buffers aligned correctly, + allocate ring buffer, +*/ + +/* */ +static Bool +I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830) +{ + unsigned long size, ret; + unsigned long lines, lineSize, align; + + /* allocate ring buffer */ + memset(pI830->LpRing, 0, sizeof(I830RingBuffer)); + pI830->LpRing->mem.Key = -1; + + size = PRIMARY_RINGBUFFER_SIZE; + + ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0); + + if (ret != size) + { + fprintf(stderr,"unable to allocate ring buffer %ld\n", ret); + return FALSE; + } + + pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1; + + + /* allocate front buffer */ + memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer)); + pI830->FrontBuffer.Key = -1; + pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth; + + align = KB(512); + + lineSize = ctx->shared.virtualWidth * ctx->cpp; + lines = (ctx->shared.virtualHeight + 15) / 16 * 16; + size = lineSize * lines; + size = ROUND_TO_PAGE(size); + + align = GetBestTileAlignment(size); + + ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0); + if (ret < size) + { + fprintf(stderr,"unable to allocate front 
buffer %ld\n", ret); + return FALSE; + } + + memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer)); + pI830->BackBuffer.Key = -1; + pI830->BackBuffer.Pitch = ctx->shared.virtualWidth; + + ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0); + if (ret < size) + { + fprintf(stderr,"unable to allocate back buffer %ld\n", ret); + return FALSE; + } + + memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer)); + pI830->DepthBuffer.Key = -1; + pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth; + + ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0); + if (ret < size) + { + fprintf(stderr,"unable to allocate depth buffer %ld\n", ret); + return FALSE; + } + + memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem)); + pI830->ContextMem.Key = -1; + size = KB(32); + + ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0); + if (ret < size) + { + fprintf(stderr,"unable to allocate context buffer %ld\n", ret); + return FALSE; + } + + memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem)); + pI830->TexMem.Key = -1; + + size = 32768 * 1024; + ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem); + if (ret < size) + { + fprintf(stderr,"unable to allocate texture memory %ld\n", ret); + return FALSE; + } + + return TRUE; +} + +static Bool +I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830) +{ + if (!BindAgpRange(ctx, &pI830->LpRing->mem)) + return FALSE; + if (!BindAgpRange(ctx, &pI830->FrontBuffer)) + return FALSE; + if (!BindAgpRange(ctx, &pI830->BackBuffer)) + return FALSE; + if (!BindAgpRange(ctx, &pI830->DepthBuffer)) + return FALSE; + if (!BindAgpRange(ctx, &pI830->ContextMem)) + return FALSE; + if (!BindAgpRange(ctx, &pI830->TexMem)) + return FALSE; + + return TRUE; +} + +static Bool +I830CleanupDma(const DRIDriverContext *ctx) +{ + drmI830Init info; + + memset(&info, 0, sizeof(drmI830Init)); + info.func = I830_CLEANUP_DMA; + + if 
(drmCommandWrite(ctx->drmFD, DRM_I830_INIT, + &info, sizeof(drmI830Init))) { + fprintf(stderr, "I830 Dma Cleanup Failed\n"); + return FALSE; + } + + return TRUE; +} + +static Bool +I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830) +{ + I830RingBuffer *ring = pI830->LpRing; + drmI830Init info; + + memset(&info, 0, sizeof(drmI830Init)); + info.func = I830_INIT_DMA; + + info.ring_start = ring->mem.Start + pI830->LinearAddr; + info.ring_end = ring->mem.End + pI830->LinearAddr; + info.ring_size = ring->mem.Size; + + info.mmio_offset = (unsigned int)ctx->MMIOStart; + + info.sarea_priv_offset = sizeof(drm_sarea_t); + + info.front_offset = pI830->FrontBuffer.Start; + info.back_offset = pI830->BackBuffer.Start; + info.depth_offset = pI830->DepthBuffer.Start; + info.w = ctx->shared.virtualWidth; + info.h = ctx->shared.virtualHeight; + info.pitch = ctx->shared.virtualWidth; + info.back_pitch = pI830->BackBuffer.Pitch; + info.depth_pitch = pI830->DepthBuffer.Pitch; + info.cpp = ctx->cpp; + + if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, + &info, sizeof(drmI830Init))) { + fprintf(stderr, + "I830 Dma Initialization Failed\n"); + return FALSE; + } + + return TRUE; +} + +static int I830CheckDRMVersion( const DRIDriverContext *ctx, + I830Rec *pI830 ) +{ + drmVersionPtr version; + + version = drmGetVersion(ctx->drmFD); + + if (version) { + int req_minor, req_patch; + + req_minor = 4; + req_patch = 0; + + if (version->version_major != 1 || + version->version_minor < req_minor || + (version->version_minor == req_minor && + version->version_patchlevel < req_patch)) { + /* Incompatible drm version */ + fprintf(stderr, + "[dri] I830DRIScreenInit failed because of a version " + "mismatch.\n" + "[dri] i915.o kernel module version is %d.%d.%d " + "but version 1.%d.%d or newer is needed.\n" + "[dri] Disabling DRI.\n", + version->version_major, + version->version_minor, + version->version_patchlevel, + req_minor, + req_patch); + drmFreeVersion(version); + return 0; + } + + 
pI830->drmMinor = version->version_minor; + drmFreeVersion(version); + } + return 1; +} + +static void +I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830) +{ + unsigned int itemp; + unsigned char *MMIO = ctx->MMIOAddress; + + OUTREG(LP_RING + RING_LEN, 0); + OUTREG(LP_RING + RING_TAIL, 0); + OUTREG(LP_RING + RING_HEAD, 0); + + if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) != + pI830->LpRing->mem.Start) { + fprintf(stderr, + "I830SetRingRegs: Ring buffer start (%lx) violates its " + "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK); + } + /* Don't care about the old value. Reserved bits must be zero anyway. */ + itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK; + OUTREG(LP_RING + RING_START, itemp); + + if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) != + pI830->LpRing->mem.Size - 4096) { + fprintf(stderr, + "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its " + "mask (%x)\n", pI830->LpRing->mem.Size - 4096, + I830_RING_NR_PAGES); + } + /* Don't care about the old value. Reserved bits must be zero anyway. 
*/ + itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES; + itemp |= (RING_NO_REPORT | RING_VALID); + OUTREG(LP_RING + RING_LEN, itemp); + + pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK; + pI830->LpRing->tail = INREG(LP_RING + RING_TAIL); + pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8); + if (pI830->LpRing->space < 0) + pI830->LpRing->space += pI830->LpRing->mem.Size; + + SetFenceRegs(ctx, pI830); + + /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky + hacky hacky */ + OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr); + +} + +static Bool +I830SetParam(const DRIDriverContext *ctx, int param, int value) +{ + drmI830SetParam sp; + + memset(&sp, 0, sizeof(sp)); + sp.param = param; + sp.value = value; + + if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &sp, sizeof(sp))) { + fprintf(stderr, "I830 SetParam Failed\n"); + return FALSE; + } + + return TRUE; +} + +static Bool +I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) +{ + fprintf(stderr, + "[drm] Mapping front buffer\n"); + + if (drmAddMap(ctx->drmFD, + (drm_handle_t)(sarea->front_offset + pI830->LinearAddr), + sarea->front_size, + DRM_FRAME_BUFFER, /*DRM_AGP,*/ + 0, + &sarea->front_handle) < 0) { + fprintf(stderr, + "[drm] drmAddMap(front_handle) failed. Disabling DRI\n"); + return FALSE; + } + ctx->shared.hFrameBuffer = sarea->front_handle; + ctx->shared.fbSize = sarea->front_size; + fprintf(stderr, "[drm] Front Buffer = 0x%08x\n", + sarea->front_handle); + + if (drmAddMap(ctx->drmFD, + (drm_handle_t)(sarea->back_offset), + sarea->back_size, DRM_AGP, 0, + &sarea->back_handle) < 0) { + fprintf(stderr, + "[drm] drmAddMap(back_handle) failed. 
Disabling DRI\n"); + return FALSE; + } + fprintf(stderr, "[drm] Back Buffer = 0x%08x\n", + sarea->back_handle); + + if (drmAddMap(ctx->drmFD, + (drm_handle_t)sarea->depth_offset, + sarea->depth_size, DRM_AGP, 0, + &sarea->depth_handle) < 0) { + fprintf(stderr, + "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n"); + return FALSE; + } + fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n", + sarea->depth_handle); + + if (drmAddMap(ctx->drmFD, + (drm_handle_t)sarea->tex_offset, + sarea->tex_size, DRM_AGP, 0, + &sarea->tex_handle) < 0) { + fprintf(stderr, + "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n"); + return FALSE; + } + fprintf(stderr, "[drm] textures = 0x%08x\n", + sarea->tex_handle); + + return TRUE; +} + + +static void +I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) +{ +#if 1 + if (sarea->front_handle) { + drmRmMap(ctx->drmFD, sarea->front_handle); + sarea->front_handle = 0; + } +#endif + if (sarea->back_handle) { + drmRmMap(ctx->drmFD, sarea->back_handle); + sarea->back_handle = 0; + } + if (sarea->depth_handle) { + drmRmMap(ctx->drmFD, sarea->depth_handle); + sarea->depth_handle = 0; + } + if (sarea->tex_handle) { + drmRmMap(ctx->drmFD, sarea->tex_handle); + sarea->tex_handle = 0; + } +} + +static void +I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) +{ + /* Start up the simple memory manager for agp space */ + drmI830MemInitHeap drmHeap; + drmHeap.region = I830_MEM_REGION_AGP; + drmHeap.start = 0; + drmHeap.size = sarea->tex_size; + + if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, + &drmHeap, sizeof(drmHeap))) { + fprintf(stderr, + "[drm] Failed to initialized agp heap manager\n"); + } else { + fprintf(stderr, + "[drm] Initialized kernel agp heap manager, %d\n", + sarea->tex_size); + + I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, + sarea->log_tex_granularity); + } +} + +static Bool +I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, 
drmI830Sarea *sarea) +{ + if (drmAddMap(ctx->drmFD, + (drm_handle_t)pI830->LpRing->mem.Start, + pI830->LpRing->mem.Size, DRM_AGP, 0, + &pI830->ring_map) < 0) { + fprintf(stderr, + "[drm] drmAddMap(ring_map) failed. Disabling DRI\n"); + return FALSE; + } + fprintf(stderr, "[drm] ring buffer = 0x%08x\n", + pI830->ring_map); + + if (I830InitDma(ctx, pI830) == FALSE) { + return FALSE; + } + + /* init to zero to be safe */ + + I830DRIMapScreenRegions(ctx, pI830, sarea); + I830InitTextureHeap(ctx, pI830, sarea); + + if (ctx->pciDevice != PCI_CHIP_845_G && + ctx->pciDevice != PCI_CHIP_I830_M) { + I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1 ); + } + + /* Okay now initialize the dma engine */ + { + pI830->irq = drmGetInterruptFromBusID(ctx->drmFD, + ctx->pciBus, + ctx->pciDevice, + ctx->pciFunc); + + if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) { + fprintf(stderr, + "[drm] failure adding irq handler\n"); + pI830->irq = 0; + return FALSE; + } + else + fprintf(stderr, + "[drm] dma control initialized, using IRQ %d\n", + pI830->irq); + } + + fprintf(stderr, "[dri] visual configs initialized\n"); + + return TRUE; +} + +static Bool +I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) +{ + /* need to drmMap front and back buffers and zero them */ + drmAddress map_addr; + int ret; + + ret = drmMap(ctx->drmFD, + sarea->front_handle, + sarea->front_size, + &map_addr); + + if (ret) + { + fprintf(stderr, "Unable to map front buffer\n"); + return FALSE; + } + + drimemsetio((char *)map_addr, + 0, + sarea->front_size); + drmUnmap(map_addr, sarea->front_size); + + + ret = drmMap(ctx->drmFD, + sarea->back_handle, + sarea->back_size, + &map_addr); + + if (ret) + { + fprintf(stderr, "Unable to map back buffer\n"); + return FALSE; + } + + drimemsetio((char *)map_addr, + 0, + sarea->back_size); + drmUnmap(map_addr, sarea->back_size); + + return TRUE; +} + +static Bool +I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) + +{ + I830DRIPtr pI830DRI; + 
drmI830Sarea *pSAREAPriv; + int err; + + drm_page_size = getpagesize(); + + pI830->registerSize = ctx->MMIOSize; + /* This is a hack for now. We have to have more than a 4k page here + * because of the size of the state. However, the state should be + * in a per-context mapping. This will be added in the Mesa 3.5 port + * of the I830 driver. + */ + ctx->shared.SAREASize = SAREA_MAX; + + /* Note that drmOpen will try to load the kernel module, if needed. */ + ctx->drmFD = drmOpen("i915", NULL ); + if (ctx->drmFD < 0) { + fprintf(stderr, "[drm] drmOpen failed\n"); + return 0; + } + + if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) { + fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n", + ctx->drmFD, ctx->pciBusID, strerror(-err)); + return 0; + } + + if (drmAddMap( ctx->drmFD, + 0, + ctx->shared.SAREASize, + DRM_SHM, + DRM_CONTAINS_LOCK, + &ctx->shared.hSAREA) < 0) + { + fprintf(stderr, "[drm] drmAddMap failed\n"); + return 0; + } + + fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n", + ctx->shared.SAREASize, ctx->shared.hSAREA); + + if (drmMap( ctx->drmFD, + ctx->shared.hSAREA, + ctx->shared.SAREASize, + (drmAddressPtr)(&ctx->pSAREA)) < 0) + { + fprintf(stderr, "[drm] drmMap failed\n"); + return 0; + + } + + memset(ctx->pSAREA, 0, ctx->shared.SAREASize); + fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n", + ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize); + + + if (drmAddMap(ctx->drmFD, + ctx->MMIOStart, + ctx->MMIOSize, + DRM_REGISTERS, + DRM_READ_ONLY, + &pI830->registerHandle) < 0) { + fprintf(stderr, "[drm] drmAddMap mmio failed\n"); + return 0; + } + fprintf(stderr, + "[drm] register handle = 0x%08x\n", pI830->registerHandle); + + + if (!I830CheckDRMVersion(ctx, pI830)) { + return FALSE; + } + + /* Create a 'server' context so we can grab the lock for + * initialization ioctls. 
+ */ + if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) { + fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err); + return 0; + } + + DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); + + /* Initialize the SAREA private data structure */ + pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + + sizeof(drm_sarea_t)); + memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); + + pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830); + pI830->StolenMemory.Start = 0; + pI830->StolenMemory.End = pI830->StolenMemory.Size; + + pI830->MemoryAperture.Start = pI830->StolenMemory.End; + pI830->MemoryAperture.End = KB(40000); + pI830->MemoryAperture.Size = pI830->MemoryAperture.End - pI830->MemoryAperture.Start; + + pI830->StolenPool.Fixed = pI830->StolenMemory; + pI830->StolenPool.Total = pI830->StolenMemory; + pI830->StolenPool.Free = pI830->StolenPool.Total; + pI830->FreeMemory = pI830->StolenPool.Total.Size; + + if (!AgpInit(ctx, pI830)) + return FALSE; + + if (I830AllocateMemory(ctx, pI830) == FALSE) + { + return FALSE; + } + + if (I830BindMemory(ctx, pI830) == FALSE) + { + return FALSE; + } + + pSAREAPriv->front_offset = pI830->FrontBuffer.Start; + pSAREAPriv->front_size = pI830->FrontBuffer.Size; + pSAREAPriv->width = ctx->shared.virtualWidth; + pSAREAPriv->height = ctx->shared.virtualHeight; + pSAREAPriv->pitch = ctx->shared.virtualWidth; + pSAREAPriv->virtualX = ctx->shared.virtualWidth; + pSAREAPriv->virtualY = ctx->shared.virtualHeight; + pSAREAPriv->back_offset = pI830->BackBuffer.Start; + pSAREAPriv->back_size = pI830->BackBuffer.Size; + pSAREAPriv->depth_offset = pI830->DepthBuffer.Start; + pSAREAPriv->depth_size = pI830->DepthBuffer.Size; + pSAREAPriv->tex_offset = pI830->TexMem.Start; + pSAREAPriv->tex_size = pI830->TexMem.Size; + pSAREAPriv->log_tex_granularity = pI830->TexGranularity; + + ctx->driverClientMsg = malloc(sizeof(I830DRIRec)); + ctx->driverClientMsgSize = sizeof(I830DRIRec); + pI830DRI = 
(I830DRIPtr)ctx->driverClientMsg; + pI830DRI->deviceID = pI830->Chipset; + pI830DRI->regsSize = I830_REG_SIZE; + pI830DRI->width = ctx->shared.virtualWidth; + pI830DRI->height = ctx->shared.virtualHeight; + pI830DRI->mem = ctx->shared.fbSize; + pI830DRI->cpp = ctx->cpp; + pI830DRI->backOffset = pI830->BackBuffer.Start; + pI830DRI->backPitch = pI830->BackBuffer.Pitch; + + pI830DRI->depthOffset = pI830->DepthBuffer.Start; + pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; + + pI830DRI->fbOffset = pI830->FrontBuffer.Start; + pI830DRI->fbStride = pI830->FrontBuffer.Pitch; + + pI830DRI->bitsPerPixel = ctx->bpp; + pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t); + + err = I830DRIDoMappings(ctx, pI830, pSAREAPriv); + if (err == FALSE) + return FALSE; + + I830SetupMemoryTiling(ctx, pI830); + + /* Quick hack to clear the front & back buffers. Could also use + * the clear ioctl to do this, but would need to setup hw state + * first. + */ + I830ClearScreen(ctx, pI830, pSAREAPriv); + + I830SetRingRegs(ctx, pI830); + + return TRUE; +} + + +/** + * \brief Validate the fbdev mode. + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. + * + * Saves some registers and returns 1. + * + * \sa radeonValidateMode(). + */ +static int i830ValidateMode( const DRIDriverContext *ctx ) +{ + return 1; +} + +/** + * \brief Examine mode returned by fbdev. + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. + * + * Restores registers that fbdev has clobbered and returns 1. + * + * \sa i810ValidateMode(). + */ +static int i830PostValidateMode( const DRIDriverContext *ctx ) +{ + I830Rec *pI830 = ctx->driverPrivate; + + I830SetRingRegs(ctx, pI830); + return 1; +} + + +/** + * \brief Initialize the framebuffer device mode + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. 
+ * + * Fills in \p info with some default values and some information from \p ctx + * and then calls I810ScreenInit() for the screen initialization. + * + * Before exiting clears the framebuffer memory accessing it directly. + */ +static int i830InitFBDev( DRIDriverContext *ctx ) +{ + I830Rec *pI830 = calloc(1, sizeof(I830Rec)); + int i; + + { + int dummy = ctx->shared.virtualWidth; + + switch (ctx->bpp / 8) { + case 1: dummy = (ctx->shared.virtualWidth + 127) & ~127; break; + case 2: dummy = (ctx->shared.virtualWidth + 31) & ~31; break; + case 3: + case 4: dummy = (ctx->shared.virtualWidth + 15) & ~15; break; + } + + ctx->shared.virtualWidth = dummy; + ctx->shared.Width = ctx->shared.virtualWidth; + } + + + for (i = 0; pitches[i] != 0; i++) { + if (pitches[i] >= ctx->shared.virtualWidth) { + ctx->shared.virtualWidth = pitches[i]; + break; + } + } + + ctx->driverPrivate = (void *)pI830; + + pI830->LpRing = calloc(1, sizeof(I830RingBuffer)); + pI830->Chipset = ctx->chipset; + pI830->LinearAddr = ctx->FBStart; + + if (!I830ScreenInit( ctx, pI830 )) + return 0; + + + return 1; +} + + +/** + * \brief The screen is being closed, so clean up any state and free any + * resources used by the DRI. + * + * \param ctx display handle. + * + * Unmaps the SAREA, closes the DRM device file descriptor and frees the driver + * private data. + */ +static void i830HaltFBDev( DRIDriverContext *ctx ) +{ + drmI830Sarea *pSAREAPriv; + I830Rec *pI830 = ctx->driverPrivate; + + if (pI830->irq) { + drmCtlUninstHandler(ctx->drmFD); + pI830->irq = 0; } + + I830CleanupDma(ctx); + + pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + + sizeof(drm_sarea_t)); + + I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv); + drmUnmap( ctx->pSAREA, ctx->shared.SAREASize ); + drmClose(ctx->drmFD); + + if (ctx->driverPrivate) { + free(ctx->driverPrivate); + ctx->driverPrivate = 0; + } +} + + +extern void i810NotifyFocus( int ); + +/** + * \brief Exported driver interface for Mini GLX. 
+ * + * \sa DRIDriverRec. + */ +const struct DRIDriverRec __driDriver = { + i830ValidateMode, + i830PostValidateMode, + i830InitFBDev, + i830HaltFBDev, + NULL,//I830EngineShutdown, + NULL, //I830EngineRestore, +#ifndef _EMBEDDED + 0, +#else + i810NotifyFocus, +#endif +}; diff --git a/src/mesa/drivers/dri/r128/r128_span.c b/src/mesa/drivers/dri/r128/r128_span.c index a24e63c9b67..25e57133cc3 100644 --- a/src/mesa/drivers/dri/r128/r128_span.c +++ b/src/mesa/drivers/dri/r128/r128_span.c @@ -123,6 +123,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Depth buffer */ +/* These functions require locking */ +#undef HW_LOCK +#undef HW_UNLOCK +#define HW_LOCK() LOCK_HARDWARE(R128_CONTEXT(ctx)); +#define HW_UNLOCK() UNLOCK_HARDWARE(R128_CONTEXT(ctx)); + /* 16-bit depth buffer functions */ diff --git a/src/mesa/drivers/dri/r128/r128_state.c b/src/mesa/drivers/dri/r128/r128_state.c index 1b2c2a5284e..491f288ffb2 100644 --- a/src/mesa/drivers/dri/r128/r128_state.c +++ b/src/mesa/drivers/dri/r128/r128_state.c @@ -599,8 +599,26 @@ static void r128UpdateClipping( GLcontext *ctx ) x2 += drawable->x; y2 += drawable->y; - rmesa->setup.sc_top_left_c = ((y1 << 16) | x1); - rmesa->setup.sc_bottom_right_c = ((y2 << 16) | x2); + /* Clamp values to screen to avoid wrapping problems */ + if ( x1 < 0 ) + x1 = 0; + else if ( x1 >= rmesa->driScreen->fbWidth ) + x1 = rmesa->driScreen->fbWidth - 1; + if ( y1 < 0 ) + y1 = 0; + else if ( y1 >= rmesa->driScreen->fbHeight ) + y1 = rmesa->driScreen->fbHeight - 1; + if ( x2 < 0 ) + x2 = 0; + else if ( x2 >= rmesa->driScreen->fbWidth ) + x2 = rmesa->driScreen->fbWidth - 1; + if ( y2 < 0 ) + y2 = 0; + else if ( y2 >= rmesa->driScreen->fbHeight ) + y2 = rmesa->driScreen->fbHeight - 1; + + rmesa->setup.sc_top_left_c = (((y1 & 0x3FFF) << 16) | (x1 & 0x3FFF)); + rmesa->setup.sc_bottom_right_c = (((y2 & 0x3FFF) << 16) | (x2 & 0x3FFF)); rmesa->dirty |= R128_UPLOAD_CONTEXT; } @@ -798,8 +816,8 @@ static void r128UpdateWindow( GLcontext *ctx ) struct 
gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0][0]; driRenderbuffer *drb = (driRenderbuffer *) rb; - rmesa->setup.window_xy_offset = ((y << R128_WINDOW_Y_SHIFT) | - (x << R128_WINDOW_X_SHIFT)); + rmesa->setup.window_xy_offset = (((y & 0xFFF) << R128_WINDOW_Y_SHIFT) | + ((x & 0xFFF) << R128_WINDOW_X_SHIFT)); rmesa->setup.dst_pitch_offset_c = (((drb->flippedPitch/8) << 21) | (drb->flippedOffset >> 5)); @@ -1122,6 +1140,15 @@ void r128EmitHwStateLocked( r128ContextPtr rmesa ) R128_UPLOAD_WINDOW | R128_UPLOAD_CORE) ) { memcpy( &sarea->context_state, regs, sizeof(sarea->context_state) ); + + if( rmesa->dirty & R128_UPLOAD_CONTEXT ) + { + /* One possible side-effect of uploading a new context is the + * setting of the R128_GMC_AUX_CLIP_DIS bit, which causes all + * auxilliary cliprects to be disabled. So the next command must + * upload them again. */ + rmesa->dirty |= R128_UPLOAD_CLIPRECTS; + } } if ( (rmesa->dirty & R128_UPLOAD_TEX0) && t0 ) { @@ -1207,7 +1234,10 @@ void r128DDUpdateHWState( GLcontext *ctx ) r128UpdateMasks( ctx ); if ( new_state & R128_NEW_WINDOW ) + { r128UpdateWindow( ctx ); + r128CalcViewport( ctx ); + } if ( rmesa->NewGLState & _NEW_TEXTURE ) { r128UpdateTextureState( ctx ); diff --git a/src/mesa/drivers/dri/r128/r128_tris.c b/src/mesa/drivers/dri/r128/r128_tris.c index 46315225165..7e3e714f372 100644 --- a/src/mesa/drivers/dri/r128/r128_tris.c +++ b/src/mesa/drivers/dri/r128/r128_tris.c @@ -420,7 +420,7 @@ r128_fallback_point( r128ContextPtr rmesa, /**********************************************************************/ #define POINT_FALLBACK (DD_POINT_SMOOTH) -#define LINE_FALLBACK (DD_LINE_STIPPLE|DD_LINE_SMOOTH) +#define LINE_FALLBACK (DD_LINE_STIPPLE) #define TRI_FALLBACK (DD_TRI_SMOOTH) #define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK) #define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED) @@ -531,11 +531,36 @@ static void r128RasterPrimitive( GLcontext *ctx, GLuint hwprim ) } } +static 
void r128SetupAntialias( GLcontext *ctx, GLenum prim ) +{ + r128ContextPtr rmesa = R128_CONTEXT(ctx); + + GLuint currAA, wantAA; + + currAA = (rmesa->setup.pm4_vc_fpu_setup & R128_EDGE_ANTIALIAS) != 0; + if( prim >= GL_TRIANGLES ) + wantAA = ctx->Polygon.SmoothFlag; + else if( prim >= GL_LINES ) + wantAA = ctx->Line.SmoothFlag; + else + wantAA = 0; + + if( wantAA != currAA ) + { + FLUSH_BATCH( rmesa ); + rmesa->setup.pm4_vc_fpu_setup ^= R128_EDGE_ANTIALIAS; + rmesa->dirty |= R128_UPLOAD_SETUP; + } +} + static void r128RenderPrimitive( GLcontext *ctx, GLenum prim ) { r128ContextPtr rmesa = R128_CONTEXT(ctx); GLuint hw = hw_prim[prim]; rmesa->render_primitive = prim; + + r128SetupAntialias( ctx, prim ); + if (prim >= GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) return; r128RasterPrimitive( ctx, hw ); diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index 2084d52132e..75c09ff867e 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -31,6 +31,7 @@ DRIVER_SOURCES = r200_context.c \ r200_vtxfmt_sse.c \ r200_vtxfmt_x86.c \ r200_fragshader.c \ + r200_vertprog.c \ radeon_screen.c \ $(EGL_SOURCES) diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 2122d16dcbe..91737d2d33c 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -110,6 +110,11 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) /* FIXME: is this a good place to insert that atom ? 
*/ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr ); insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] ); } static void r200SaveHwState( r200ContextPtr rmesa ) @@ -412,15 +417,13 @@ void r200EmitBlit( r200ContextPtr rmesa, void r200EmitWait( r200ContextPtr rmesa, GLuint flags ) { - if (rmesa->dri.drmMinor >= 6) { - drm_radeon_cmd_header_t *cmd; - - assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int), - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].wait.cmd_type = RADEON_CMD_WAIT; - cmd[0].wait.flags = flags; - } + drm_radeon_cmd_header_t *cmd; + + assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int), + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].wait.cmd_type = RADEON_CMD_WAIT; + cmd[0].wait.flags = flags; } diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index aaaaa5a95bf..7a8f270160f 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -62,6 +62,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tcl.h" #include "r200_vtxfmt.h" #include "r200_maos.h" +#include "r200_vertprog.h" #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression @@ -76,7 +77,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define need_GL_NV_vertex_program #include "extension_helper.h" -#define DRIVER_DATE "20060327" +#define DRIVER_DATE "20060602" #include "vblank.h" #include "utils.h" @@ -310,6 +311,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, r200InitIoctlFuncs(&functions); r200InitStateFuncs(&functions); r200InitTextureFuncs(&functions); + r200InitShaderFuncs(&functions); /* Allocate and initialize the Mesa context */ if (sharedContextPrivate) @@ -417,6 +419,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.MaxLineWidthAA = 10.0; ctx->Const.LineWidthGranularity = 0.0625; + ctx->Const.VertexProgram.MaxNativeInstructions = R200_VSF_MAX_INST; + ctx->Const.VertexProgram.MaxNativeAttribs = 12; + ctx->Const.VertexProgram.MaxNativeTemps = R200_VSF_MAX_TEMPS; + ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext( ctx ); @@ -470,7 +478,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, if (rmesa->r200Screen->drmSupportsBlendColor) { driInitExtensions( ctx, blend_extensions, GL_FALSE ); } - if(driQueryOptionb(&rmesa->optionCache, "arb_vertex_program")) + if(rmesa->r200Screen->drmSupportsVertexProgram) driInitSingleExtension( ctx, ARB_vp_extension ); if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program")) driInitSingleExtension( ctx, NV_vp_extension ); @@ -494,17 +502,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); rmesa->iw.irq_seq = -1; rmesa->irqsEmitted = 0; - rmesa->do_irqs = (rmesa->dri.drmMinor >= 6 && - fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && rmesa->r200Screen->irq); rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); if (!rmesa->do_irqs) fprintf(stderr, - "IRQ's not enabled, 
falling back to %s: %d %d %d\n", + "IRQ's not enabled, falling back to %s: %d %d\n", rmesa->do_usleeps ? "usleeps" : "busy waits", - rmesa->dri.drmMinor, fthrottle_mode, rmesa->r200Screen->irq); diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index f6709d3d7fd..b7ee33aa6c9 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -46,9 +46,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "mtypes.h" #include "colormac.h" #include "r200_reg.h" +#include "r200_vertprog.h" #define ENABLE_HW_3D_TEXTURE 1 /* XXX this is temporary! */ +#ifndef R200_EMIT_VAP_PVS_CNTL +#error This driver requires a newer libdrm to compile +#endif + struct r200_context; typedef struct r200_context r200ContextRec; typedef struct r200_context *r200ContextPtr; @@ -94,6 +99,15 @@ typedef void (*r200_point_func)( r200ContextPtr, r200Vertex * ); +struct r200_vertex_program { + struct gl_vertex_program mesa_program; /* Must be first */ + int translated; + VERTEX_SHADER_INSTRUCTION instr[R200_VSF_MAX_INST + 3]; + int pos_end; + int inputs[VERT_ATTRIB_MAX]; + int native; +}; + struct r200_colorbuffer_state { GLuint clear; #if 000 @@ -336,6 +350,34 @@ struct r200_state_atom { #define AFS_IA1 4 /* 2f0c */ #define AFS_STATE_SIZE 33 +#define PVS_CMD_0 0 +#define PVS_CNTL_1 1 +#define PVS_CNTL_2 2 +#define PVS_STATE_SIZE 3 + +/* those are quite big... 
*/ +#define VPI_CMD_0 0 +#define VPI_OPDST_0 1 +#define VPI_SRC0_0 2 +#define VPI_SRC1_0 3 +#define VPI_SRC2_0 4 +#define VPI_OPDST_63 253 +#define VPI_SRC0_63 254 +#define VPI_SRC1_63 255 +#define VPI_SRC2_63 256 +#define VPI_STATE_SIZE 257 + +#define VPP_CMD_0 0 +#define VPP_PARAM0_0 1 +#define VPP_PARAM1_0 2 +#define VPP_PARAM2_0 3 +#define VPP_PARAM3_0 4 +#define VPP_PARAM0_95 381 +#define VPP_PARAM1_95 382 +#define VPP_PARAM2_95 383 +#define VPP_PARAM3_95 384 +#define VPP_STATE_SIZE 385 + #define TCL_CMD_0 0 #define TCL_LIGHT_MODEL_CTL_0 1 #define TCL_LIGHT_MODEL_CTL_1 2 @@ -567,6 +609,9 @@ struct r200_hw_state { struct r200_state_atom glt; struct r200_state_atom prf; struct r200_state_atom afs[2]; + struct r200_state_atom pvs; + struct r200_state_atom vpi[2]; + struct r200_state_atom vpp[2]; struct r200_state_atom atf; struct r200_state_atom spr; @@ -637,7 +682,7 @@ struct r200_dri_mirror { }; -#define R200_CMD_BUF_SZ (8*1024) +#define R200_CMD_BUF_SZ (16*1024) struct r200_store { GLuint statenr; @@ -883,6 +928,7 @@ struct r200_context { */ struct r200_hw_state hw; struct r200_state state; + struct r200_vertex_program *curr_vp_hw; /* Texture object bookkeeping */ diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index ae0f35cb398..650d7cfdf2b 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -374,7 +374,7 @@ static void r200WaitIrq( r200ContextPtr rmesa ) do { ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, &rmesa->iw, sizeof(rmesa->iw) ); - } while (ret && (errno == EINTR || errno == EAGAIN)); + } while (ret && (errno == EINTR || errno == EBUSY)); if ( ret ) { fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); @@ -870,10 +870,7 @@ void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size, if (getenv("R200_NO_ALLOC")) return NULL; - - if (rmesa->dri.drmMinor < 6) - return NULL; - + alloc.region = RADEON_MEM_REGION_GART; 
alloc.alignment = 0; alloc.size = size; @@ -912,9 +909,6 @@ void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer) return; } - if (rmesa->dri.drmMinor < 6) - return; - region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; if (region_offset < 0 || @@ -950,9 +944,6 @@ GLuint r200GetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, const GLvoid * if (!r200IsGartMemory( rmesa, pointer, 0 )) return ~0; - if (rmesa->dri.drmMinor < 6) - return ~0; - card_offset = r200GartOffsetFromVirtual( rmesa, pointer ); return card_offset - rmesa->r200Screen->gart_base; diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c index c3489b6c621..1f711bbee5a 100644 --- a/src/mesa/drivers/dri/r200/r200_pixel.c +++ b/src/mesa/drivers/dri/r200/r200_pixel.c @@ -490,7 +490,7 @@ void r200InitPixelFuncs( GLcontext *ctx ) ctx->Driver.DrawPixels = _swrast_DrawPixels; ctx->Driver.ReadPixels = _swrast_ReadPixels; - if (!getenv("R200_NO_BLITS") && R200_CONTEXT(ctx)->dri.drmMinor >= 6) { + if (!getenv("R200_NO_BLITS")) { ctx->Driver.ReadPixels = r200ReadPixels; ctx->Driver.DrawPixels = r200DrawPixels; if (getenv("R200_HW_BITMAP")) diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h index 6d1b84769a8..dab08a9eb73 100644 --- a/src/mesa/drivers/dri/r200/r200_reg.h +++ b/src/mesa/drivers/dri/r200/r200_reg.h @@ -337,6 +337,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ #define R200_SE_VAP_CNTL 0x2080 #define R200_VAP_TCL_ENABLE 0x00000001 +#define R200_VAP_PROG_VTX_SHADER_ENABLE 0x00000004 #define R200_VAP_SINGLE_BUF_STATE_ENABLE 0x00000010 #define R200_VAP_FORCE_W_TO_ONE 0x00010000 #define R200_VAP_D3D_TEX_DEFAULT 0x00020000 @@ -660,6 +661,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4 #define R200_POINTSIZE_SEL_STATE (1<<16) /* gap */ +/* taken from r300, see comments there */ +#define R200_VAP_PVS_CNTL_1 0x22d0 +# define R200_PVS_CNTL_1_PROGRAM_START_SHIFT 0 +# define R200_PVS_CNTL_1_POS_END_SHIFT 10 +# define R200_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +/* Addresses are relative the the vertex program parameters area. */ +#define R200_VAP_PVS_CNTL_2 0x22d4 +# define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 +# define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 +/* gap */ + #define R200_SE_VTX_ST_POS_0_X_4 0x2300 #define R200_SE_VTX_ST_POS_0_Y_4 0x2304 #define R200_SE_VTX_ST_POS_0_Z_4 0x2308 @@ -1473,6 +1485,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_VS_PNT_SPRITE_CLAMP 0x000000BE #define R200_VS_MAX 0x000001C0 +#define R200_PVS_PROG0 0x00000080 +#define R200_PVS_PROG1 0x00000180 +#define R200_PVS_PARAM0 0x00000000 +#define R200_PVS_PARAM1 0x00000100 /* * Offsets in TCL scalar state diff --git a/src/mesa/drivers/dri/r200/r200_sanity.c b/src/mesa/drivers/dri/r200/r200_sanity.c index 4dc87cd945d..ca5b926a944 100644 --- a/src/mesa/drivers/dri/r200/r200_sanity.c +++ b/src/mesa/drivers/dri/r200/r200_sanity.c @@ -160,6 +160,7 @@ static struct { { R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, { R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, { R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + { R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, }; struct reg_names { @@ -532,6 +533,8 @@ static struct reg_names reg_names[] = { { R200_PP_TXCBLEND2_15, "R200_PP_TXCBLEND2_15" }, { R200_PP_TXABLEND_15, "R200_PP_TXABLEND_15" }, { R200_PP_TXABLEND2_15, "R200_PP_TXABLEND2_15" }, + { R200_VAP_PVS_CNTL_1, "R200_VAP_PVS_CNTL_1" }, + { R200_VAP_PVS_CNTL_2, "R200_VAP_PVS_CNTL_2" }, }; static struct reg_names scalar_names[] = { @@ -935,6 +938,62 @@ static int radeon_emit_vectors( return 0; } +static int radeon_emit_veclinear( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = 
header.veclinear.count * 4; + int *data = (int *)cmdbuf->buf; + float *fdata =(float *)cmdbuf->buf; + int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8); + int i; + + if (1||VERBOSE) + fprintf(stderr, "emit vectors linear, start %d nr %d (end %d) (0x%x)\n", + start, sz >> 2, start + (sz >> 2), header.i); + + + if (start < 0x60) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start, fdata[i]); + fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start, fdata[i+1]); + fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start, fdata[i+2]); + fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start, fdata[i+3]); + } + } + else if ((start >= 0x100) && (start < 0x160)) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i]); + fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+1]); + fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+2]); + fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+3]); + } + } + else if ((start >= 0x80) && (start < 0xc0)) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x80, data[i]); + fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x80, data[i+1]); + fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x80, data[i+2]); + fprintf(stderr, "R200_VS_PROG %d SRC3 %08x\n", (i >> 2) + start - 0x80, data[i+3]); + } + } + else if ((start >= 0x180) && (start < 0x1c0)) { + for (i = start ; (i < start + sz) ; i += 4) { + fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i]); + fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+1]); + fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+2]); + fprintf(stderr, "R200_VS_PROG %d 
SRC3 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+3]); + } + } + else { + fprintf(stderr, "write to unknown vector area\n"); + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + #if 0 static int print_vertex_format( int vfmt ) { @@ -1360,6 +1419,13 @@ int r200SanityCmdBuffer( r200ContextPtr rmesa, case RADEON_CMD_WAIT: break; + case RADEON_CMD_VECLINEAR: + if (radeon_emit_veclinear( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_veclinear failed\n"); + return -EINVAL; + } + break; + default: fprintf(stderr,"bad cmd_type %d at %p\n", header.header.cmd_type, diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 6ffb48c1505..ac9e20e28ad 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tex.h" #include "r200_swtcl.h" #include "r200_vtxfmt.h" +#include "r200_vertprog.h" #include "drirenderbuffer.h" @@ -214,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx ) R200_STATECHANGE( rmesa, ctx ); if (rmesa->r200Screen->drmSupportsBlendColor) { - if (ctx->Color._LogicOpEnabled) { + if (ctx->Color.ColorLogicOpEnabled) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func; rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func; @@ -230,7 +231,7 @@ static void r200_set_blend_state( GLcontext * ctx ) } } else { - if (ctx->Color._LogicOpEnabled) { + if (ctx->Color.ColorLogicOpEnabled) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; return; @@ -1969,6 +1970,8 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHTING: r200UpdateSpecular(ctx); + /* for reflection map fixup - might set recheck_texgen for all units too */ + rmesa->NewGLState |= _NEW_TEXTURE; break; case GL_LINE_SMOOTH: @@ 
-2100,7 +2103,68 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) break; case GL_VERTEX_PROGRAM_ARB: - TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state); + if (!state) { + GLuint i; + rmesa->curr_vp_hw = NULL; + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE; + /* mark all tcl atoms (tcl vector state got overwritten) dirty + not sure about tcl scalar state - we need at least grd + with vert progs too. + ucp looks like it doesn't get overwritten (may even work + with vp for pos-invariant progs if we're lucky) */ + R200_STATECHANGE( rmesa, mtl[0] ); + R200_STATECHANGE( rmesa, mtl[1] ); + R200_STATECHANGE( rmesa, fog ); + R200_STATECHANGE( rmesa, glt ); + R200_STATECHANGE( rmesa, eye ); + for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) { + R200_STATECHANGE( rmesa, mat[i] ); + } + for (i = 0 ; i < 8; i++) { + R200_STATECHANGE( rmesa, lit[i] ); + } + R200_STATECHANGE( rmesa, tcl ); + for (i = 0; i <= ctx->Const.MaxClipPlanes; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i); + } +/* else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i); + }*/ + } + /* FIXME: ugly as hell. need to call everything which might change tcl_output_vtxfmt0/1 and compsel */ + r200UpdateSpecular( ctx ); + r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL ); +#if 1 + /* shouldn't be necessary, as it's picked up anyway in r200ValidateState (_NEW_PROGRAM), + but without it doom3 locks up at always the same places. Why? */ + /* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before + accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... 
*/ + r200UpdateTextureState( ctx ); + /* if we call r200UpdateTextureState we need the code below because we are calling it with + non-current derived enabled values which may revert the state atoms for frag progs even when + they already got disabled... ugh + Should really figure out why we need to call r200UpdateTextureState in the first place */ + GLuint unit; + for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) { + R200_STATECHANGE( rmesa, pix[unit] ); + R200_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= + ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* need to guard this with drmSupportsFragmentShader? Should never get here if + we don't announce ATI_fs, right? */ + rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, tf ); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; +#endif + } + else { + /* picked up later */ + } break; case GL_FRAGMENT_SHADER_ATI: @@ -2110,18 +2174,18 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) if they didn't change) and restore tex coord routing */ GLuint unit; for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) { + R200_STATECHANGE( rmesa, pix[unit] ); + R200_STATECHANGE( rmesa, tex[unit] ); rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; /* need to guard this with drmSupportsFragmentShader? Should never get here if we don't announce ATI_fs, right? 
*/ rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0; - R200_STATECHANGE( rmesa, pix[unit] ); - R200_STATECHANGE( rmesa, tex[unit] ); } - rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; R200_STATECHANGE( rmesa, cst ); R200_STATECHANGE( rmesa, tf ); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; } else { /* need to mark this dirty as pix/tf atoms have overwritten the data @@ -2310,6 +2374,8 @@ void r200ValidateState( GLcontext *ctx ) r200UpdateLocalViewer( ctx ); } +/* FIXME: don't really need most of these when vertex progs are enabled */ + /* Need an event driven matrix update? */ if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) @@ -2340,6 +2406,16 @@ void r200ValidateState( GLcontext *ctx ) r200UpdateClipPlanes( ctx ); } + if (new_state & (_NEW_PROGRAM| + /* need to test for pretty much anything due to possible parameter bindings */ + _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| + _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| + _NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) { + if (ctx->VertexProgram._Enabled) { + r200SetupVertexProg( ctx ); + } + else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0); + } rmesa->NewGLState = 0; } diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 14616b09f41..ffca7ea5fa1 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -93,6 +93,19 @@ static int cmdvec( int offset, int stride, int count ) return h.i; } +/* warning: the count here is divided by 4 compared to other cmds + (so it doesn't exceed the char size)! 
*/ +static int cmdveclinear( int offset, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.veclinear.cmd_type = RADEON_CMD_VECLINEAR; + h.veclinear.addr_lo = offset & 0xff; + h.veclinear.addr_hi = (offset & 0xff00) >> 8; + h.veclinear.count = count; + return h.i; +} + static int cmdscl( int offset, int stride, int count ) { drm_radeon_cmd_header_t h; @@ -129,9 +142,24 @@ static GLboolean check_##NM( GLcontext *ctx, int idx ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ (void) idx; \ + return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \ +} + +#define TCL_OR_VP_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ return !rmesa->TclFallback && (FLAG); \ } +#define VP_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ + return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \ +} CHECK( always, GL_TRUE ) @@ -145,12 +173,16 @@ CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._ CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) CHECK( afs, ctx->ATIFragmentShader._Enabled ) CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT ) -CHECK( fog, ctx->Fog.Enabled ) +TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) TCL_CHECK( tcl, GL_TRUE ) TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded ) TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled ) -TCL_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) ) +TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) ) +TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) +VP_CHECK( tcl_vp, GL_TRUE ) +VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) +VP_CHECK( tcl_vpp_size, 
ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) /* Initialize the context's hardware state. @@ -307,14 +339,28 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); } - - ALLOC_STATE( tcl, tcl, TCL_STATE_SIZE, "TCL/tcl", 0 ); + if (rmesa->r200Screen->drmSupportsVertexProgram) { + ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); + ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } + else { + ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); + ALLOC_STATE( vpi[0], never, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], never, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], never, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], never, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } + /* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */ + ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 ); ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 ); ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 ); ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); - ALLOC_STATE( grd, tcl, GRD_STATE_SIZE, "GRD/guard-band", 0 ); - ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); ALLOC_STATE( mat[R200_MTX_MV], tcl, 
MAT_STATE_SIZE, "MAT/modelview", 0 ); @@ -411,6 +457,7 @@ void r200InitState( r200ContextPtr rmesa ) } rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0); rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1); + rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL); rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0); rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0); rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1); @@ -450,6 +497,15 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.mtl[1].cmd[MTL_CMD_1] = cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 ); + rmesa->hw.vpi[0].cmd[VPI_CMD_0] = + cmdveclinear( R200_PVS_PROG0, 64 ); + rmesa->hw.vpi[1].cmd[VPI_CMD_0] = + cmdveclinear( R200_PVS_PROG1, 64 ); + rmesa->hw.vpp[0].cmd[VPP_CMD_0] = + cmdveclinear( R200_PVS_PARAM0, 96 ); + rmesa->hw.vpp[1].cmd[VPP_CMD_0] = + cmdveclinear( R200_PVS_PARAM1, 96 ); + rmesa->hw.grd.cmd[GRD_CMD_0] = cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); rmesa->hw.fog.cmd[FOG_CMD_0] = diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index c41622debe5..18b5458a97b 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -390,27 +390,86 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, if (rmesa->NewGLState) r200ValidateState( ctx ); + if (!ctx->VertexProgram._Enabled) { /* NOTE: inputs != tnl->render_inputs - these are the untransformed * inputs. 
*/ - if (ctx->Light.Enabled) { - inputs |= VERT_BIT_NORMAL; - } + if (ctx->Light.Enabled) { + inputs |= VERT_BIT_NORMAL; + } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { - inputs |= VERT_BIT_COLOR1; - } + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { + inputs |= VERT_BIT_COLOR1; + } - if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { - inputs |= VERT_BIT_FOG; - } + if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { + inputs |= VERT_BIT_FOG; + } - for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { - if (rmesa->TexGenNeedNormals[i]) { - inputs |= VERT_BIT_NORMAL; + for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (rmesa->TexGenNeedNormals[i]) { + inputs |= VERT_BIT_NORMAL; + } + inputs |= VERT_BIT_TEX(i); } - inputs |= VERT_BIT_TEX(i); + } + } + else { + GLuint out_vtxfmt0 = 0; + GLuint out_vtxfmt1 = 0; + GLuint out_compsel = 0; + GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; + /* can't handle other inputs, generic attribs etc. currently - should never arrive here */ + assert ((rmesa->curr_vp_hw->mesa_program.Base.InputsRead & + ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | + VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | + VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) == 0); + inputs |= rmesa->curr_vp_hw->mesa_program.Base.InputsRead; + /* FIXME: this is a mess. Not really sure how to set up TCL_OUTPUT_VTXFMT + in "undefined" cases (e.g. output needed later but not written by vertex program or vice versa) + - however misconfiguration here will almost certainly lock up the chip. + I think at the very least we need to enable tcl outputs which we write to. Maybe even need to + fix up a vertex program so an output needed later always gets written? + For now just set the compsel and output_vtxfmt to the outputs written. 
+ However, for simplicity we assume always all 4 values are written which may not be correct + (but I don't know if it could lead to lockups). */ + assert(vp_out & (1 << VERT_RESULT_HPOS)); + out_vtxfmt0 = R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0; + /* FIXME: need to always enable color_0 otherwise doom3's shadow vp (?) will lock up (?) */ + out_vtxfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT; + out_compsel = R200_OUTPUT_XYZW; + if (vp_out & (1 << VERT_RESULT_COL0)) { + out_vtxfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT; + out_compsel |= R200_OUTPUT_COLOR_0; + } + if (vp_out & (1 << VERT_RESULT_COL1)) { + out_vtxfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT; + out_compsel |= R200_OUTPUT_COLOR_1; + } + /* FIXME: probably not everything is set up for fogc and psiz to work correctly */ + if (vp_out & (1 << VERT_RESULT_FOGC)) { + out_vtxfmt0 |= R200_VTX_DISCRETE_FOG; + out_compsel |= R200_OUTPUT_DISCRETE_FOG; + } + if (vp_out & (1 << VERT_RESULT_PSIZ)) { + out_vtxfmt0 |= R200_VTX_POINT_SIZE; + out_compsel |= R200_OUTPUT_PT_SIZE; + } + for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) { + if (vp_out & (1 << i)) { + out_vtxfmt1 |= 4 << ((i - VERT_RESULT_TEX0) * 3); + out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0); + } + } + if ((rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] != out_vtxfmt0) || + (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] != out_vtxfmt1) || + (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel)) { + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_vtxfmt0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = out_vtxfmt1; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel; + /* FIXME: should restore this when disabling vertex programs maybe? 
*/ } } @@ -486,7 +545,7 @@ static void transition_to_swtnl( GLcontext *ctx ) * need to put the card into D3D mode to make it work: */ R200_STATECHANGE( rmesa, vap ); - rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_TCL_ENABLE; + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_TCL_ENABLE|R200_VAP_PROG_VTX_SHADER_ENABLE); } static void transition_to_hwtnl( GLcontext *ctx ) @@ -513,6 +572,10 @@ static void transition_to_hwtnl( GLcontext *ctx ) rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE; rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE; + if (ctx->VertexProgram._Enabled) { + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE; + } + if ( ((rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK) == R200_FOG_USE_SPEC_ALPHA) && (ctx->Fog.FogCoordinateSource == GL_FOG_COORD )) { diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index acc980ca8c1..433bc67e3f3 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -1137,7 +1137,10 @@ static void import_tex_obj_state( r200ContextPtr rmesa, int unit, r200TexObjPtr texobj ) { - GLuint *cmd = R200_DB_STATE( tex[unit] ); +/* do not use RADEON_DB_STATE to avoid stale texture caches */ + GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + + R200_STATECHANGE( rmesa, tex[unit] ); cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK; cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; @@ -1156,9 +1159,11 @@ static void import_tex_obj_state( r200ContextPtr rmesa, } if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { - GLuint *cube_cmd = R200_DB_STATE( cube[unit] ); + GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; GLuint bytesPerFace = texobj->base.totalSize / 6; ASSERT(texobj->base.totalSize % 6 == 0); + + R200_STATECHANGE( rmesa, cube[unit] ); cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; if (rmesa->r200Screen->drmSupportsFragShader) { /* that value is 
submitted twice. could change cube atom @@ -1170,9 +1175,7 @@ static void import_tex_obj_state( r200ContextPtr rmesa, cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace; cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace; cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace; - R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] ); } - R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] ); texobj->dirty_state &= ~(1<<unit); } @@ -1304,11 +1307,13 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) case GL_REFLECTION_MAP_NV: rmesa->TexGenNeedNormals[unit] = GL_TRUE; tgi |= R200_TEXGEN_INPUT_EYE_REFLECT<<inputshift; - set_texgen_matrix( rmesa, unit, - (texUnit->TexGenEnabled & S_BIT) ? reflect : I, - (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4, - (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8, - I + 12); + /* pretty weird, must only negate when lighting is enabled? */ + if (ctx->Light.Enabled) + set_texgen_matrix( rmesa, unit, + (texUnit->TexGenEnabled & S_BIT) ? reflect : I, + (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8, + I + 12); break; case GL_NORMAL_MAP_NV: diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c new file mode 100644 index 00000000000..cf0c15093fd --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -0,0 +1,1142 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola. + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Aapo Tahkola <[email protected]> + * Roland Scheidegger <[email protected]> + */ +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "program.h" + +#include "r200_context.h" +#include "r200_vertprog.h" +#include "r200_ioctl.h" +#include "r200_tcl.h" +#include "program_instruction.h" +#include "tnl/tnl.h" + +#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ + SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ + SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ + SWIZZLE_W != VSF_IN_COMPONENT_W || \ + SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ + SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ + WRITEMASK_X != VSF_FLAG_X || \ + WRITEMASK_Y != VSF_FLAG_Y || \ + WRITEMASK_Z != VSF_FLAG_Z || \ + WRITEMASK_W != VSF_FLAG_W +#error Cannot change these! 
+#endif + +#define SCALAR_FLAG (1<<31) +#define FLAG_MASK (1<<31) +#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ +#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} + +static struct{ + char *name; + int opcode; + unsigned long ip; /* number of input operands and flags */ +}op_names[]={ + OPN(ABS, 1), + OPN(ADD, 2), + OPN(ARL, 1|SCALAR_FLAG), + OPN(DP3, 2), + OPN(DP4, 2), + OPN(DPH, 2), + OPN(DST, 2), + OPN(EX2, 1|SCALAR_FLAG), + OPN(EXP, 1|SCALAR_FLAG), + OPN(FLR, 1), + OPN(FRC, 1), + OPN(LG2, 1|SCALAR_FLAG), + OPN(LIT, 1), + OPN(LOG, 1|SCALAR_FLAG), + OPN(MAD, 3), + OPN(MAX, 2), + OPN(MIN, 2), + OPN(MOV, 1), + OPN(MUL, 2), + OPN(POW, 2|SCALAR_FLAG), + OPN(RCP, 1|SCALAR_FLAG), + OPN(RSQ, 1|SCALAR_FLAG), + OPN(SGE, 2), + OPN(SLT, 2), + OPN(SUB, 2), + OPN(SWZ, 1), + OPN(XPD, 2), + OPN(PRINT, 0), + OPN(END, 0), +}; +#undef OPN + +static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1]; + int pi; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + struct gl_program_parameter_list *paramList; + drm_radeon_cmd_header_t tmp; + + R200_STATECHANGE( rmesa, vpp[0] ); + R200_STATECHANGE( rmesa, vpp[1] ); + assert(mesa_vp->Base.Parameters); + _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + paramList = mesa_vp->Base.Parameters; + + if(paramList->NumParameters > R200_VSF_MAX_PARAM){ + fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + return GL_FALSE; + } + + for(pi = 0; pi < paramList->NumParameters; pi++) { + switch(paramList->Parameters[pi].Type) { + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); + case PROGRAM_CONSTANT: + *fcmd++ = paramList->ParameterValues[pi][0]; + *fcmd++ = paramList->ParameterValues[pi][1]; + *fcmd++ = paramList->ParameterValues[pi][2]; + *fcmd++ = paramList->ParameterValues[pi][3]; 
+ break; + default: + _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); + break; + } + if (pi == 95) { + fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1]; + } + } + /* hack up the cmd_size so not the whole state atom is emitted always. */ + rmesa->hw.vpp[0].cmd_size = + 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters); + tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0]; + tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters; + rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i; + if (paramList->NumParameters > 96) { + rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96); + tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0]; + tmp.veclinear.count = paramList->NumParameters - 96; + rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i; + } + return GL_TRUE; +} + +static __inline unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; +} + +static unsigned long t_dst(struct prog_dst_register *dst) +{ + switch(dst->File) { + case PROGRAM_TEMPORARY: + return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_TMP); + case PROGRAM_OUTPUT: + switch (dst->Index) { + case VERT_RESULT_HPOS: + return R200_VSF_OUT_CLASS_RESULT_POS; + case VERT_RESULT_COL0: + return R200_VSF_OUT_CLASS_RESULT_COLOR; + case VERT_RESULT_COL1: + return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_RESULT_COLOR); + case VERT_RESULT_FOGC: + return R200_VSF_OUT_CLASS_RESULT_FOGC; + case VERT_RESULT_TEX0: + case VERT_RESULT_TEX1: + case VERT_RESULT_TEX2: + case VERT_RESULT_TEX3: + case VERT_RESULT_TEX4: + case VERT_RESULT_TEX5: + return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_RESULT_TEXC); + case VERT_RESULT_PSIZ: + return R200_VSF_OUT_CLASS_RESULT_POINTSIZE; + default: + fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index); + exit(0); + return 0; + } + case PROGRAM_ADDRESS: + assert 
(dst->Index == 0); + return R200_VSF_OUT_CLASS_ADDR; + default: + fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File); + exit(0); + return 0; + } +} + +static unsigned long t_src_class(enum register_file file) +{ + + switch(file){ + case PROGRAM_TEMPORARY: + return VSF_IN_CLASS_TMP; + + case PROGRAM_INPUT: + return VSF_IN_CLASS_ATTR; + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + return VSF_IN_CLASS_PARAM; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + exit(0); + } +} + +static __inline unsigned long t_swizzle(GLubyte swizzle) +{ +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +#if 0 +static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller) +{ + int i; + + if(vp == NULL){ + fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); + return ; + } + + fprintf(stderr, "%s:<", caller); + for(i=0; i < VERT_ATTRIB_MAX; i++) + fprintf(stderr, "%d ", vp->inputs[i]); + fprintf(stderr, ">\n"); + +} +#endif + +static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src) +{ +/* + int i; + int max_reg = -1; +*/ + if(src->File == PROGRAM_INPUT){ +/* if(vp->inputs[src->Index] != -1) + return vp->inputs[src->Index]; + + for(i=0; i < VERT_ATTRIB_MAX; i++) + if(vp->inputs[i] > max_reg) + max_reg = vp->inputs[i]; + + vp->inputs[src->Index] = max_reg+1;*/ + + //vp_dump_inputs(vp, __FUNCTION__); + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); + return 0; + } + return src->Index; + } +} + +static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src) +{ + + return 
MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->NegateBase) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) +{ + + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); +} + +static unsigned long t_opcode(enum prog_opcode opcode) +{ + + switch(opcode){ + case OPCODE_ADD: return R200_VPI_OUT_OP_ADD; + /* FIXME: ARL works fine, but negative offsets won't work - fglrx just + * seems to ignore neg offsets which isn't quite correct... + */ + case OPCODE_ARL: return R200_VPI_OUT_OP_ARL; + case OPCODE_DP4: return R200_VPI_OUT_OP_DOT; + case OPCODE_DST: return R200_VPI_OUT_OP_DST; + case OPCODE_EX2: return R200_VPI_OUT_OP_EX2; + case OPCODE_EXP: return R200_VPI_OUT_OP_EXP; + case OPCODE_FRC: return R200_VPI_OUT_OP_FRC; + case OPCODE_LG2: return R200_VPI_OUT_OP_LG2; + case OPCODE_LIT: return R200_VPI_OUT_OP_LIT; + case OPCODE_LOG: return R200_VPI_OUT_OP_LOG; + case OPCODE_MAX: return R200_VPI_OUT_OP_MAX; + case OPCODE_MIN: return R200_VPI_OUT_OP_MIN; + case OPCODE_MUL: return R200_VPI_OUT_OP_MUL; + case OPCODE_RCP: return R200_VPI_OUT_OP_RCP; + case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ; + case OPCODE_SGE: return R200_VPI_OUT_OP_SGE; + case OPCODE_SLT: return R200_VPI_OUT_OP_SLT; + + default: + fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); + } + exit(-1); + return 0; +} + +static unsigned long op_operands(enum prog_opcode opcode) +{ + int i; + + /* Can we trust mesas opcodes to be in order ? 
*/ + for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++) + if(op_names[i].opcode == opcode) + return op_names[i].ip; + + fprintf(stderr, "op %d not found in op_names\n", opcode); + exit(-1); + return 0; +} + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \ + +/* fglrx on rv250 codes up unused sources as follows: + unused but necessary sources are same as previous source, zero-ed out. + unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set. + i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg + set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */ + +/* use these simpler definitions. Must obviously not be used with not yet set up regs. + Those are NOT semantically equivalent to the r300 ones, requires code changes */ +#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define UNUSED_SRC_0 ((o_inst->src0 & 
~15) | 9) + +#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9) + +#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9) + + +/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */ +#define PREFER_DP4 + + +/** + * Generate an R200 vertex program from Mesa's internal representation. + * + * \return GL_TRUE for success, GL_FALSE for failure. + */ +static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp) +{ + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + struct prog_instruction *vpi; + int i; + VERTEX_SHADER_INSTRUCTION *o_inst; + unsigned long operands; + int are_srcs_scalar; + unsigned long hw_op; + + vp->native = GL_FALSE; + vp->translated = GL_TRUE; + + if (mesa_vp->Base.NumInstructions == 0) + return GL_FALSE; + + if ((mesa_vp->Base.InputsRead & + ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | + VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | + VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog inputs 0x%x\n", + mesa_vp->Base.InputsRead); + } + return GL_FALSE; + } + + if ((mesa_vp->Base.OutputsWritten & + ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) | + (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) | + (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) | + (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog outputs 0x%x\n", + mesa_vp->Base.OutputsWritten); + } + return GL_FALSE; + } + + if (mesa_vp->IsNVProgram) { + /* subtle differences in spec like guaranteed initialized regs could cause + headaches. Might want to remove the driconf option to enable it completely */ + return GL_FALSE; + } + /* Initial value should be last tmp reg that hw supports. 
+ Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = R200_VSF_MAX_TEMPS - 1; + struct prog_src_register src[3]; + +/* FIXME: is changing the prog safe to do here? */ + if (mesa_vp->IsPositionInvariant) { + struct gl_program_parameter_list *paramList; + GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX }; + +#ifdef PREFER_DP4 + tokens[5] = STATE_MATRIX; +#else + tokens[5] = STATE_MATRIX_TRANSPOSE; +#endif + paramList = mesa_vp->Base.Parameters; + + vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction)); + memset(vpi, 0, 4 * sizeof(struct prog_instruction)); + + /* emit four dot product instructions to do MVP transformation */ + for (i=0; i < 4; i++) { + GLint idx; + tokens[3] = tokens[4] = i; + idx = _mesa_add_state_reference(paramList, tokens); +#ifdef PREFER_DP4 + vpi[i].Opcode = OPCODE_DP4; + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + vpi[i].DstReg.File = PROGRAM_OUTPUT; + vpi[i].DstReg.Index = VERT_RESULT_HPOS; + vpi[i].DstReg.WriteMask = 1 << i; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); +#else + if (i == 0) + vpi[i].Opcode = OPCODE_MUL; + else + vpi[i].Opcode = OPCODE_MAD; + + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + if (i == 3) + vpi[i].DstReg.File = PROGRAM_OUTPUT; + else + vpi[i].DstReg.File = PROGRAM_TEMPORARY; + vpi[i].DstReg.Index = 0; + vpi[i].DstReg.WriteMask = 0xf; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + + 
vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); + + if (i > 0) { + vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; + vpi[i].SrcReg[2].Index = 0; + vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + } +#endif + } + + /* now append original program after our new instructions */ + memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction)); + + /* deallocate original program */ + free(mesa_vp->Base.Instructions); + + /* install new program */ + mesa_vp->Base.Instructions = vpi; + + mesa_vp->Base.NumInstructions += 4; + vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1]; + + assert(vpi->Opcode == OPCODE_END); + + mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS); + mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); + + //fprintf(stderr, "IsPositionInvariant is set!\n"); + //_mesa_print_program(&mesa_vp->Base); + } + + vp->pos_end = 0; + mesa_vp->Base.NumNativeInstructions = 0; + if (mesa_vp->Base.Parameters) + mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters; + else + mesa_vp->Base.NumNativeParameters = 0; + + for(i=0; i < VERT_ATTRIB_MAX; i++) + vp->inputs[i] = -1; +/* fglrx uses fixed inputs as follows for conventional attribs. + generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available. + There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog. 
+ attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to + vertex normal/weight) + attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0 + attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0) + attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1) + attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0) + generic attribs would require some more work (dma regions, renaming). */ + +/* may look different when using idx buf / input_route instead of se_vtx_fmt? */ + vp->inputs[VERT_ATTRIB_POS] = 0; + vp->inputs[VERT_ATTRIB_WEIGHT] = 12; + vp->inputs[VERT_ATTRIB_NORMAL] = 1; + vp->inputs[VERT_ATTRIB_COLOR0] = 2; + vp->inputs[VERT_ATTRIB_COLOR1] = 3; + vp->inputs[VERT_ATTRIB_FOG] = 15; + vp->inputs[VERT_ATTRIB_TEX0] = 6; + vp->inputs[VERT_ATTRIB_TEX1] = 7; + vp->inputs[VERT_ATTRIB_TEX2] = 8; + vp->inputs[VERT_ATTRIB_TEX3] = 9; + vp->inputs[VERT_ATTRIB_TEX4] = 10; + vp->inputs[VERT_ATTRIB_TEX5] = 11; +/* attr 4,5 and 13 are only used with generic attribs. 
+ Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is + not possibe to use with vertex progs as it is lacking in vert prog specification) */ + + if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog without position output\n"); + } + return GL_FALSE; + } + + o_inst = vp->instr; + for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ + operands = op_operands(vpi->Opcode); + are_srcs_scalar = operands & SCALAR_FLAG; + operands &= OP_MASK; + + for(i = 0; i < operands; i++) + src[i] = vpi->SrcReg[i]; + + if(operands == 3){ + if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); + + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if(operands >= 2){ + if( CMP_SRCS(src[1], src[0]) ){ + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + /* These ops need special handling. */ + switch(vpi->Opcode){ + case OPCODE_POW: +/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter). 
+ So may need to insert additional instruction */ + if ((src[0].File == src[1].File) && + (src[0].Index == src[1].Index)) { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + SWIZZLE_ZERO, + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_0; + } + else { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_ZERO, SWIZZLE_ZERO, + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].NegateBase ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_0; + u_temp_i--; + } + goto next; + + case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + case OPCODE_SWZ: + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_MAD: + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; + + o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + o_inst->src0 = t_src(vp, &src[0]); +#if 0 +if ((o_inst - vp->instr) == 31) { +/* fix up the broken vertex program of quake4 demo... 
*/ +o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); +o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); +} +else { + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); +} +#else + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); +#endif + goto next; + + case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); + + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg 
Zneg W + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0=t_src(vp, &src[0]); + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_FLR: + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0].NegateBase) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + + o_inst->src2 = UNUSED_SRC_0; + u_temp_i--; + goto next; + + case OPCODE_XPD: + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); + + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + u_temp_i--; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1].NegateBase) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + goto next; + + case OPCODE_END: + assert(0); + default: + break; + } + + o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask)); + + if(are_srcs_scalar){ + switch(operands){ + case 1: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + break; + + case 2: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = t_src_scalar(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + break; + + case 3: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = t_src_scalar(vp, &src[1]); + o_inst->src2 = t_src_scalar(vp, &src[2]); + break; + + default: + fprintf(stderr, "illegal number of operands %lu\n", operands); + exit(-1); + break; + } + } else { + switch(operands){ + case 1: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + break; + + case 2: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + break; + + case 3: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); + break; + + default: + fprintf(stderr, "illegal number of operands %lu\n", operands); + exit(-1); + break; + } + } + next: + if (mesa_vp->Base.NumNativeTemporaries < + (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) { + mesa_vp->Base.NumNativeTemporaries = + 
mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i); + } + if (u_temp_i < mesa_vp->Base.NumTemporaries) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i); + } + return GL_FALSE; + } + u_temp_i = R200_VSF_MAX_TEMPS - 1; + if(o_inst - vp->instr >= R200_VSF_MAX_INST) { + mesa_vp->Base.NumNativeInstructions = 129; + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "more than 128 native instructions\n"); + } + return GL_FALSE; + } + if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) { + vp->pos_end = (o_inst - vp->instr); + } + } + + vp->native = GL_TRUE; + mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr); +#if 0 + fprintf(stderr, "hw program:\n"); + for(i=0; i < vp->program.length; i++) + fprintf(stderr, "%08x\n", vp->instr[i]); +#endif + return GL_TRUE; +} + +void r200SetupVertexProg( GLcontext *ctx ) { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current; + GLboolean fallback; + GLint i; + + if (!vp->translated) { + rmesa->curr_vp_hw = NULL; + r200_translate_vertex_program(vp); + } + /* could optimize setting up vertex progs away for non-tcl hw */ + fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) && + rmesa->r200Screen->drmSupportsVertexProgram); + TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback); + if (rmesa->TclFallback) return; + + R200_STATECHANGE( rmesa, vap ); + /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? + maybe only when using more than 64 inst / 96 param? 
*/ + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/; + + R200_STATECHANGE( rmesa, pvs ); + + rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) | + ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) | + (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT); + rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT); + + /* maybe user clip planes just work with vertex progs... untested */ + if (ctx->Transform.ClipPlanesEnabled) { + R200_STATECHANGE( rmesa, tcl ); + if (vp->mesa_program.IsPositionInvariant) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2); + } + else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc); + } + } + + if (vp != rmesa->curr_vp_hw) { + GLuint count = vp->mesa_program.Base.NumNativeInstructions; + drm_radeon_cmd_header_t tmp; + + R200_STATECHANGE( rmesa, vpi[0] ); + R200_STATECHANGE( rmesa, vpi[1] ); + + /* FIXME: what about using a memcopy... */ + for (i = 0; (i < 64) && i < count; i++) { + rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op; + rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0; + rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1; + rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2; + } + /* hack up the cmd_size so not the whole state atom is emitted always. + This may require some more thought, we may emit half progs on lost state, but + hopefully it won't matter? + WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected) + packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */ + rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count); + tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0]; + tmp.veclinear.count = (count > 64) ? 
64 : count; + rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i; + if (count > 64) { + for (i = 0; i < (count - 64); i++) { + rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op; + rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0; + rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1; + rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2; + } + rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64); + tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0]; + tmp.veclinear.count = count - 64; + rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i; + } + rmesa->curr_vp_hw = vp; + } +} + + +static void +r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + switch(target){ + case GL_VERTEX_PROGRAM_ARB: + rmesa->curr_vp_hw = NULL; + break; + default: + _mesa_problem(ctx, "Target not supported yet!"); + break; + } +} + +static struct gl_program * +r200NewProgram(GLcontext *ctx, GLenum target, GLuint id) +{ + struct r200_vertex_program *vp; + + switch(target){ + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r200_vertex_program); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); + case GL_FRAGMENT_PROGRAM_ARB: + case GL_FRAGMENT_PROGRAM_NV: + return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id ); + default: + _mesa_problem(ctx, "Bad target in r200NewProgram"); + } + return NULL; +} + + +static void +r200DeleteProgram(GLcontext *ctx, struct gl_program *prog) +{ + _mesa_delete_program(ctx, prog); +} + +static void +r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + struct r200_vertex_program *vp = (void *)prog; + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + switch(target) { + case GL_VERTEX_PROGRAM_ARB: + vp->translated = GL_FALSE; +/* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/ + r200_translate_vertex_program(vp); + rmesa->curr_vp_hw = NULL; 
+ break; + } + /* need this for tcl fallbacks */ + _tnl_program_string(ctx, target, prog); +} + +static GLboolean +r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + struct r200_vertex_program *vp = (void *)prog; + + switch(target){ + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + if (!vp->translated) { + r200_translate_vertex_program(vp); + } + /* does not take parameters etc. into account */ + return vp->native; + default: + _mesa_problem(ctx, "Bad target in r200NewProgram"); + } + return 0; +} + +void r200InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r200NewProgram; + functions->BindProgram = r200BindProgram; + functions->DeleteProgram = r200DeleteProgram; + functions->ProgramStringNotify = r200ProgramStringNotify; + functions->IsProgramNative = r200IsProgramNative; +} diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.h b/src/mesa/drivers/dri/r200/r200_vertprog.h new file mode 100644 index 00000000000..0fbe5eec7d9 --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_vertprog.h @@ -0,0 +1,161 @@ +#ifndef __VERTEX_SHADER_H__ +#define __VERTEX_SHADER_H__ + +#include "r200_reg.h" + +typedef struct { + uint32_t op; + uint32_t src0; + uint32_t src1; + uint32_t src2; +} VERTEX_SHADER_INSTRUCTION; + +extern void r200InitShaderFuncs(struct dd_function_table *functions); +extern void r200SetupVertexProg( GLcontext *ctx ); + +#define VSF_FLAG_X 1 +#define VSF_FLAG_Y 2 +#define VSF_FLAG_Z 4 +#define VSF_FLAG_W 8 +#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) +#define VSF_FLAG_ALL 0xf +#define VSF_FLAG_NONE 0 + +#define R200_VSF_MAX_INST 128 +#define R200_VSF_MAX_PARAM 192 +#define R200_VSF_MAX_TEMPS 12 + +#define R200_VPI_OUT_REG_INDEX_SHIFT 13 +#define R200_VPI_OUT_REG_INDEX_MASK (31 << 13) /* GUESS based on fglrx native limits */ + +#define R200_VPI_OUT_WRITE_X (1 << 20) +#define R200_VPI_OUT_WRITE_Y (1 << 21) +#define R200_VPI_OUT_WRITE_Z (1 << 22) +#define 
R200_VPI_OUT_WRITE_W (1 << 23) + +#define R200_VPI_IN_REG_CLASS_TEMPORARY (0 << 0) +#define R200_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) +#define R200_VPI_IN_REG_CLASS_PARAMETER (2 << 0) +#define R200_VPI_IN_REG_CLASS_NONE (9 << 0) +#define R200_VPI_IN_REG_CLASS_MASK (31 << 0) /* GUESS */ + +#define R200_VPI_IN_REG_INDEX_SHIFT 5 +#define R200_VPI_IN_REG_INDEX_MASK (255 << 5) /* GUESS based on fglrx native limits */ + +/* The R200 can select components from the input register arbitrarily. +// Use the following constants, shifted by the component shift you +// want to select */ +#define R200_VPI_IN_SELECT_X 0 +#define R200_VPI_IN_SELECT_Y 1 +#define R200_VPI_IN_SELECT_Z 2 +#define R200_VPI_IN_SELECT_W 3 +#define R200_VPI_IN_SELECT_ZERO 4 +#define R200_VPI_IN_SELECT_ONE 5 +#define R200_VPI_IN_SELECT_MASK 7 + +#define R200_VPI_IN_X_SHIFT 13 +#define R200_VPI_IN_Y_SHIFT 16 +#define R200_VPI_IN_Z_SHIFT 19 +#define R200_VPI_IN_W_SHIFT 22 + +#define R200_VPI_IN_NEG_X (1 << 25) +#define R200_VPI_IN_NEG_Y (1 << 26) +#define R200_VPI_IN_NEG_Z (1 << 27) +#define R200_VPI_IN_NEG_W (1 << 28) + +#define R200_VSF_OUT_CLASS_TMP (0 << 8) +#define R200_VSF_OUT_CLASS_ADDR (3 << 8) +#define R200_VSF_OUT_CLASS_RESULT_POS (4 << 8) +#define R200_VSF_OUT_CLASS_RESULT_COLOR (5 << 8) +#define R200_VSF_OUT_CLASS_RESULT_TEXC (6 << 8) +#define R200_VSF_OUT_CLASS_RESULT_FOGC (7 << 8) +#define R200_VSF_OUT_CLASS_RESULT_POINTSIZE (8 << 8) +#define R200_VSF_OUT_CLASS_MASK (31 << 8) + +/* opcodes - they all are the same as on r300 it seems, however + LIT and POW require different setup */ +#define R200_VPI_OUT_OP_DOT (1 << 0) +#define R200_VPI_OUT_OP_MUL (2 << 0) +#define R200_VPI_OUT_OP_ADD (3 << 0) +#define R200_VPI_OUT_OP_MAD (4 << 0) +#define R200_VPI_OUT_OP_DST (5 << 0) +#define R200_VPI_OUT_OP_FRC (6 << 0) +#define R200_VPI_OUT_OP_MAX (7 << 0) +#define R200_VPI_OUT_OP_MIN (8 << 0) +#define R200_VPI_OUT_OP_SGE (9 << 0) +#define R200_VPI_OUT_OP_SLT (10 << 0) + +#define R200_VPI_OUT_OP_ARL (13 << 0) 
+ +#define R200_VPI_OUT_OP_EXP (65 << 0) +#define R200_VPI_OUT_OP_LOG (66 << 0) + +#define R200_VPI_OUT_OP_LIT (68 << 0) +#define R200_VPI_OUT_OP_POW (69 << 0) +#define R200_VPI_OUT_OP_RCP (70 << 0) +#define R200_VPI_OUT_OP_RSQ (72 << 0) + +#define R200_VPI_OUT_OP_EX2 (75 << 0) +#define R200_VPI_OUT_OP_LG2 (76 << 0) + +#define R200_VPI_OUT_OP_MAD_2 (128 << 0) + +/* first CARD32 of an instruction */ + +/* possible operations: + DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2, + LG2, MAD_2, ARL */ + +#define MAKE_VSF_OP(op, out_reg, out_reg_fields) \ + ((op) | (out_reg) | ((out_reg_fields) << 20) ) + +#define VSF_IN_CLASS_TMP 0 +#define VSF_IN_CLASS_ATTR 1 +#define VSF_IN_CLASS_PARAM 2 +#define VSF_IN_CLASS_NONE 9 + +#define VSF_IN_COMPONENT_X 0 +#define VSF_IN_COMPONENT_Y 1 +#define VSF_IN_COMPONENT_Z 2 +#define VSF_IN_COMPONENT_W 3 +#define VSF_IN_COMPONENT_ZERO 4 +#define VSF_IN_COMPONENT_ONE 5 + +#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + ( ((in_reg_index)<<R200_VPI_IN_REG_INDEX_SHIFT) \ + | ((comp_x)<<R200_VPI_IN_X_SHIFT) \ + | ((comp_y)<<R200_VPI_IN_Y_SHIFT) \ + | ((comp_z)<<R200_VPI_IN_Z_SHIFT) \ + | ((comp_w)<<R200_VPI_IN_W_SHIFT) \ + | ((negate)<<25) | ((class))) + +#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + MAKE_VSF_SOURCE(in_reg_index, \ + VSF_IN_COMPONENT_##comp_x, \ + VSF_IN_COMPONENT_##comp_y, \ + VSF_IN_COMPONENT_##comp_z, \ + VSF_IN_COMPONENT_##comp_w, \ + VSF_IN_CLASS_##class, VSF_FLAG_##negate) + +/* special sources: */ + +/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */ +#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE) +#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE) + +/* contents of unmodified register */ +#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE) + +/* contents of unmodified parameter */ +#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, 
PARAM, NONE) + +/* contents of unmodified temporary register */ +#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE) + +/* components of ATTR register */ +#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE) +#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE) +#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE) +#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE) + +#endif diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt.c b/src/mesa/drivers/dri/r200/r200_vtxfmt.c index 673076d0605..d73fbbafd5f 100644 --- a/src/mesa/drivers/dri/r200/r200_vtxfmt.c +++ b/src/mesa/drivers/dri/r200/r200_vtxfmt.c @@ -57,6 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_array_api.h" +#include "tnl/t_save_api.h" #include "dispatch.h" @@ -106,6 +107,12 @@ static void count_funcs( r200ContextPtr rmesa ) count_func( "FogCoordfvEXT", &rmesa->vb.dfn_cache.FogCoordfvEXT );*/ } +static void r200NewList( GLcontext *ctx, GLuint list, GLenum mode ) +{ + VFMT_FALLBACK( __FUNCTION__ ); + _tnl_NewList( ctx, list, mode ); + return; +} void r200_copy_to_current( GLcontext *ctx ) { @@ -395,6 +402,7 @@ static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller ) _tnl_wakeup_exec( ctx ); ctx->Driver.FlushVertices = r200FlushVertices; + ctx->Driver.NewList = _tnl_NewList; assert( rmesa->dma.flush == 0 ); rmesa->vb.fell_back = GL_TRUE; @@ -663,7 +671,9 @@ static GLboolean check_vtx_fmt( GLcontext *ctx ) GLuint count[R200_MAX_TEXTURE_UNITS]; if (rmesa->TclFallback || rmesa->vb.fell_back || ctx->CompileFlag || - (ctx->Fog.Enabled && (ctx->Fog.FogCoordinateSource == GL_FOG_COORD))) + (ctx->Fog.Enabled && (ctx->Fog.FogCoordinateSource == GL_FOG_COORD)) || + /* TODO: set tcl out fmt/compsel and reenable vtxfmt code */ + ctx->VertexProgram._Enabled) return GL_FALSE; if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) @@ -851,6 +861,7 @@ 
static void r200VtxfmtValidate( GLcontext *ctx ) _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt ); ctx->Driver.FlushVertices = r200VtxFmtFlushVertices; + ctx->Driver.NewList = r200NewList; rmesa->vb.installed = GL_TRUE; } else if (R200_DEBUG & DEBUG_VFMT) @@ -865,6 +876,7 @@ static void r200VtxfmtValidate( GLcontext *ctx ) rmesa->dma.flush( rmesa ); _tnl_wakeup_exec( ctx ); ctx->Driver.FlushVertices = r200FlushVertices; + ctx->Driver.NewList =_tnl_NewList; rmesa->vb.installed = GL_FALSE; } } diff --git a/src/mesa/drivers/dri/r200/radeon_chipset.h b/src/mesa/drivers/dri/r200/radeon_chipset.h new file mode 120000 index 00000000000..eba99001ff8 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_chipset.h @@ -0,0 +1 @@ +../radeon/radeon_chipset.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r200/radeon_screen.h b/src/mesa/drivers/dri/r200/radeon_screen.h new file mode 120000 index 00000000000..23bb6bd4598 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_screen.h @@ -0,0 +1 @@ +../radeon/radeon_screen.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index f39f71584ce..ace9b667240 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -544,9 +544,9 @@ void r300InitCmdBuf(r300ContextPtr r300) size = 64*256; if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) { - fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%ld\n", + fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%u\n", sizeof(drm_r300_cmd_header_t)); - fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%ld\n", + fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%u\n", sizeof(drm_radeon_cmd_buffer_t)); fprintf(stderr, "Allocating %d bytes command buffer (max state is %d bytes)\n", @@ -603,7 +603,7 @@ void r300EmitBlit(r300ContextPtr rmesa, cmd[0].header.cmd_type = R300_CMD_PACKET3; cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; - cmd[1].u = R200_CP_CMD_BITBLT_MULTI | (5 << 16); + cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL | RADEON_GMC_BRUSH_NONE | @@ -635,30 +635,35 @@ void r300EmitWait(r300ContextPtr rmesa, GLuint flags) void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) { - int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; - int i; - LOCAL_VARS - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __func__, nr, offset); - - start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz-1); - e32(nr); - for(i=0;i+1<nr;i+=2){ - e32( (rmesa->state.aos[i].aos_size << 0) - |(rmesa->state.aos[i].aos_stride << 8) - |(rmesa->state.aos[i+1].aos_size << 16) - |(rmesa->state.aos[i+1].aos_stride << 24) - ); - e32(rmesa->state.aos[i].aos_offset+offset*4*rmesa->state.aos[i].aos_stride); - e32(rmesa->state.aos[i+1].aos_offset+offset*4*rmesa->state.aos[i+1].aos_stride); - } - if(nr & 1){ - e32( (rmesa->state.aos[nr-1].aos_size << 0) - |(rmesa->state.aos[nr-1].aos_stride << 8) - ); - 
e32(rmesa->state.aos[nr-1].aos_offset+offset*4*rmesa->state.aos[nr-1].aos_stride); - } + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __func__, nr, offset); + + start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz-1); + e32(nr); + for(i=0;i+1<nr;i+=2){ + e32( (rmesa->state.aos[i].aos_size << 0) + |(rmesa->state.aos[i].aos_stride << 8) + |(rmesa->state.aos[i+1].aos_size << 16) + |(rmesa->state.aos[i+1].aos_stride << 24) + ); + e32(rmesa->state.aos[i].aos_offset + + offset*4*rmesa->state.aos[i].aos_stride); + e32(rmesa->state.aos[i+1].aos_offset + + offset*4*rmesa->state.aos[i+1].aos_stride); + } + if(nr & 1){ + e32( (rmesa->state.aos[nr-1].aos_size << 0) + |(rmesa->state.aos[nr-1].aos_stride << 8) + ); + e32(rmesa->state.aos[nr-1].aos_offset + + offset*4*rmesa->state.aos[nr-1].aos_stride); + } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index cadb27ba8b9..54eb081d055 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "tnl/t_vp_build.h" #include "drivers/common/driverfuncs.h" @@ -81,6 +82,7 @@ int hw_tcl_on=1; #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate +#define need_GL_EXT_gpu_program_parameters #define need_GL_NV_vertex_program #include "extension_helper.h" @@ -103,6 +105,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, // {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_stencil_wrap", NULL}, {"GL_EXT_texture_edge_clamp", NULL}, @@ -274,9 +277,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; - if (hw_tcl_on) - ctx->_MaintainTnlProgram = GL_TRUE; - #ifdef USER_BUFFERS /* Needs further modifications */ #if 0 @@ -310,7 +310,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxNativeInstructions=VSF_MAX_FRAGMENT_LENGTH; + ctx->Const.VertexProgram.MaxInstructions=VSF_MAX_FRAGMENT_LENGTH/4; + ctx->Const.VertexProgram.MaxNativeInstructions=VSF_MAX_FRAGMENT_LENGTH/4; ctx->Const.VertexProgram.MaxNativeAttribs=16; /* r420 */ ctx->Const.VertexProgram.MaxTemps=32; ctx->Const.VertexProgram.MaxNativeTemps=/*VSF_MAX_FRAGMENT_TEMPS*/32; @@ -325,11 +326,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.FragmentProgram.MaxNativeInstructions = PFS_MAX_ALU_INST+PFS_MAX_TEX_INST; ctx->Const.FragmentProgram.MaxNativeTexIndirections = PFS_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? 
*/ + _tnl_ProgramCacheInit(ctx); ctx->_MaintainTexEnvProgram = GL_TRUE; driInitExtensions(ctx, card_extensions, GL_TRUE); - if (r300->radeon.glCtx->Mesa_DXTn) { + if (r300->radeon.glCtx->Mesa_DXTn && !driQueryOptionb (&r300->radeon.optionCache, "disable_s3tc")) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); _mesa_enable_extension( ctx, "GL_S3_s3tc" ); } @@ -337,6 +339,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); } + r300->disable_lowimpact_fallback = driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback"); + radeonInitSpanFuncs(ctx); r300InitCmdBuf(r300); r300InitState(r300); @@ -374,20 +378,38 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, static void r300FreeGartAllocations(r300ContextPtr r300) { - int i, ret, tries=0, done_age; + int i, ret, tries=0, done_age, in_use=0; drm_radeon_mem_free_t memfree; memfree.region = RADEON_MEM_REGION_GART; #ifdef USER_BUFFERS + for (i = r300->rmm->u_last; i > 0; i--) { + if (r300->rmm->u_list[i].ptr == NULL) { + continue; + } + + /* check whether this buffer is still in use */ + if (r300->rmm->u_list[i].pending) { + in_use++; + } + } + /* Cannot flush/lock if no context exists. 
*/ + if (in_use) + r300FlushCmdBuf(r300, __FUNCTION__); + done_age = radeonGetAge((radeonContextPtr)r300); for (i = r300->rmm->u_last; i > 0; i--) { if (r300->rmm->u_list[i].ptr == NULL) { continue; } - - assert(r300->rmm->u_list[i].pending); + + /* check whether this buffer is still in use */ + if (!r300->rmm->u_list[i].pending) { + continue; + } + assert(r300->rmm->u_list[i].h_pending == 0); tries = 0; @@ -454,14 +476,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) release_texture_heaps = (r300->radeon.glCtx->Shared->RefCount == 1); _swsetup_DestroyContext(r300->radeon.glCtx); + _tnl_ProgramCacheDestroy(r300->radeon.glCtx); _tnl_DestroyContext(r300->radeon.glCtx); _ac_DestroyContext(r300->radeon.glCtx); _swrast_DestroyContext(r300->radeon.glCtx); if (r300->dma.current.buf) { r300ReleaseDmaRegion(r300, &r300->dma.current, __FUNCTION__ ); +#ifndef USER_BUFFERS + r300FlushCmdBuf(r300, __FUNCTION__); +#endif } - r300FlushCmdBuf(r300, __FUNCTION__); r300FreeGartAllocations(r300); r300DestroyCmdBuf(r300); @@ -486,6 +511,13 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) radeonCleanupContext(&r300->radeon); +#ifdef USER_BUFFERS + /* the memory manager might be accessed when Mesa frees the shared + * state, so don't destroy it earlier + */ + radeon_mm_destroy(r300); +#endif + /* free the option cache */ driDestroyOptionCache(&r300->radeon.optionCache); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 7ff805fd5d3..df73cdedc11 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -131,7 +131,6 @@ struct r300_dma_region { int aos_offset; /* address in GART memory */ int aos_stride; /* distance between elements, in dwords */ int aos_size; /* number of components (1-4) */ - int aos_format; /* format of components */ int aos_reg; /* VAP register assignment */ }; @@ -208,6 +207,10 @@ struct r300_texture_env_state { GLenum envMode; }; + +/* 
The blit width for texture uploads + */ +#define R300_BLIT_WIDTH_BYTES 1024 #define R300_MAX_TEXTURE_UNITS 8 struct r300_texture_state { @@ -537,17 +540,6 @@ struct r300_stencilbuffer_state { }; -struct r300_vap_reg_state { - /* input register assigments */ - int i_coords; - int i_normal; - int i_color[2]; - int i_fog; - int i_tex[R300_MAX_TEXTURE_UNITS]; - int i_index; - int i_pointsize; - }; - /* Vertex shader state */ /* Perhaps more if we store programs in vmem? */ @@ -616,7 +608,7 @@ extern int hw_tcl_on; * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. */ struct r300_vertex_program { - struct vertex_program mesa_program; /* Must be first */ + struct gl_vertex_program mesa_program; /* Must be first */ int translated; struct r300_vertex_shader_fragment program; @@ -670,7 +662,7 @@ struct r300_pfs_compile_state { }; struct r300_fragment_program { - struct fragment_program mesa_program; + struct gl_fragment_program mesa_program; GLcontext *ctx; GLboolean translated; @@ -723,6 +715,7 @@ struct r300_fragment_program { #define R300_MAX_AOS_ARRAYS 16 +#define AOS_FORMAT_USHORT 0 #define AOS_FORMAT_FLOAT 1 #define AOS_FORMAT_UBYTE 2 #define AOS_FORMAT_FLOAT_COLOR 3 @@ -769,7 +762,7 @@ struct r300_aos_rec { struct r300_state { struct r300_depthbuffer_state depth; struct r300_texture_state texture; - struct r300_vap_reg_state vap_reg; + int sw_tcl_inputs[VERT_ATTRIB_MAX]; struct r300_vertex_shader_state vertex_shader; struct r300_pfs_compile_state pfs_compile; struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; @@ -803,7 +796,7 @@ struct r300_context { struct r300_hw_state hw; struct r300_cmdbuf cmdbuf; struct r300_state state; - struct vertex_program *curr_vp; + struct gl_vertex_program *curr_vp; /* Vertex buffers */ @@ -831,6 +824,7 @@ struct r300_context { GLboolean texmicrotile; GLboolean span_dlocking; + GLboolean disable_lowimpact_fallback; }; struct r300_buffer_object { @@ -858,6 +852,8 @@ extern GLboolean 
r300CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); +extern int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim); + void r300_translate_vertex_shader(struct r300_vertex_program *vp); extern void r300InitShaderFuncs(struct dd_function_table *functions); extern int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp, float *dst); diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index c7cb93d0ac2..1101a3545a0 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -1,65 +1,64 @@ -#ifndef __EMIT_H__ -#define __EMIT_H__ +/* + * Copyright (C) 2005 Vladimir Dergachev. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Vladimir Dergachev <[email protected]> + * Nicolai Haehnle <[email protected]> + * Aapo Tahkola <[email protected]> + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ + +/* This files defines functions for accessing R300 hardware. + */ +#ifndef __R300_EMIT_H__ +#define __R300_EMIT_H__ + #include "glheader.h" #include "r300_context.h" #include "r300_cmdbuf.h" +#include "radeon_reg.h" -/* convenience macros */ -#define RADEON_CP_PACKET0 0x00000000 -#define RADEON_CP_PACKET1 0x40000000 -#define RADEON_CP_PACKET2 0x80000000 -#define RADEON_CP_PACKET3 0xC0000000 - -#define RADEON_CP_PACKET3_NOP 0xC0001000 -#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900 +/* + * CP type-3 packets + */ #define RADEON_CP_PACKET3_UNK1B 0xC0001B00 -#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00 -#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00 -#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300 -#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400 -#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600 -#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800 -#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900 -#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 -#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 #define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300 #define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400 #define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500 #define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 -#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 -#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 -#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 -#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400 -#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500 -#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800 -#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00 -#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00 -#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 
0xC0009C00 #define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202 #define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802 #define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702 #define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) -/* Glue to R300 Mesa driver */ -#define LOCAL_VARS int cmd_reserved=0;\ - int cmd_written=0; \ - drm_radeon_cmd_header_t *cmd=NULL; - -#define PREFIX_VOID r300ContextPtr rmesa - -#define PREFIX PREFIX_VOID , - -#define PASS_PREFIX_VOID rmesa -#define PASS_PREFIX rmesa , - -typedef GLuint CARD32; - -/* This files defines functions for accessing R300 hardware. - It needs to be customized to whatever code r300_lib.c is used - in */ - void static inline check_space(int dwords) { } @@ -127,156 +126,167 @@ static __inline__ uint32_t cmdpacify(void) return cmd.u; } -/* Prepare to write a register value to register at address reg. - If num_extra > 0 then the following extra values are written - to registers with address +4, +8 and so on.. */ -#define reg_start(reg, num_extra) \ - { \ - int _n; \ - _n=(num_extra); \ - cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - (_n+2), \ - __FUNCTION__); \ - cmd_reserved=_n+2; \ - cmd_written=1; \ - cmd[0].i=cmdpacket0((reg), _n+1); \ - } - -/* Prepare to write a register value to register at address reg. - If num_extra > 0 then the following extra values are written - into the same register. */ -/* It is here to permit r300_lib to compile and link anyway, but - complain if actually called */ -#define reg_start_pump(reg, num_extra) \ - { \ - fprintf(stderr, "I am not defined.. Error ! in %s::%s at line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - exit(-1); \ - } - -/* Emit CARD32 freestyle*/ -#define e32(dword) { \ - if(cmd_written<cmd_reserved){\ - cmd[cmd_written].i=(dword); \ - cmd_written++; \ - } else { \ - fprintf(stderr, "e32 but no previous packet declaration.. Aborting! 
in %s::%s at line %d, cmd_written=%d cmd_reserved=%d\n", \ - __FILE__, __FUNCTION__, __LINE__, cmd_written, cmd_reserved); \ - exit(-1); \ - } \ - } +/** + * Prepare to write a register value to register at address reg. + * If num_extra > 0 then the following extra values are written + * to registers with address +4, +8 and so on.. + */ +#define reg_start(reg, num_extra) \ + do { \ + int _n; \ + _n=(num_extra); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+2), \ + __func__); \ + cmd_reserved=_n+2; \ + cmd_written=1; \ + cmd[0].i=cmdpacket0((reg), _n+1); \ + } while (0); + +/** + * Emit GLuint freestyle + */ +#define e32(dword) \ + do { \ + if(cmd_written<cmd_reserved) { \ + cmd[cmd_written].i=(dword); \ + cmd_written++; \ + } else { \ + fprintf(stderr, \ + "e32 but no previous packet " \ + "declaration.\n" \ + "Aborting! in %s::%s at line %d, " \ + "cmd_written=%d cmd_reserved=%d\n", \ + __FILE__, __FUNCTION__, __LINE__, \ + cmd_written, cmd_reserved); \ + exit(-1); \ + } \ + } while(0); #define efloat(f) e32(r300PackFloat32(f)) -#define vsf_start_fragment(dest, length) \ - { \ - int _n; \ - _n=(length); \ - cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - (_n+1), \ - __FUNCTION__); \ - cmd_reserved=_n+2; \ - cmd_written=1; \ - cmd[0].i=cmdvpu((dest), _n/4); \ - } - -#define start_packet3(packet, count) \ - { \ - int _n; \ - CARD32 _p; \ - _n=(count); \ - _p=(packet); \ - cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - (_n+3), \ - __FUNCTION__); \ - cmd_reserved=_n+3; \ - cmd_written=2; \ - if(_n>0x3fff) {\ - fprintf(stderr,"Too big packet3 %08x: cannot store %d dwords\n", \ - _p, _n); \ - exit(-1); \ - } \ - cmd[0].i=cmdpacket3(R300_CMD_PACKET3_RAW); \ - cmd[1].i=_p | ((_n & 0x3fff)<<16); \ +#define vsf_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __func__); \ + cmd_reserved = _n+2; \ + cmd_written =1; \ + cmd[0].i 
= cmdvpu((dest), _n/4); \ + } while (0); + +#define start_packet3(packet, count) \ + { \ + int _n; \ + GLuint _p; \ + _n = (count); \ + _p = (packet); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+3), \ + __func__); \ + cmd_reserved = _n+3; \ + cmd_written = 2; \ + if(_n > 0x3fff) { \ + fprintf(stderr,"Too big packet3 %08x: cannot " \ + "store %d dwords\n", \ + _p, _n); \ + exit(-1); \ + } \ + cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ + cmd[1].i = _p | ((_n & 0x3fff)<<16); \ } - /* must be sent to switch to 2d commands */ - -void static inline end_3d(PREFIX_VOID) +/** + * Must be sent to switch to 2d commands + */ +void static inline end_3d(r300ContextPtr rmesa) { -LOCAL_VARS -(void)cmd_reserved; (void)cmd_written; + drm_radeon_cmd_header_t *cmd = NULL; -cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - 1, \ - __FUNCTION__); \ - -cmd[0].header.cmd_type=R300_CMD_END3D; + cmd = (drm_radeon_cmd_header_t*)r300AllocCmdBuf(rmesa, + 1, + __FUNCTION__); + cmd[0].header.cmd_type=R300_CMD_END3D; } -void static inline cp_delay(PREFIX unsigned short count) +void static inline cp_delay(r300ContextPtr rmesa, unsigned short count) { -LOCAL_VARS -(void)cmd_reserved; (void)cmd_written; - -cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - 1, \ - __FUNCTION__); \ + drm_radeon_cmd_header_t *cmd = NULL; -cmd[0].i=cmdcpdelay(count); + cmd = (drm_radeon_cmd_header_t*)r300AllocCmdBuf(rmesa, + 1, + __FUNCTION__); + cmd[0].i=cmdcpdelay(count); } -void static inline cp_wait(PREFIX unsigned char flags) +void static inline cp_wait(r300ContextPtr rmesa, unsigned char flags) { -LOCAL_VARS -(void)cmd_reserved; (void)cmd_written; - -cmd=(drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, \ - 1, \ - __FUNCTION__); \ + drm_radeon_cmd_header_t *cmd = NULL; -cmd[0].i=cmdwait(flags); + cmd = (drm_radeon_cmd_header_t*)r300AllocCmdBuf(rmesa, + 1, + __FUNCTION__); + cmd[0].i = cmdwait(flags); } -/* fire vertex buffer */ -static void inline fire_AOS(PREFIX 
int vertex_count, int type) +/** + * fire vertex buffer + */ +static void inline fire_AOS(r300ContextPtr rmesa, int vertex_count, int type) { -LOCAL_VARS -check_space(9); - -start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0); -/* e32(0x840c0024); */ - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count<<16) | type); + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + check_space(9); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0); +#ifdef NOTNEEDED_ANYMORE + e32(0x840c0024); +#endif + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | + (vertex_count<<16) | type); } -/* these are followed by the corresponding data */ -#define start_index32_packet(vertex_count, type) \ - {\ - int _vc;\ - _vc=(vertex_count); \ - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, _vc); \ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (_vc<<16) | type \ - | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); \ - } - -#define start_index16_packet(vertex_count, type) \ - {\ - int _vc, _n;\ - _vc=(vertex_count); \ - _n=(vertex_count+1)>>1; \ - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, _n); \ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (_vc<<16) | type); \ - } - -/* Interestingly enough this ones needs the call to setup_AOS, even thought - some of the data so setup is not needed and some is not as arbitrary - as when used by DRAW_VBUF_2 or DRAW_INDX_2 */ -#define start_immediate_packet(vertex_count, type, vertex_size) \ - {\ - int _vc; \ - _vc=(vertex_count); \ - start_packet3(RADEON_CP_PACKET3_3D_DRAW_IMMD_2, _vc*(vertex_size)); \ - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (_vc<<16) | type); \ - } +/** + * These are followed by the corresponding data + */ +#define start_index32_packet(vertex_count, type) \ + do { \ + int _vc; \ + _vc = (vertex_count); \ + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, _vc); \ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (_vc<<16) | \ + type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); \ + } while (0); + +#define 
start_index16_packet(vertex_count, type) \ + do { \ + int _vc, _n; \ + _vc = (vertex_count); \ + _n = (vertex_count+1)>>1; \ + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, _n); \ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (_vc<<16) | \ + type); \ + } while (0); + +/** + * Interestingly enough this ones needs the call to setup_AOS, even thought + * some of the data so setup is not needed and some is not as arbitrary + * as when used by DRAW_VBUF_2 or DRAW_INDX_2 + */ +#define start_immediate_packet(vertex_count, type, vertex_size) \ + do { \ + int _vc; \ + _vc = (vertex_count); \ + start_packet3(RADEON_CP_PACKET3_3D_DRAW_IMMD_2, \ + _vc*(vertex_size)); \ + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | \ + (_vc<<16) | type); \ + } while (0); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index e045f0c6a1e..2d947dea3af 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -529,7 +529,7 @@ static pfs_reg_t t_src(struct r300_fragment_program *rp, } /* no point swizzling ONE/ZERO/HALF constants... */ - if (r.v_swz < SWIZZLE_111 && r.s_swz < SWIZZLE_ZERO) + if (r.v_swz < SWIZZLE_111 || r.s_swz < SWIZZLE_ZERO) r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase); #if 0 /* WRONG! 
Need to be able to do individual component negation, @@ -1018,7 +1018,7 @@ static void emit_arith(struct r300_fragment_program *rp, int op, #if 0 static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr) { - struct fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; pfs_reg_t r = undef; if (!(mp->Base.InputsRead & (1<<attr))) { @@ -1035,7 +1035,7 @@ static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr) static GLboolean parse_program(struct r300_fragment_program *rp) { - struct fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; pfs_reg_t src[3], dest, temp; @@ -1355,7 +1355,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) static void init_program(struct r300_fragment_program *rp) { struct r300_pfs_compile_state *cs = NULL; - struct fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; GLuint temps_used = 0; /* for rp->temps[] */ @@ -1467,7 +1467,7 @@ static void init_program(struct r300_fragment_program *rp) static void update_params(struct r300_fragment_program *rp) { - struct fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; int i; /* Ask Mesa nicely to fill in ParameterValues for us */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index a6c93bd3607..e7dbaf973e2 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -1,3 +1,35 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ @@ -59,30 +91,27 @@ typedef struct r300_fragment_program_swizzle { #define SRC_MASK (63 << 0) #define SRC_STRIDE 6 -#define NOP_INST0 ( \ - (R300_FPI0_OUTC_MAD) | \ +#define NOP_INST0 ( \ + (R300_FPI0_OUTC_MAD) | \ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) -#define NOP_INST1 ( \ +#define NOP_INST1 ( \ ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) #define NOP_INST2 ( \ - (R300_FPI2_OUTA_MAD) | \ + (R300_FPI2_OUTA_MAD) | \ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) -#define NOP_INST3 ( \ +#define NOP_INST3 ( \ ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) -#include "r300_context.h" - struct r300_fragment_program; extern void r300_translate_fragment_shader(struct r300_fragment_program *rp); #endif - diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 159285962d2..d0d2def8648 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -65,12 +65,12 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; GLuint cboffset, cbpitch; drm_r300_cmd_header_t* cmd2; -#ifdef CB_DPATH - r300ContextPtr rmesa=r300; - LOCAL_VARS; -#else + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; r300ContextPtr rmesa=r300; - LOCAL_VARS; + +#ifndef CB_DPATH int i; #endif @@ -218,32 +218,18 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, 
int buffer) r300->hw.vpi.cmd[8] = 0; R300_STATECHANGE(r300, zs); + r300->hw.zs.cmd[R300_ZS_CNTL_0] = 0; + r300->hw.zs.cmd[R300_ZS_CNTL_1] = 0; if (flags & CLEARBUFFER_DEPTH) { - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= 0x6; // test and write - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_WRITE_ONLY; r300->hw.zs.cmd[R300_ZS_CNTL_1] |= (R300_ZS_ALWAYS<<R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); -/* - R300_STATECHANGE(r300, zb); - r300->hw.zb.cmd[R300_ZB_OFFSET] = - 1024*4*300 + - r300->radeon.radeonScreen->frontOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.zb.cmd[R300_ZB_PITCH] = - r300->radeon.radeonScreen->depthPitch; -*/ } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; // disable - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); } R300_STATECHANGE(r300, zs); if (flags & CLEARBUFFER_STENCIL) { - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= ~R300_RB3D_STENCIL_ENABLE; r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_STENCIL_ENABLE; - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~((R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT)); r300->hw.zs.cmd[R300_ZS_CNTL_1] |= (R300_ZS_ALWAYS<<R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | (R300_ZS_REPLACE<<R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | @@ -263,7 +249,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) #else #if 1 cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - end_3d(PASS_PREFIX_VOID); + end_3d(rmesa); #endif R300_STATECHANGE(r300, cb); @@ -299,37 +285,18 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) { uint32_t t1, t2; - t1 = r300->hw.zs.cmd[R300_ZS_CNTL_0]; - t2 = r300->hw.zs.cmd[R300_ZS_CNTL_1]; + t1 = 0x0; + t2 = 0x0; if (flags & CLEARBUFFER_DEPTH) { - t1 &= 
R300_RB3D_STENCIL_ENABLE; - t1 |= 0x6; // test and write - - t2 &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); - t2 |= (R300_ZS_ALWAYS<<R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); -/* - R300_STATECHANGE(r300, zb); - r300->hw.zb.cmd[R300_ZB_OFFSET] = - 1024*4*300 + - r300->radeon.radeonScreen->frontOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.zb.cmd[R300_ZB_PITCH] = - r300->radeon.radeonScreen->depthPitch; -*/ + t1 |= R300_RB3D_Z_WRITE_ONLY; + t2 |= (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); } else { - t1 &= R300_RB3D_STENCIL_ENABLE; t1 |= R300_RB3D_Z_DISABLED_1; // disable - - t2 &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); } if (flags & CLEARBUFFER_STENCIL) { - t1 &= ~R300_RB3D_STENCIL_ENABLE; t1 |= R300_RB3D_STENCIL_ENABLE; - - t2 &= - ~((R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT)); t2 |= (R300_ZS_ALWAYS<<R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | (R300_ZS_REPLACE<<R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | @@ -378,7 +345,9 @@ static void r300EmitClearState(GLcontext * ctx) r300ContextPtr rmesa=r300; __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; int i; - LOCAL_VARS; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; R300_STATECHANGE(r300, vir[0]); @@ -875,9 +844,6 @@ GLuint r300GetMemoryOffsetMESA(__DRInativeDisplay * dpy, int scrn, if (!r300IsGartMemory(rmesa, pointer, 0)) return ~0; - if (rmesa->radeon.dri.drmMinor < 6) - return ~0; - card_offset = r300GartOffsetFromVirtual(rmesa, pointer); return card_offset - rmesa->radeon.radeonScreen->gart_base; diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c index 6b8365e6d9e..2fdad519fd5 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.c +++ b/src/mesa/drivers/dri/r300/r300_maos.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "colormac.h" #include "imports.h" #include "macros.h" +#include "image.h" #include "swrast_setup/swrast_setup.h" #include "math/m_translate.h" @@ -54,6 +55,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mm.h" #endif +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + #define DEBUG_ALL DEBUG_VERTS @@ -177,16 +187,6 @@ static void emit_vector(GLcontext * ctx, fprintf(stderr, "%s count %d size %d stride %d\n", __FUNCTION__, count, size, stride); - if(r300IsGartMemory(rmesa, data, /*(count-1)*stride */ 4)){ - rvb->address = data; - rvb->start = 0; - rvb->aos_offset = r300GartOffsetFromVirtual(rmesa, data); - rvb->aos_stride = stride / 4 ; - - rvb->aos_size = size; - return; - } - /* Gets triggered when playing with future_hw_tcl_on ...*/ //assert(!rvb->buf); @@ -195,12 +195,10 @@ static void emit_vector(GLcontext * ctx, count = 1; rvb->aos_offset = GET_START(rvb); rvb->aos_stride = 0; - rvb->aos_size = size; } else { r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? 
*/ rvb->aos_offset = GET_START(rvb); rvb->aos_stride = size; - rvb->aos_size = size; } /* Emit the data @@ -252,315 +250,297 @@ void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_siz memcpy(out, elts, n_elts * elt_size); } - /* Mesa assumes that all missing components are from (0, 0, 0, 1) */ -#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT)) - -#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \ - | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT)) - +static GLuint t_type(struct dt *dt) +{ + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return AOS_FORMAT_UBYTE; + + case GL_SHORT: + return AOS_FORMAT_USHORT; + + case GL_FLOAT: + return AOS_FORMAT_FLOAT; + + default: + assert(0); + break; + } + + return AOS_FORMAT_FLOAT; +} -static GLuint t_comps(GLuint aos_size) +static GLuint t_vir0_size(struct dt *dt) { - GLuint mask; - mask = (1 << (aos_size*3)) - 1; - return (ALL_COMPONENTS & mask) | (ALL_DEFAULT & ~mask); + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return 4; + + case GL_SHORT: + return 7; + + case GL_FLOAT: + return dt->size - 1; + + default: + assert(0); + break; + } + + return 0; } -static GLuint fix_comps(GLuint dw, int fmt) -{ -#ifdef MESA_BIG_ENDIAN - if (fmt == 2) { - GLuint dw_temp = 0; +static GLuint t_aos_size(struct dt *dt) +{ + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return 1; + + case GL_SHORT: + return 2; + + case GL_FLOAT: + return dt->size; + + default: + assert(0); + break; + } + + return 0; +} - dw_temp |= ((dw >> R300_INPUT_ROUTE_X_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_W_SHIFT; - dw_temp |= ((dw >> 
R300_INPUT_ROUTE_Y_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Z_SHIFT; - dw_temp |= ((dw >> R300_INPUT_ROUTE_Z_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Y_SHIFT; - dw_temp |= ((dw >> R300_INPUT_ROUTE_W_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_X_SHIFT; +static GLuint t_vir0(uint32_t *dst, struct dt *dt, int *inputs, GLint *tab, GLuint nr) +{ + GLuint i, dw; + + for (i = 0; i + 1 < nr; i += 2){ + dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) | (t_type(&dt[tab[i]]) << 14); + dw |= (t_vir0_size(&dt[tab[i + 1]]) | (inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]]) << 14)) << 16; - return dw_temp; + if (i + 2 == nr) { + dw |= (1 << (13 + 16)); + } + dst[i >> 1] = dw; } -#endif /* MESA_BIG_ENDIAN */ - return dw; + + if (nr & 1) { + dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]] << 8) | (t_type(&dt[tab[nr - 1]]) << 14); + dw |= 1 << 13; + dst[nr >> 1] = dw; + } + + return (nr + 1) >> 1; } -/* Emit vertex data to GART memory (unless immediate mode) - * Route inputs to the vertex processor - */ - -void r300EmitArrays(GLcontext * ctx, GLboolean immd) +static GLuint t_swizzle(int swizzle[4]) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - r300ContextPtr r300 = rmesa; - struct radeon_vertex_buffer *VB = &rmesa->state.VB; - //struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; - GLuint nr = 0; - GLuint count = VB->Count; - GLuint dw; - GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */ - GLuint aa_vap_reg = 0; /* VAP register assignment */ - GLuint i; - DECLARE_RENDERINPUTS(inputs_bitset); - - RENDERINPUTS_ZERO( inputs_bitset ); - -#define CONFIGURE_AOS(r, f, v, sz, cn) { \ - if (RADEON_DEBUG & DEBUG_STATE) \ - fprintf(stderr, "Enabling "#v "\n"); \ - if (++nr >= R300_MAX_AOS_ARRAYS) { \ - fprintf(stderr, "Aieee! 
AOS array count exceeded!\n"); \ - exit(-1); \ - } \ - \ - if (hw_tcl_on == GL_FALSE) \ - rmesa->state.aos[nr-1].aos_reg = aa_vap_reg++; \ - rmesa->state.aos[nr-1].aos_format = f; \ - if (immd) { \ - rmesa->state.aos[nr-1].aos_size = 4; \ - rmesa->state.aos[nr-1].aos_stride = 4; \ - rmesa->state.aos[nr-1].aos_offset = 0; \ - } else { \ - emit_vector(ctx, \ - &rmesa->state.aos[nr-1], \ - v.data, \ - sz, \ - v.stride, \ - cn); \ - rmesa->state.vap_reg.r=rmesa->state.aos[nr-1].aos_reg; \ - } \ + return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | + (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | + (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | + (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); } - if (hw_tcl_on) { - GLuint InputsRead = CURRENT_VERTEX_SHADER(ctx)->Base.InputsRead; - struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - if (InputsRead & (1<<VERT_ATTRIB_POS)) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_POS ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_POS]; - } - if (InputsRead & (1<<VERT_ATTRIB_NORMAL)) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_NORMAL ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_NORMAL]; - } - if (InputsRead & (1<<VERT_ATTRIB_COLOR0)) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_COLOR0 ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_COLOR0]; - } - if (InputsRead & (1<<VERT_ATTRIB_COLOR1)) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_COLOR1 ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_COLOR1]; - } - if (InputsRead & (1<<VERT_ATTRIB_FOG)) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_FOG ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_FOG]; - } - if(ctx->Const.MaxTextureUnits > 8) { /* Not sure if this can even happen... 
*/ - fprintf(stderr, "%s: Cant handle that many inputs\n", __FUNCTION__); - exit(-1); - } - for (i=0;i<ctx->Const.MaxTextureUnits;i++) { - if (InputsRead & (1<<(VERT_ATTRIB_TEX0+i))) { - RENDERINPUTS_SET( inputs_bitset, _TNL_ATTRIB_TEX(i) ); - rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_TEX0+i]; - } - } - nr = 0; - } else { - RENDERINPUTS_COPY( inputs_bitset, TNL_CONTEXT(ctx)->render_inputs_bitset ); +static GLuint t_vir1(uint32_t *dst, int swizzle[][4], GLuint nr) +{ + GLuint i; + + for (i = 0; i + 1 < nr; i += 2) { + dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dst[i >> 1] |= (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16; } - RENDERINPUTS_COPY( rmesa->state.render_inputs_bitset, inputs_bitset ); + + if (nr & 1) + dst[nr >> 1] = t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE; + + return (nr + 1) >> 1; +} - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_POS )) { - CONFIGURE_AOS(i_coords, AOS_FORMAT_FLOAT, - VB->AttribPtr[VERT_ATTRIB_POS], - immd ? 4 : VB->AttribPtr[VERT_ATTRIB_POS].size, - count); +static GLuint t_emit_size(struct dt *dt) +{ + return dt->size; +} +static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint i, vic_1 = 0; + + if (InputsRead & (1 << VERT_ATTRIB_POS)) vic_1 |= R300_INPUT_CNTL_POS; - } - - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_NORMAL )) { - CONFIGURE_AOS(i_normal, AOS_FORMAT_FLOAT, - VB->AttribPtr[VERT_ATTRIB_NORMAL], - immd ? 
4 : VB->AttribPtr[VERT_ATTRIB_NORMAL].size, - count); - + + if (InputsRead & (1 << VERT_ATTRIB_NORMAL)) vic_1 |= R300_INPUT_CNTL_NORMAL; - } - - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR0 )) { - int emitsize=4; - - if (!immd) { - if (VB->AttribPtr[VERT_ATTRIB_COLOR0].size == 4 && - (VB->AttribPtr[VERT_ATTRIB_COLOR0].stride != 0 || - ((float*)VB->AttribPtr[VERT_ATTRIB_COLOR0].data)[3] != 1.0)) { - emitsize = 4; - } else { - emitsize = 3; - }//emitsize = VB->AttribPtr[VERT_ATTRIB_COLOR0].size; - } - if(VB->AttribPtr[VERT_ATTRIB_COLOR0].type == GL_UNSIGNED_BYTE) - emitsize = 1; - - CONFIGURE_AOS(i_color[0], VB->AttribPtr[VERT_ATTRIB_COLOR0].type == GL_UNSIGNED_BYTE ? AOS_FORMAT_UBYTE : AOS_FORMAT_FLOAT_COLOR, - VB->AttribPtr[VERT_ATTRIB_COLOR0], - immd ? 4 : emitsize, - count); + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) vic_1 |= R300_INPUT_CNTL_COLOR; - } - - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR1 )) { - int emitsize=4; - - if (!immd) { - if (VB->AttribPtr[VERT_ATTRIB_COLOR1].size == 4 && - (VB->AttribPtr[VERT_ATTRIB_COLOR1].stride != 0 || - ((float*)VB->AttribPtr[VERT_ATTRIB_COLOR1].data)[3] != 1.0)) { - emitsize = 4; - } else { - emitsize = 3; - }//emitsize = VB->AttribPtr[VERT_ATTRIB_COLOR1].size; - } - if(VB->AttribPtr[VERT_ATTRIB_COLOR1].type == GL_UNSIGNED_BYTE) - emitsize = 1; - - CONFIGURE_AOS(i_color[1], VB->AttribPtr[VERT_ATTRIB_COLOR1].type == GL_UNSIGNED_BYTE ? AOS_FORMAT_UBYTE : AOS_FORMAT_FLOAT_COLOR, - VB->AttribPtr[VERT_ATTRIB_COLOR1], - immd ? 4 : VB->AttribPtr[VERT_ATTRIB_COLOR1].size, - count); - } - -#if 0 - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_FOG )) { - CONFIGURE_AOS( AOS_FORMAT_FLOAT, - VB->FogCoordPtr, - immd ? 
4 : VB->FogCoordPtr->size, - count); - } -#endif - + r300->state.texture.tc_count = 0; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_TEX(i) )) { - CONFIGURE_AOS(i_tex[i], AOS_FORMAT_FLOAT, - VB->AttribPtr[VERT_ATTRIB_TEX0+i], - immd ? 4 : VB->AttribPtr[VERT_ATTRIB_TEX0+i].size, - count); - - vic_1 |= R300_INPUT_CNTL_TC0 << i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { r300->state.texture.tc_count++; - } - } - for(i=0; i < nr; i++) - if(r300->state.aos[i].aos_format == 2){ - assert(r300->state.aos[i].aos_size == 1); - r300->state.aos[i].aos_size=5; + vic_1 |= R300_INPUT_CNTL_TC0 << i; } -#define SHOW_INFO(n) do { \ - if (RADEON_DEBUG & DEBUG_ALL) { \ - fprintf(stderr, "RR[%d] - sz=%d, reg=%d, fmt=%d -- st=%d, of=0x%08x\n", \ - n, \ - r300->state.aos[n].aos_size, \ - r300->state.aos[n].aos_reg, \ - r300->state.aos[n].aos_format, \ - r300->state.aos[n].aos_stride, \ - r300->state.aos[n].aos_offset); \ - } \ -} while(0); - - /* setup INPUT_ROUTE */ - R300_STATECHANGE(r300, vir[0]); - for(i=0;i+1<nr;i+=2){ - SHOW_INFO(i) - SHOW_INFO(i+1) - dw=(r300->state.aos[i].aos_size-1) - | ((r300->state.aos[i].aos_reg)<<8) - | (r300->state.aos[i].aos_format<<14) - | (((r300->state.aos[i+1].aos_size-1) - | ((r300->state.aos[i+1].aos_reg)<<8) - | (r300->state.aos[i+1].aos_format<<14))<<16); - - if(i+2==nr){ - dw|=(1<<(13+16)); - } - r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw; - } - if(nr & 1){ - SHOW_INFO(nr-1) - dw=(r300->state.aos[nr-1].aos_size-1) - | (r300->state.aos[nr-1].aos_format<<14) - | ((r300->state.aos[nr-1].aos_reg)<<8) - | (1<<13); - r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; - //fprintf(stderr, "vir0 dw=%08x\n", dw); - } - /* Set the rest of INPUT_ROUTE_0 to 0 */ - //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0); - ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = (nr+1)>>1; - + return vic_1; +} - 
R300_STATECHANGE(r300, vir[1]); +/* Emit vertex data to GART memory + * Route inputs to the vertex processor + * This function should never return R300_FALLBACK_TCL when using software tcl. + */ - for(i=0; i < nr; i++) - if(r300->state.aos[i].aos_format == 2){ - assert(r300->state.aos[i].aos_size == 5); - r300->state.aos[i].aos_size=/*3*/4; /* XXX */ +int r300EmitArrays(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = rmesa; + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + GLuint nr; + GLuint count = VB->Count; + GLuint i; + GLuint InputsRead = 0, OutputsWritten = 0; + int *inputs = NULL; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + + if (hw_tcl_on) { + struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + inputs = prog->inputs; + InputsRead = CURRENT_VERTEX_SHADER(ctx)->Base.InputsRead; + OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->Base.OutputsWritten; + } else { + DECLARE_RENDERINPUTS(inputs_bitset); + inputs = r300->state.sw_tcl_inputs; + + RENDERINPUTS_COPY( inputs_bitset, TNL_CONTEXT(ctx)->render_inputs_bitset ); + + assert(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_POS )); + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + + assert(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_NORMAL ) == 0); + + assert(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR0 )); + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + + if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR1 )) { + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; } - for (i=0;i+1<nr;i+=2) { - /* do i first.. 
*/ - dw = fix_comps(t_comps(r300->state.aos[i].aos_size), r300->state.aos[i].aos_format) | R300_INPUT_ROUTE_ENABLE; - /* i+1 */ - dw |= (fix_comps(t_comps(r300->state.aos[i+1].aos_size), r300->state.aos[i+1].aos_format) | R300_INPUT_ROUTE_ENABLE) << 16; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_TEX(i) )) { + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) + if (InputsRead & (1 << i)) + inputs[i] = nr++; + else + inputs[i] = -1; - //fprintf(stderr, "vir1 dw=%08x\n", dw); - r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw; + RENDERINPUTS_COPY( rmesa->state.render_inputs_bitset, inputs_bitset ); } - if (nr & 1) { - dw = fix_comps(t_comps(r300->state.aos[nr-1].aos_size), r300->state.aos[nr-1].aos_format) | R300_INPUT_ROUTE_ENABLE; + assert(InputsRead); + assert(OutputsWritten); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) + if (InputsRead & (1 << i)) + tab[nr++] = i; + + if (nr > R300_MAX_AOS_ARRAYS) + return R300_FALLBACK_TCL; + + for (i = 0; i < nr; i++) { + int ci; + int comp_size, fix, found = 0; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + swizzle[i][ci] = ci; + +#if MESA_BIG_ENDIAN +#define SWAP_INT(a, b) do { \ + int __temp; \ + __temp = a;\ + a = b; \ + b = __temp; \ +} while (0) + + if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) { + SWAP_INT(swizzle[i][0], swizzle[i][3]); + SWAP_INT(swizzle[i][1], swizzle[i][2]); + } +#endif /* MESA_BIG_ENDIAN */ + + if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data, /*(count-1)*stride */ 4)) { + if (VB->AttribPtr[tab[i]].stride % 4) + return R300_FALLBACK_TCL; + + rmesa->state.aos[i].address = VB->AttribPtr[tab[i]].data; + rmesa->state.aos[i].start = 0; + rmesa->state.aos[i].aos_offset = 
r300GartOffsetFromVirtual(rmesa, VB->AttribPtr[tab[i]].data); + rmesa->state.aos[i].aos_stride = VB->AttribPtr[tab[i]].stride / 4; + + rmesa->state.aos[i].aos_size = t_emit_size(&VB->AttribPtr[tab[i]]); + } else { + /* TODO: emit_vector can only handle 4 byte vectors */ + if (VB->AttribPtr[tab[i]].type != GL_FLOAT) + return R300_FALLBACK_TCL; + + emit_vector(ctx, &rmesa->state.aos[i], VB->AttribPtr[tab[i]].data, + t_emit_size(&VB->AttribPtr[tab[i]]), VB->AttribPtr[tab[i]].stride, count); + } + + rmesa->state.aos[i].aos_size = t_aos_size(&VB->AttribPtr[tab[i]]); + + comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type); - //fprintf(stderr, "vir1 dw=%08x\n", dw); - r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; + for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) { + if ((rmesa->state.aos[i].aos_offset - comp_size * fix) % 4) + continue; + + found = 1; + break; + } + + if (found) { + if (fix > 0) { + WARN_ONCE("Feeling lucky?\n"); + } + + rmesa->state.aos[i].aos_offset -= comp_size * fix; + + for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + swizzle[i][ci] += fix; + } else { + WARN_ONCE("Cannot handle offset %x with stride %d, comp %d\n", + rmesa->state.aos[i].aos_offset, rmesa->state.aos[i].aos_stride, VB->AttribPtr[tab[i]].size); + return R300_FALLBACK_TCL; + } } + + /* setup INPUT_ROUTE */ + R300_STATECHANGE(r300, vir[0]); + ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = + t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr, inputs, tab, nr); - /* Set the rest of INPUT_ROUTE_1 to 0 */ - //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0; - ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = (nr+1)>>1; + R300_STATECHANGE(r300, vir[1]); + ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = + t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); /* Set up input_cntl */ /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */ 
R300_STATECHANGE(r300, vic); - r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555; /* Hard coded value, no idea what it means */ - r300->hw.vic.cmd[R300_VIC_CNTL_1]=vic_1; - - for(i=0; i < nr; i++) - if(r300->state.aos[i].aos_format == 2){ - assert(r300->state.aos[i].aos_size == /*3*/4); /* XXX */ - r300->state.aos[i].aos_size=1; - } -#if 0 - r300->hw.vic.cmd[R300_VIC_CNTL_1]=0; - - if(RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_POS )) - r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS; - - if(RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_NORMAL )) - r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL; - - if(RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_COLOR0 )) - r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR; - - for(i=0;i < ctx->Const.MaxTextureUnits;i++) - if(RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) - r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i); -#endif + r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */ + r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead); /* Stage 3: VAP output */ @@ -568,41 +548,33 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd) r300->hw.vof.cmd[R300_VOF_CNTL_0]=0; r300->hw.vof.cmd[R300_VOF_CNTL_1]=0; - if (hw_tcl_on){ - GLuint OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->Base.OutputsWritten; - if(OutputsWritten & (1<<VERT_RESULT_HPOS)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if(OutputsWritten & (1<<VERT_RESULT_COL0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; - if(OutputsWritten & (1<<VERT_RESULT_COL1)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - /*if(OutputsWritten & (1<<VERT_RESULT_BFC0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; - if(OutputsWritten & (1<<VERT_RESULT_BFC1)) - 
r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;*/ - //if(OutputsWritten & (1<<VERT_RESULT_FOGC)) - - if(OutputsWritten & (1<<VERT_RESULT_PSIZ)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - for(i=0;i < ctx->Const.MaxTextureUnits;i++) - if(OutputsWritten & (1<<(VERT_RESULT_TEX0+i))) - r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4<<(3*i)); - } else { - if(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_POS )) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR0 )) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; - if(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_COLOR1 )) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - - for(i=0;i < ctx->Const.MaxTextureUnits;i++) - if(RENDERINPUTS_TEST( inputs_bitset, _TNL_ATTRIB_TEX(i) )) - r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i)); - } + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + /*if(OutputsWritten & (1 << VERT_RESULT_BFC0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; + + if(OutputsWritten & (1 << VERT_RESULT_BFC1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;*/ + //if(OutputsWritten & (1 << VERT_RESULT_FOGC)) + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + for(i=0;i < ctx->Const.MaxTextureUnits;i++) + if(OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) + r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i)); 
+ rmesa->state.aos_count = nr; + + return R300_FALLBACK_NONE; } #ifdef USER_BUFFERS @@ -621,8 +593,12 @@ void r300UseArrays(GLcontext * ctx) #ifdef HW_VBOS -#define USE_VBO(a) if (ctx->Array.a.BufferObj->Name && ctx->Array.a.Enabled) \ - radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.a.BufferObj)->id) +#define USE_VBO(a) \ + do { \ + if (ctx->Array.ArrayObj->a.BufferObj->Name \ + && ctx->Array.ArrayObj->a.Enabled) \ + radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.ArrayObj->a.BufferObj)->id); \ + } while(0) if (ctx->Array.ElementArrayBufferObj->Name && ctx->Array.ElementArrayBufferObj->OnCard) radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.ElementArrayBufferObj)->id); diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h index 7235af6ef6b..679f1c25588 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.h +++ b/src/mesa/drivers/dri/r300/r300_maos.h @@ -41,7 +41,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" extern void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_size); -extern void r300EmitArrays(GLcontext * ctx, GLboolean immd); +extern int r300EmitArrays(GLcontext *ctx); #ifdef USER_BUFFERS void r300UseArrays(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r300/r300_program.h b/src/mesa/drivers/dri/r300/r300_program.h index 3defe106c2d..3210660df1c 100644 --- a/src/mesa/drivers/dri/r300/r300_program.h +++ b/src/mesa/drivers/dri/r300/r300_program.h @@ -145,6 +145,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
((arg1) << R300_FPI2_ARG1A_SHIFT) | \ ((arg2) << R300_FPI2_ARG2A_SHIFT)) -extern void debug_vp(GLcontext *ctx, struct vertex_program *vp); +extern void debug_vp(GLcontext *ctx, struct gl_vertex_program *vp); #endif /* __R300_PROGRAM_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 4afe4f239c2..f3d8fa60f5b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -48,12 +48,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 /* -This file contains registers and constants for the R300. They have been -found mostly by examining command buffers captured using glxtest, as well -as by extrapolating some known registers and constants from the R200. - -I am fairly certain that they are correct unless stated otherwise in comments. -*/ + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ #define R300_SE_VPORT_XSCALE 0x1D98 #define R300_SE_VPORT_XOFFSET 0x1D9C @@ -63,46 +63,48 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_SE_VPORT_ZOFFSET 0x1DAC -/* This register is written directly and also starts data section in many 3d CP_PACKET3's */ +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ #define R300_VAP_VF_CNTL 0x2084 - -# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 -# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) -# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) -# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) -# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) -# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) -# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) -# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) -# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) -# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) -# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) -# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) - -# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 - /* State based - direct writes to registers trigger vertex generation */ -# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) -# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) -# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) -# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) - - /* I don't think I saw these three used.. 
*/ -# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 -# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 -# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 - - /* index size - when not set the indices are assumed to be 16 bit */ -# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) - /* number of vertices */ -# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. 
*/ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 /* BEGIN: Wild guesses */ #define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 # define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) # define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1) -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ -# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ # define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ #define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 @@ -114,7 +116,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 -/* END */ +/* END: Wild guesses */ #define R300_SE_VTE_CNTL 0x20b0 # define R300_VPORT_X_SCALE_ENA 0x00000001 @@ -130,29 +132,39 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_VTX_ST_DENORMALIZED 0x00001000 /* BEGIN: Vertex data assembly - lots of uncertainties */ + /* gap */ + +#define R300_VAP_CNTL 0x2140 +# define R300_VC_NO_SWAP (0 << 0) +# define R300_VC_16BIT_SWAP (1 << 0) +# define R300_VC_32BIT_SWAP (2 << 0) + +/* gap */ + /* Where do we get our vertex data? -// -// Vertex data either comes either from immediate mode registers or from -// vertex arrays. 
-// There appears to be no mixed mode (though we can force the pitch of -// vertex arrays to 0, effectively reusing the same element over and over -// again). -// -// Immediate mode is controlled by the INPUT_CNTL registers. I am not sure -// if these registers influence vertex array processing. -// -// Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. -// -// In both cases, vertex attributes are then passed through INPUT_ROUTE. - -// Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data -// into the vertex processor's input registers. -// The first word routes the first input, the second word the second, etc. -// The corresponding input is routed into the register with the given index. -// The list is ended by a word with INPUT_ROUTE_END set. -// -// Always set COMPONENTS_4 in immediate mode. */ + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ #define R300_VAP_INPUT_ROUTE_0_0 0x2150 # define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) @@ -176,10 +188,12 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_VAP_INPUT_ROUTE_0_7 0x216C /* gap */ + /* Notes: -// - always set up to produce at least two attributes: -// if vertex program uses only position, fglrx will set normal, too -// - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal */ + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ #define R300_VAP_INPUT_CNTL_0 0x2180 # define R300_INPUT_CNTL_0_COLOR 0x00000001 #define R300_VAP_INPUT_CNTL_1 0x2184 @@ -196,12 +210,14 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ /* gap */ + /* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 -// are set to a swizzling bit pattern, other words are 0. -// -// In immediate mode, the pattern is always set to xyzw. In vertex array -// mode, the swizzling pattern is e.g. used to set zw components in texture -// coordinates with only tweo components. */ + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only two components. + */ #define R300_VAP_INPUT_ROUTE_1_0 0x21E0 # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 @@ -210,11 +226,11 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 # define R300_INPUT_ROUTE_SELECT_MASK 7 -# define R300_INPUT_ROUTE_X_SHIFT 0 -# define R300_INPUT_ROUTE_Y_SHIFT 3 -# define R300_INPUT_ROUTE_Z_SHIFT 6 -# define R300_INPUT_ROUTE_W_SHIFT 9 -# define R300_INPUT_ROUTE_ENABLE (15 << 12) +# define R300_INPUT_ROUTE_X_SHIFT 0 +# define R300_INPUT_ROUTE_Y_SHIFT 3 +# define R300_INPUT_ROUTE_Z_SHIFT 6 +# define R300_INPUT_ROUTE_W_SHIFT 9 +# define R300_INPUT_ROUTE_ENABLE (15 << 12) #define R300_VAP_INPUT_ROUTE_1_1 0x21E4 #define R300_VAP_INPUT_ROUTE_1_2 0x21E8 #define R300_VAP_INPUT_ROUTE_1_3 0x21EC @@ -223,53 +239,64 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_VAP_INPUT_ROUTE_1_6 0x21F8 #define R300_VAP_INPUT_ROUTE_1_7 0x21FC -/* END */ +/* END: Vertex data assembly */ /* gap */ -/* BEGIN: Upload vertex program and data -// The programmable vertex shader unit has a memory bank of unknown size -// that can be written to in 16 byte units by writing the address into -// UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). -// -// Pointers into the memory bank are always in multiples of 16 bytes. -// -// The memory bank is divided into areas with fixed meaning. -// -// Starting at address UPLOAD_PROGRAM: Vertex program instructions. -// Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), -// whereas the difference between known addresses suggests size 512. -// -// Starting at address UPLOAD_PARAMETERS: Vertex program parameters. -// Native reported limits and the VPI layout suggest size 256, whereas -// difference between known addresses suggests size 512. -// -// At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the -// floating point pointsize. The exact purpose of this state is uncertain, -// as there is also the R300_RE_POINTSIZE register. 
-// -// Multiple vertex programs and parameter sets can be loaded at once, -// which could explain the size discrepancy. */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ #define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 # define R300_PVS_UPLOAD_PROGRAM 0x00000000 # define R300_PVS_UPLOAD_PARAMETERS 0x00000200 # define R300_PVS_UPLOAD_POINTSIZE 0x00000406 + /* gap */ + #define R300_VAP_PVS_UPLOAD_DATA 0x2208 -/* END */ + +/* END: Upload vertex program and data */ /* gap */ + /* I do not know the purpose of this register. However, I do know that -// it is set to 221C_CLEAR for clear operations and to 221C_NORMAL -// for normal rendering. */ + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. 
+ */ #define R300_VAP_UNKNOWN_221C 0x221C # define R300_221C_NORMAL 0x00000000 # define R300_221C_CLEAR 0x0001C000 /* gap */ + /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between -// rendering commands and overwriting vertex program parameters. -// Therefore, I suspect writing zero to 0x2284 synchronizes the engine and -// avoids bugs caused by still running shaders reading bad data from memory. */ + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ #define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ /* Absolutely no clue what this register is about. */ @@ -278,19 +305,22 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ /* gap */ + /* Addresses are relative to the vertex program instruction area of the -// memory bank. PROGRAM_END points to the last instruction of the active -// program -// -// The meaning of the two UNKNOWN fields is obviously not known. However, -// experiments so far have shown that both *must* point to an instruction -// inside the vertex program, otherwise the GPU locks up. -// fglrx usually sets CNTL_3_UNKNOWN to the end of the program and -// CNTL_1_UNKNOWN points to instruction where last write to position takes place. -// Most likely this is used to ignore rest of the program in cases where group of verts arent visible. -// For some reason this "section" is sometimes accepted other instruction that have -// no relationship with position calculations. -*/ + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. 
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * CNTL_1_UNKNOWN points to instruction where last write to position takes + * place. + * Most likely this is used to ignore rest of the program in cases + * where group of verts aren't visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ #define R300_VAP_PVS_CNTL_1 0x22D0 # define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 # define R300_PVS_CNTL_1_POS_END_SHIFT 10 @@ -304,7 +334,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for -// immediate vertices */ + * immediate vertices + */ #define R300_VAP_VTX_COLOR_R 0x2464 #define R300_VAP_VTX_COLOR_G 0x2468 #define R300_VAP_VTX_COLOR_B 0x246C @@ -314,13 +345,15 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ #define R300_VAP_VTX_POS_0_Y_2 0x24A4 #define R300_VAP_VTX_POS_0_Z_2 0x24A8 -#define R300_VAP_VTX_END_OF_PKT 0x24AC /* write 0 to indicate end of packet? */ +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct - and are here so we can use one register file instead of several - - Vladimir */ + * and are here so we can use one register file instead of several + * - Vladimir + */ #define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 # define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) # define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) @@ -343,8 +376,10 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 /* UNK30 seems to enables point to quad transformation on textures - (or something closely related to that). - This bit is rather fatal at the time being due to lackings at pixel shader side */ + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + */ #define R300_GB_ENABLE 0x4008 # define R300_GB_POINT_STUFF_ENABLE (1<<0) # define R300_GB_LINE_STUFF_ENABLE (1<<1) @@ -452,9 +487,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_AA_SUBSAMPLES_4 (2<<1) # define R300_AA_SUBSAMPLES_6 (3<<1) -/* END */ - /* gap */ + /* Zero to flush caches. */ #define R300_TX_CNTL 0x4100 @@ -478,8 +512,9 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_TX_ENABLE_15 (1 << 15) /* The pointsize is given in multiples of 6. The pointsize can be -// enormous: Clear() renders a single point that fills the entire -// framebuffer. */ + * enormous: Clear() renders a single point that fills the entire + * framebuffer. + */ #define R300_RE_POINTSIZE 0x421C # define R300_POINTSIZE_Y_SHIFT 0 # define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ @@ -488,11 +523,11 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) /* The line width is given in multiples of 6. - In default mode lines are classified as vertical lines. - HO: horizontal - VE: vertical or horizontal - HO & VE: no classification -*/ + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ #define R300_RE_LINE_CNT 0x4234 # define R300_LINESIZE_SHIFT 0 # define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ @@ -522,23 +557,25 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_RE_FOG_START 0x4298 /* Not sure why there are duplicate of factor and constant values. - My best guess so far is that there are seperate zbiases for test and write. - Ordering might be wrong. - Some of the tests indicate that fgl has a fallback implementation of zbias - via pixel shaders. */ + * My best guess so far is that there are separate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ #define R300_RE_ZBIAS_T_FACTOR 0x42A4 #define R300_RE_ZBIAS_T_CONSTANT 0x42A8 #define R300_RE_ZBIAS_W_FACTOR 0x42AC #define R300_RE_ZBIAS_W_CONSTANT 0x42B0 /* This register needs to be set to (1<<1) for RV350 to correctly - perform depth test (see --vb-triangles in r300_demo) - Don't know about other chips. - Vladimir - This is set to 3 when GL_POLYGON_OFFSET_FILL is on. - My guess is that there are two bits for each zbias primitive (FILL, LINE, POINT). - One to enable depth test and one for depth write. - Yet this doesnt explain why depth writes work ... - */ + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesn't explain why depth writes work ... + */ #define R300_RE_OCCLUSION_CNTL 0x42B4 # define R300_OCCLUSION_ON (1<<1) @@ -549,30 +586,37 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_FRONT_FACE_CW (1 << 2) -/* BEGIN: Rasterization / Interpolators - many guesses -// 0_UNKNOWN_18 has always been set except for clear operations. -// TC_CNT is the number of incoming texture coordinate sets (i.e. 
it depends -// on the vertex program, *not* the fragment program) */ +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* 0_UNKNOWN_18 has always been set except for clear operations. + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ #define R300_RS_CNTL_0 0x4300 # define R300_RS_CNTL_TC_CNT_SHIFT 2 # define R300_RS_CNTL_TC_CNT_MASK (7 << 2) -# define R300_RS_CNTL_CI_CNT_SHIFT 7 /* number of color interpolators used */ + /* number of color interpolators used */ +# define R300_RS_CNTL_CI_CNT_SHIFT 7 # define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) -/* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n register. */ + /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n + register. */ #define R300_RS_CNTL_1 0x4304 /* gap */ + /* Only used for texture coordinates. -// Use the source field to route texture coordinate input from the vertex program -// to the desired interpolator. Note that the source field is relative to the -// outputs the vertex program *actually* writes. If a vertex program only writes -// texcoord[1], this will be source index 0. -// Set INTERP_USED on all interpolators that produce data used by the -// fragment program. INTERP_USED looks like a swizzling mask, but -// I haven't seen it used that way. -// -// Note: The _UNKNOWN constants are always set in their respective register. -// I don't know if this is necessary. */ + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. 
+ * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. + */ #define R300_RS_INTERP_0 0x4310 #define R300_RS_INTERP_1 0x4314 # define R300_RS_INTERP_1_UNKNOWN 0x40 @@ -589,7 +633,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_RS_INTERP_USED 0x00D10000 /* These DWORDs control how vertex data is routed into fragment program -// registers, after interpolators. */ + * registers, after interpolators. + */ #define R300_RS_ROUTE_0 0x4330 #define R300_RS_ROUTE_1 0x4334 #define R300_RS_ROUTE_2 0x4338 @@ -611,8 +656,9 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */ /* Special handling for color: When the fragment program uses color, -// the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the -// color register index. */ + * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the + * color register index. + */ # define R300_RS_ROUTE_0_COLOR (1 << 14) # define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 # define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ @@ -621,22 +667,24 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 # define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) # define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) -/* END */ - -/* BEGIN: Scissors and cliprects -// There are four clipping rectangles. Their corner coordinates are inclusive. -// Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending -// on whether the pixel is inside cliprects 0-3, respectively. For example, -// if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned -// the number 3 (binary 0011). -// Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, -// the pixel is rasterized. -// -// In addition to this, there is a scissors rectangle. 
Only pixels inside the -// scissors rectangle are drawn. (coordinates are inclusive) -// -// For some reason, the top-left corner of the framebuffer is at (1440, 1440) -// for the purpose of clipping and scissors. */ +/* END: Rasterization / Interpolators - many guesses */ + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). + * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ #define R300_RE_CLIPRECT_TL_0 0x43B0 #define R300_RE_CLIPRECT_BR_0 0x43B4 #define R300_RE_CLIPRECT_TL_1 0x43B8 @@ -670,6 +718,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_CLIP_3210 (1 << 15) /* gap */ + #define R300_RE_SCISSORS_TL 0x43E0 #define R300_RE_SCISSORS_BR 0x43E4 # define R300_SCISSORS_OFFSET 1440 @@ -677,12 +726,15 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_SCISSORS_X_MASK (0x1FFF << 0) # define R300_SCISSORS_Y_SHIFT 13 # define R300_SCISSORS_Y_MASK (0x1FFF << 13) -/* END */ +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ -/* BEGIN: Texture specification -// The texture specification dwords are grouped by meaning and not by texture unit. -// This means that e.g. 
the offset for texture image unit N is found in register -// TX_OFFSET_0 + (4*N) */ +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ #define R300_TX_FILTER_0 0x4400 # define R300_TX_REPEAT 0 # define R300_TX_MIRRORED 1 @@ -706,13 +758,14 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) /* NOTE: NEAREST doesnt seem to exist. - Im not seting MAG_FILTER_MASK and (3 << 11) on for all - anisotropy modes because that would void selected mag filter */ -# define R300_TX_MIN_FILTER_ANISO_NEAREST ((0 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) -# define R300_TX_MIN_FILTER_ANISO_LINEAR ((0 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST ((1 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR ((2 << 13) /*|R300_TX_MAG_FILTER_MASK|(3<<11)*/) -# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) + * Im not seting MAG_FILTER_MASK and (3 << 11) on for all + * anisotropy modes because that would void selected mag filter + */ +# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) @@ -745,8 +798,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_TX_UNK23 (1 << 23) # define R300_TX_MAX_MIP_LEVEL_SHIFT 26 # define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) -# define R300_TX_SIZE_PROJECTED (1<<30) -# define R300_TX_SIZE_TXPITCH_EN (1<<31) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) #define R300_TX_FORMAT_0 0x44C0 /* The interpretation of the format word by Wladimir van der Laan */ /* The X, Y, Z and W refer to the layout of the components. @@ -774,7 +827,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ # define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ # define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ - /* 0x16 - some 16 bit green format.. ?? */ + /* 0x16 - some 16 bit green format.. ?? */ # define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ # define R300_TX_FORMAT_CUBIC_MAP (1 << 26) @@ -802,23 +855,26 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_TX_FORMAT_W 3 # define R300_TX_FORMAT_ZERO 4 # define R300_TX_FORMAT_ONE 5 -# define R300_TX_FORMAT_CUT_Z 6 /* 2.0*Z, everything above 1.0 is set to 0.0 */ -# define R300_TX_FORMAT_CUT_W 7 /* 2.0*W, everything above 1.0 is set to 0.0 */ + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 # define R300_TX_FORMAT_B_SHIFT 18 # define R300_TX_FORMAT_G_SHIFT 15 # define R300_TX_FORMAT_R_SHIFT 12 # define R300_TX_FORMAT_A_SHIFT 9 /* Convenience macro to take care of layout and swizzling */ -# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) (\ - ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ - | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ - | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ - | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ - | (R300_TX_FORMAT_##FMT) \ - ) - /* These can be ORed with result of R300_EASY_TX_FORMAT() */ - /* We don't really know what they do. Take values from a constant color ? */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? */ # define R300_TX_FORMAT_CONST_X (1<<5) # define R300_TX_FORMAT_CONST_Y (2<<5) # define R300_TX_FORMAT_CONST_Z (4<<5) @@ -828,7 +884,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */ #define R300_TX_OFFSET_0 0x4540 -/* BEGIN: Guess from R200 */ + /* BEGIN: Guess from R200 */ # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) @@ -837,53 +893,62 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_TXO_MICRO_TILE (1 << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 -/* END */ -#define R300_TX_CHROMA_KEY_0 0x4580 /* 32 bit chroma key */ -#define R300_TX_BORDER_COLOR_0 0x45C0 //ff00ff00 == { 0, 1.0, 0, 1.0 } - -/* END */ - -/* BEGIN: Fragment program instruction set -// Fragment programs are written directly into register space. -// There are separate instruction streams for texture instructions and ALU -// instructions. -// In order to synchronize these streams, the program is divided into up -// to 4 nodes. Each node begins with a number of TEX operations, followed -// by a number of ALU operations. -// The first node can have zero TEX ops, all subsequent nodes must have at least -// one TEX ops. -// All nodes must have at least one ALU op. -// -// The index of the last node is stored in PFS_CNTL_0: A value of 0 means -// 1 node, a value of 3 means 4 nodes. -// The total amount of instructions is defined in PFS_CNTL_2. The offsets are -// offsets into the respective instruction streams, while *_END points to the -// last instruction relative to this offset. */ + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ +#define R300_TX_BORDER_COLOR_0 0x45C0 + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. 
+ * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ #define R300_PFS_CNTL_0 0x4600 # define R300_PFS_CNTL_LAST_NODES_SHIFT 0 # define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) # define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) #define R300_PFS_CNTL_1 0x4604 /* There is an unshifted value here which has so far always been equal to the -// index of the highest used temporary register. */ + * index of the highest used temporary register. + */ #define R300_PFS_CNTL_2 0x4608 # define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 # define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_CNTL_ALU_END_SHIFT 6 -# define R300_PFS_CNTL_ALU_END_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) # define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ /* gap */ + /* Nodes are stored backwards. The last active node is always stored in -// PFS_NODE_3. -// Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The -// first node is stored in NODE_2, the second node is stored in NODE_3. -// -// Offsets are relative to the master offset from PFS_CNTL_2. -// LAST_NODE is set for the last node, and only for the last node. */ + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. 
The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + * LAST_NODE is set for the last node, and only for the last node. + */ #define R300_PFS_NODE_0 0x4610 #define R300_PFS_NODE_1 0x4614 #define R300_PFS_NODE_2 0x4618 @@ -901,86 +966,92 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) /* TEX -// As far as I can tell, texture instructions cannot write into output -// registers directly. A subsequent ALU instruction is always necessary, -// even if it's just MAD o0, r0, 1, 0 */ + * As far as I can tell, texture instructions cannot write into output + * registers directly. A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ #define R300_PFS_TEXI_0 0x4620 -# define R300_FPITX_SRC_SHIFT 0 -# define R300_FPITX_SRC_MASK (31 << 0) -# define R300_FPITX_SRC_CONST (1 << 5) /* GUESS */ -# define R300_FPITX_DST_SHIFT 6 -# define R300_FPITX_DST_MASK (31 << 6) -# define R300_FPITX_IMAGE_SHIFT 11 -# define R300_FPITX_IMAGE_MASK (15 << 11) /* GUESS based on layout and native limits */ +# define R300_FPITX_SRC_SHIFT 0 +# define R300_FPITX_SRC_MASK (31 << 0) + /* GUESS */ +# define R300_FPITX_SRC_CONST (1 << 5) +# define R300_FPITX_DST_SHIFT 6 +# define R300_FPITX_DST_MASK (31 << 6) +# define R300_FPITX_IMAGE_SHIFT 11 + /* GUESS based on layout and native limits */ +# define R300_FPITX_IMAGE_MASK (15 << 11) /* Unsure if these are opcodes, or some kind of bitfield, but this is how * they were set when I checked */ -# define R300_FPITX_OPCODE_SHIFT 15 -# define R300_FPITX_OP_TEX 1 -# define R300_FPITX_OP_KIL 2 -# define R300_FPITX_OP_TXP 3 -# define R300_FPITX_OP_TXB 4 +# define R300_FPITX_OPCODE_SHIFT 15 +# define R300_FPITX_OP_TEX 1 +# define R300_FPITX_OP_KIL 2 +# define R300_FPITX_OP_TXP 3 +# define R300_FPITX_OP_TXB 4 /* ALU -// The ALU instructions register blocks are 
enumerated according to the order -// in which fglrx. I assume there is space for 64 instructions, since -// each block has space for a maximum of 64 DWORDs, and this matches reported -// native limits. -// -// The basic functional block seems to be one MAD for each color and alpha, -// and an adder that adds all components after the MUL. -// - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands -// - DP4: Use OUTC_DP4, OUTA_DP4 -// - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands -// - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands -// - CMP: If ARG2 < 0, return ARG1, else return ARG0 -// - FLR: use FRC+MAD -// - XPD: use MAD+MAD -// - SGE, SLT: use MAD+CMP -// - RSQ: use ABS modifier for argument -// - Use OUTC_REPL_ALPHA to write results of an alpha-only operation (e.g. RCP) -// into color register -// - apparently, there's no quick DST operation -// - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" -// - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" -// - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" -// -// Operand selection -// First stage selects three sources from the available registers and -// constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). -// fglrx sorts the three source fields: Registers before constants, -// lower indices before higher indices; I do not know whether this is necessary. -// fglrx fills unused sources with "read constant 0" -// According to specs, you cannot select more than two different constants. -// -// Second stage selects the operands from the sources. This is defined in -// INSTR0 (color) and INSTR2 (alpha). You can also select the special constants -// zero and one. -// Swizzling and negation happens in this stage, as well. -// -// Important: Color and alpha seem to be mostly separate, i.e. their sources -// selection appears to be fully independent (the register storage is probably -// physically split into a color and an alpha section). 
-// However (because of the apparent physical split), there is some interaction -// WRT swizzling. If, for example, you want to load an R component into an -// Alpha operand, this R component is taken from a *color* source, not from -// an alpha source. The corresponding register doesn't even have to appear in -// the alpha sources list. (I hope this alll makes sense to you) -// -// Destination selection -// The destination register index is in FPI1 (color) and FPI3 (alpha) together -// with enable bits. -// There are separate enable bits for writing into temporary registers -// (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* /DSTA_OUTPUT). -// You can write to both at once, or not write at all (the same index -// must be used for both). -// -// Note: There is a special form for LRP -// - Argument order is the same as in ARB_fragment_program. -// - Operation is MAD -// - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP -// - Set FPI0/FPI2_SPECIAL_LRP -// Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD */ + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. 
RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants + * zero and one. + * Swizzling and negation happens in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this alll makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). 
+ * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program. + * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ #define R300_PFS_INSTR1_0 0x46C0 # define R300_FPI1_SRC0C_SHIFT 0 # define R300_FPI1_SRC0C_MASK (31 << 0) @@ -1037,7 +1108,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_FPI0_ARGC_SRC1C_LRP 15 # define R300_FPI0_ARGC_ZERO 20 # define R300_FPI0_ARGC_ONE 21 -# define R300_FPI0_ARGC_HALF 22 /* GUESS */ + /* GUESS */ +# define R300_FPI0_ARGC_HALF 22 # define R300_FPI0_ARGC_SRC0C_YZX 23 # define R300_FPI0_ARGC_SRC1C_YZX 24 # define R300_FPI0_ARGC_SRC2C_YZX 25 @@ -1088,20 +1160,23 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_FPI2_ARGA_SRC1A_LRP 15 # define R300_FPI2_ARGA_ZERO 16 # define R300_FPI2_ARGA_ONE 17 -# define R300_FPI2_ARGA_HALF 18 /* GUESS */ - + /* GUESS */ +# define R300_FPI2_ARGA_HALF 18 # define R300_FPI2_ARG0A_SHIFT 0 # define R300_FPI2_ARG0A_MASK (31 << 0) # define R300_FPI2_ARG0A_NEG (1 << 5) -# define R300_FPI2_ARG0A_ABS (1 << 6) /* GUESS */ + /* GUESS */ +# define R300_FPI2_ARG0A_ABS (1 << 6) # define R300_FPI2_ARG1A_SHIFT 7 # define R300_FPI2_ARG1A_MASK (31 << 7) # define R300_FPI2_ARG1A_NEG (1 << 12) -# define R300_FPI2_ARG1A_ABS (1 << 13) /* GUESS */ + /* GUESS */ +# define R300_FPI2_ARG1A_ABS (1 << 13) # define R300_FPI2_ARG2A_SHIFT 14 # define R300_FPI2_ARG2A_MASK (31 << 14) # define R300_FPI2_ARG2A_NEG (1 << 19) -# define R300_FPI2_ARG2A_ABS (1 << 20) /* GUESS */ + /* GUESS */ +# define R300_FPI2_ARG2A_ABS (1 << 20) # define R300_FPI2_SPECIAL_LRP (1 << 21) # define R300_FPI2_OUTA_MAD (0 << 23) # define R300_FPI2_OUTA_DP4 (1 << 23) @@ -1115,7 +1190,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_FPI2_OUTA_RSQ (11 << 23) # define R300_FPI2_OUTA_SAT (1 << 30) # define R300_FPI2_UNKNOWN_31 (1 << 31) -/* END */ +/* END: Fragment program instruction set */ /* Fog state and color */ #define R300_RE_FOG_STATE 0x4BC0 @@ -1142,6 +1217,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_ALPHA_TEST_ENABLE (1 << 11) /* gap */ + /* Fragment program parameters in 7.16 floating point */ #define R300_PFS_PARAM_0_X 0x4C00 #define R300_PFS_PARAM_0_Y 0x4C04 @@ -1154,45 +1230,47 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_PFS_PARAM_31_W 0x4DFC /* Notes: -// - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in the application -// - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND are set to the same -// function (both registers are always set up completely in any case) -// - Most blend flags are simply copied from R200 and not tested yet */ + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ #define R300_RB3D_CBLEND 0x4E04 #define R300_RB3D_ABLEND 0x4E08 - /* the following only appear in CBLEND */ +/* the following only appear in CBLEND */ # define R300_BLEND_ENABLE (1 << 0) # define R300_BLEND_UNKNOWN (3 << 1) # define R300_BLEND_NO_SEPARATE (1 << 3) - /* the following are shared between CBLEND and ABLEND */ +/* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) # define R300_COMB_FCN_ADD_CLAMP (0 << 12) # define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) # define R300_COMB_FCN_SUB_CLAMP (2 << 12) # define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) -# define R300_SRC_BLEND_GL_ZERO (32 << 16) -# define R300_SRC_BLEND_GL_ONE (33 << 16) -# define 
R300_SRC_BLEND_GL_SRC_COLOR (34 << 16) -# define R300_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16) -# define R300_SRC_BLEND_GL_DST_COLOR (36 << 16) -# define R300_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16) -# define R300_SRC_BLEND_GL_SRC_ALPHA (38 << 16) -# define R300_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16) -# define R300_SRC_BLEND_GL_DST_ALPHA (40 << 16) -# define R300_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16) -# define R300_SRC_BLEND_GL_SRC_ALPHA_SATURATE (42 << 16) -# define R300_SRC_BLEND_MASK (63 << 16) -# define R300_DST_BLEND_GL_ZERO (32 << 24) -# define R300_DST_BLEND_GL_ONE (33 << 24) -# define R300_DST_BLEND_GL_SRC_COLOR (34 << 24) -# define R300_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24) -# define R300_DST_BLEND_GL_DST_COLOR (36 << 24) -# define R300_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24) -# define R300_DST_BLEND_GL_SRC_ALPHA (38 << 24) -# define R300_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24) -# define R300_DST_BLEND_GL_DST_ALPHA (40 << 24) -# define R300_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24) -# define R300_DST_BLEND_MASK (63 << 24) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT 
(24) #define R300_RB3D_COLORMASK 0x4E0C # define R300_COLORMASK0_B (1<<0) # define R300_COLORMASK0_G (1<<1) @@ -1200,15 +1278,19 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_COLORMASK0_A (1<<3) /* gap */ + #define R300_RB3D_COLOROFFSET0 0x4E28 # define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ #define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */ #define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */ #define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */ + /* gap */ + /* Bit 16: Larger tiles -// Bit 17: 4x2 tiles -// Bit 18: Extremely weird tile like, but some pixels duplicated? */ + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ #define R300_RB3D_COLORPITCH0 0x4E38 # define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */ # define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */ @@ -1223,18 +1305,22 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */ /* gap */ + /* Guess by Vladimir. -// Set to 0A before 3D operations, set to 02 afterwards. */ + * Set to 0A before 3D operations, set to 02 afterwards. + */ #define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C # define R300_RB3D_DSTCACHE_02 0x00000002 # define R300_RB3D_DSTCACHE_0A 0x0000000A /* gap */ -/* There seems to be no "write only" setting, so use Z-test = ALWAYS for this. */ -/* Bit (1<<8) is the "test" bit. so plain write is 6 - vd */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. 
so plain write is 6 - vd + */ #define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00 -# define R300_RB3D_Z_DISABLED_1 0x00000010 /* GUESS */ -# define R300_RB3D_Z_DISABLED_2 0x00000014 /* GUESS */ +# define R300_RB3D_Z_DISABLED_1 0x00000010 +# define R300_RB3D_Z_DISABLED_2 0x00000014 # define R300_RB3D_Z_TEST 0x00000012 # define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 # define R300_RB3D_Z_WRITE_ONLY 0x00000006 @@ -1245,7 +1331,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_RB3D_STENCIL_ENABLE 0x00000001 #define R300_RB3D_ZSTENCIL_CNTL_1 0x4F04 - /* functions */ + /* functions */ # define R300_ZS_NEVER 0 # define R300_ZS_LESS 1 # define R300_ZS_LEQUAL 2 @@ -1255,7 +1341,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_ZS_NOTEQUAL 6 # define R300_ZS_ALWAYS 7 # define R300_ZS_MASK 7 - /* operations */ + /* operations */ # define R300_ZS_KEEP 0 # define R300_ZS_ZERO 1 # define R300_ZS_REPLACE 2 @@ -1264,9 +1350,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_ZS_INVERT 5 # define R300_ZS_INCR_WRAP 6 # define R300_ZS_DECR_WRAP 7 - - /* front and back refer to operations done for front - and back faces, i.e. separate stencil function support */ + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ # define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0 # define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3 # define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6 @@ -1277,8 +1362,6 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21 # define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24 - - #define R300_RB3D_ZSTENCIL_CNTL_2 0x4F08 # define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0 # define R300_RB3D_ZS2_STENCIL_MASK 0xFF @@ -1290,10 +1373,11 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_RB3D_ZSTENCIL_FORMAT 0x4F10 # define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) # define R300_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) - /* 16 bit format or some aditional bit ? */ + /* 16 bit format or some aditional bit ? */ # define R300_DEPTH_FORMAT_UNK32 (32 << 0) /* gap */ + #define R300_RB3D_DEPTHOFFSET 0x4F20 #define R300_RB3D_DEPTHPITCH 0x4F24 # define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */ @@ -1303,34 +1387,40 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_DEPTH_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ # define R300_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ -/* BEGIN: Vertex program instruction set -// Every instruction is four dwords long: -// DWORD 0: output and opcode -// DWORD 1: first argument -// DWORD 2: second argument -// DWORD 3: third argument -// -// Notes: -// - ABS r, a is implemented as MAX r, a, -a -// - MOV is implemented as ADD to zero -// - XPD is implemented as MUL + MAD -// - FLR is implemented as FRC + ADD -// - apparently, fglrx tries to schedule instructions so that there is at least -// one instruction between the write to a temporary and the first read -// from said temporary; however, violations of this scheduling are allowed -// - register indices seem to be unrelated with OpenGL aliasing to conventional state -// - only one attribute and one parameter can be loaded at a time; however, the -// same attribute/parameter can be used for more than one argument -// - the second software argument for POW is the third hardware argument (no idea why) -// - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2 -// -// There is some magic surrounding LIT: -// The single argument is replicated across all three inputs, but swizzled: -// First argument: xyzy -// Second argument: xyzx -// Third argument: xyzw -// Whenever the result is used later in the fragment program, fglrx forces x and w -// to be 1.0 in the input selection; I don't know whether this is strictly 
necessary */ +/* BEGIN: Vertex program instruction set */ + +/* Every instruction is four dwords long: + * DWORD 0: output and opcode + * DWORD 1: first argument + * DWORD 2: second argument + * DWORD 3: third argument + * + * Notes: + * - ABS r, a is implemented as MAX r, a, -a + * - MOV is implemented as ADD to zero + * - XPD is implemented as MUL + MAD + * - FLR is implemented as FRC + ADD + * - apparently, fglrx tries to schedule instructions so that there is at + * least one instruction between the write to a temporary and the first + * read from said temporary; however, violations of this scheduling are + * allowed + * - register indices seem to be unrelated with OpenGL aliasing to + * conventional state + * - only one attribute and one parameter can be loaded at a time; however, + * the same attribute/parameter can be used for more than one argument + * - the second software argument for POW is the third hardware argument + * (no idea why) + * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2 + * + * There is some magic surrounding LIT: + * The single argument is replicated across all three inputs, but swizzled: + * First argument: xyzy + * Second argument: xyzx + * Third argument: xyzw + * Whenever the result is used later in the fragment program, fglrx forces + * x and w to be 1.0 in the input selection; I don't know whether this is + * strictly necessary + */ #define R300_VPI_OUT_OP_DOT (1 << 0) #define R300_VPI_OUT_OP_MUL (2 << 0) #define R300_VPI_OUT_OP_ADD (3 << 0) @@ -1341,20 +1431,24 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_VPI_OUT_OP_MIN (8 << 0) #define R300_VPI_OUT_OP_SGE (9 << 0) #define R300_VPI_OUT_OP_SLT (10 << 0) -#define R300_VPI_OUT_OP_UNK12 (12 << 0) /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */ + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */ +#define R300_VPI_OUT_OP_UNK12 (12 << 0) #define R300_VPI_OUT_OP_ARL (13 << 0) #define R300_VPI_OUT_OP_EXP (65 << 0) #define R300_VPI_OUT_OP_LOG (66 << 0) -#define R300_VPI_OUT_OP_UNK67 (67 << 0) /* Used in fog computations, scalar(scalar) */ + /* Used in fog computations, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK67 (67 << 0) #define R300_VPI_OUT_OP_LIT (68 << 0) #define R300_VPI_OUT_OP_POW (69 << 0) #define R300_VPI_OUT_OP_RCP (70 << 0) #define R300_VPI_OUT_OP_RSQ (72 << 0) -#define R300_VPI_OUT_OP_UNK73 (73 << 0) /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */ + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK73 (73 << 0) #define R300_VPI_OUT_OP_EX2 (75 << 0) #define R300_VPI_OUT_OP_LG2 (76 << 0) #define R300_VPI_OUT_OP_MAD_2 (128 << 0) -#define R300_VPI_OUT_OP_UNK129 (129 << 0) /* all temps, vector(scalar, vector, vector) */ + /* all temps, vector(scalar, vector, vector) */ +#define R300_VPI_OUT_OP_UNK129 (129 << 0) #define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8) #define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8) @@ -1362,7 +1456,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_VPI_OUT_REG_CLASS_MASK (31 << 8) #define R300_VPI_OUT_REG_INDEX_SHIFT 13 -#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13) /* GUESS based on fglrx native limits */ + /* GUESS based on fglrx native limits */ +#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13) #define R300_VPI_OUT_WRITE_X (1 << 20) #define R300_VPI_OUT_WRITE_Y (1 << 21) @@ -1373,14 +1468,16 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) #define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0) #define R300_VPI_IN_REG_CLASS_NONE (9 << 0) -#define R300_VPI_IN_REG_CLASS_MASK (31 << 0) /* GUESS */ +#define R300_VPI_IN_REG_CLASS_MASK (31 << 0) #define R300_VPI_IN_REG_INDEX_SHIFT 5 -#define R300_VPI_IN_REG_INDEX_MASK (255 << 5) /* GUESS based on fglrx native limits */ + /* GUESS based on fglrx native limits */ +#define R300_VPI_IN_REG_INDEX_MASK (255 << 5) /* The R300 can select components from the input register arbitrarily. -// Use the following constants, shifted by the component shift you -// want to select */ + * Use the following constants, shifted by the component shift you + * want to select + */ #define R300_VPI_IN_SELECT_X 0 #define R300_VPI_IN_SELECT_Y 1 #define R300_VPI_IN_SELECT_Z 2 @@ -1398,11 +1495,11 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_VPI_IN_NEG_Y (1 << 26) #define R300_VPI_IN_NEG_Z (1 << 27) #define R300_VPI_IN_NEG_W (1 << 28) -/* END */ +/* END: Vertex program instruction set */ -//BEGIN: Packet 3 commands +/* BEGIN: Packet 3 commands */ -// A primitive emission dword. +/* A primitive emission dword. */ #define R300_PRIM_TYPE_NONE (0 << 0) #define R300_PRIM_TYPE_POINT (1 << 0) #define R300_PRIM_TYPE_LINE (2 << 0) @@ -1414,7 +1511,8 @@ I am fairly certain that they are correct unless stated otherwise in comments. #define R300_PRIM_TYPE_RECT_LIST (8 << 0) #define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) #define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) -#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) // GUESS (based on r200) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) #define R300_PRIM_TYPE_LINE_LOOP (12 << 0) #define R300_PRIM_TYPE_QUADS (13 << 0) #define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) @@ -1424,29 +1522,32 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
#define R300_PRIM_WALK_LIST (2 << 4) #define R300_PRIM_WALK_RING (3 << 4) #define R300_PRIM_WALK_MASK (3 << 4) -#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) // GUESS (based on r200) -#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) // GUESS + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) #define R300_PRIM_NUM_VERTICES_SHIFT 16 -// Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. -// Two parameter dwords: -// 0. The first parameter appears to be always 0 -// 1. The second parameter is a standard primitive emission dword. +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. The first parameter appears to be always 0 + * 1. The second parameter is a standard primitive emission dword. + */ #define R300_PACKET3_3D_DRAW_VBUF 0x00002800 -// Specify the full set of vertex arrays as (address, stride). -// The first parameter is the number of vertex arrays specified. -// The rest of the command is a variable length list of blocks, where -// each block is three dwords long and specifies two arrays. -// The first dword of a block is split into two words, the lower significant -// word refers to the first array, the more significant word to the second -// array in the block. -// The low byte of each word contains the size of an array entry in dwords, -// the high byte contains the stride of the array. -// The second dword of a block contains the pointer to the first array, -// the third dword of a block contains the pointer to the second array. -// Note that if the total number of arrays is odd, the third dword of -// the last block is omitted. +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. 
+ * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 #define R300_PACKET3_INDX_BUFFER 0x00003300 @@ -1455,6 +1556,23 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_EB_UNK2 0x0810 #define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 -//END +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + #endif /* _R300_REG_H */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index f86a439bc29..03f168365dc 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -113,10 +113,7 @@ static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int pri return type; } -static int r300_get_num_verts(r300ContextPtr rmesa, - GLcontext *ctx, - int num_verts, - int prim) +int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim) { int verts_off=0; char *name="UNKNOWN"; @@ -196,9 +193,11 @@ static int r300_get_num_verts(r300ContextPtr rmesa, /* vertex buffer implementation */ -static void inline fire_EB(PREFIX unsigned long 
addr, int vertex_count, int type, int elt_size) +static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_count, int type, int elt_size) { - LOCAL_VARS + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; unsigned long addr_a; unsigned long t_addr; unsigned long magic_1, magic_2; @@ -241,7 +240,7 @@ static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type } else { e32(magic_2); /* Total number of dwords needed? */ } - //cp_delay(PASS_PREFIX 1); + //cp_delay(rmesa, 1); #if 0 fprintf(stderr, "magic_1 %d\n", magic_1); fprintf(stderr, "t_addr %x\n", t_addr); @@ -271,7 +270,7 @@ static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type } else { e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ } - //cp_delay(PASS_PREFIX 1); + //cp_delay(rmesa, 1); #endif } @@ -284,14 +283,16 @@ static void r300_render_vb_primitive(r300ContextPtr rmesa, int type, num_verts; type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); + num_verts=r300_get_num_verts(rmesa, end-start, prim); if(type<0 || num_verts <= 0)return; if(rmesa->state.VB.Elts){ r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); #if 0 - LOCAL_VARS + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; int i; start_index32_packet(num_verts, type); for(i=0; i < num_verts; i++) @@ -309,68 +310,24 @@ static void r300_render_vb_primitive(r300ContextPtr rmesa, } r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); - fire_EB(PASS_PREFIX rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); + fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); #endif }else{ r300EmitAOS(rmesa, rmesa->state.aos_count, start); - fire_AOS(PASS_PREFIX num_verts, type); + fire_AOS(rmesa, num_verts, type); } } -#if 0 -void dump_array(struct 
r300_dma_region *rvb, int count) -{ - int *out = (int *)(rvb->address + rvb->start); - int i, ci; - - fprintf(stderr, "stride %d:", rvb->aos_stride); - for (i=0; i < count; i++) { - fprintf(stderr, "{"); - if (rvb->aos_format == AOS_FORMAT_FLOAT) - for (ci=0; ci < rvb->aos_size; ci++) - fprintf(stderr, "%f ", ((float *)out)[ci]); - else - for (ci=0; ci < rvb->aos_size; ci++) - fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); - fprintf(stderr, "}"); - - out += rvb->aos_stride; - } - - fprintf(stderr, "\n"); -} - -void dump_dt(struct dt *dt, int count) -{ - int *out = dt->data; - int i, ci; - - fprintf(stderr, "stride %d", dt->stride); - - for (i=0; i < count; i++){ - fprintf(stderr, "{"); - if (dt->type == GL_FLOAT) - for (ci=0; ci < dt->size; ci++) - fprintf(stderr, "%f ", ((float *)out)[ci]); - else - for (ci=0; ci < dt->size; ci++) - fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); - fprintf(stderr, "}"); - - out = (int *)((char *)out + dt->stride); - } - - fprintf(stderr, "\n"); -} -#endif - GLboolean r300_run_vb_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct radeon_vertex_buffer *VB = &rmesa->state.VB; int i; - LOCAL_VARS + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); @@ -381,51 +338,17 @@ GLboolean r300_run_vb_render(GLcontext *ctx, } r300UpdateShaders(rmesa); - if (rmesa->state.VB.LockCount == 0 || 1) { - r300EmitArrays(ctx, GL_FALSE); + if (r300EmitArrays(ctx)) + return GL_TRUE; - r300UpdateShaderStates(rmesa); - } else { - /* TODO: Figure out why do we need these. 
*/ - R300_STATECHANGE(rmesa, vir[0]); - R300_STATECHANGE(rmesa, vir[1]); - R300_STATECHANGE(rmesa, vic); - R300_STATECHANGE(rmesa, vof); - -#if 0 - fprintf(stderr, "dt:\n"); - for(i=0; i < VERT_ATTRIB_MAX; i++){ - fprintf(stderr, "dt %d:", i); - dump_dt(&rmesa->state.VB.AttribPtr[i], VB->Count); - } - - fprintf(stderr, "before:\n"); - for(i=0; i < rmesa->state.aos_count; i++){ - fprintf(stderr, "aos %d:", i); - dump_array(&rmesa->state.aos[i], VB->Count); - } -#endif -#if 0 - r300ReleaseArrays(ctx); - r300EmitArrays(ctx, GL_FALSE); - - fprintf(stderr, "after:\n"); - for(i=0; i < rmesa->state.aos_count; i++){ - fprintf(stderr, "aos %d:", i); - dump_array(&rmesa->state.aos[i], VB->Count); - } -#endif - } + r300UpdateShaderStates(rmesa); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); e32(0x0000000a); reg_start(0x4f18,0); e32(0x00000003); -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE,0); - e32(0x00000000); -#endif + r300EmitState(rmesa); for(i=0; i < VB->PrimitiveCount; i++){ @@ -449,50 +372,78 @@ GLboolean r300_run_vb_render(GLcontext *ctx, return GL_FALSE; } -#define FALLBACK_IF(expr) \ -do { \ - if (expr) { \ - if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ - WARN_ONCE("Software fallback:%s\n", #expr); \ - return R300_FALLBACK_RAST; \ - } \ -} while(0) +#define FALLBACK_IF(expr) \ + do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", \ + #expr); \ + return R300_FALLBACK_RAST; \ + } \ + } while(0) int r300Fallback(GLcontext *ctx) { + r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - FALLBACK_IF(ctx->RenderMode != GL_RENDER); // We do not do SELECT or FEEDBACK (yet ?) - -#if 0 /* These should work now.. */ + /* We do not do SELECT or FEEDBACK (yet ?) + * Is it worth doing them ? + */ + FALLBACK_IF(ctx->RenderMode != GL_RENDER); + +#if 0 + /* These should work now.. 
*/ FALLBACK_IF(ctx->Color.DitherFlag); - FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST - FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND - FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL + /* GL_ALPHA_TEST */ + FALLBACK_IF(ctx->Color.AlphaEnabled); + /* GL_BLEND */ + FALLBACK_IF(ctx->Color.BlendEnabled); + /* GL_POLYGON_OFFSET_FILL */ + FALLBACK_IF(ctx->Polygon.OffsetFill); + /* FOG seems to trigger an unknown output + * in vertex program. + */ FALLBACK_IF(ctx->Fog.Enabled); #endif - FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT - FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE - //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST - - //FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH disabling to get blender going - FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE - FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB - - - FALLBACK_IF(ctx->Line.StippleFlag); - - /* HW doesnt appear to directly support these */ - FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH - FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH + + if(!r300->disable_lowimpact_fallback){ + /* GL_POLYGON_OFFSET_POINT */ + FALLBACK_IF(ctx->Polygon.OffsetPoint); + /* GL_POLYGON_OFFSET_LINE */ + FALLBACK_IF(ctx->Polygon.OffsetLine); +#if 0 + /* GL_STENCIL_TEST */ + FALLBACK_IF(ctx->Stencil.Enabled); + /* GL_POLYGON_SMOOTH disabling to get blender going */ + FALLBACK_IF(ctx->Polygon.SmoothFlag); +#endif + /* GL_POLYGON_STIPPLE */ + FALLBACK_IF(ctx->Polygon.StippleFlag); + /* GL_MULTISAMPLE_ARB */ + FALLBACK_IF(ctx->Multisample.Enabled); + /* blender ? 
*/ + FALLBACK_IF(ctx->Line.StippleFlag); + /* GL_LINE_SMOOTH */ + FALLBACK_IF(ctx->Line.SmoothFlag); + /* GL_POINT_SMOOTH */ + FALLBACK_IF(ctx->Point.SmoothFlag); + } + + /* Fallback for LOGICOP */ + FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); + /* Rest could be done with vertex fragments */ - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV + if (ctx->Extensions.NV_point_sprite || + ctx->Extensions.ARB_point_sprite) + /* GL_POINT_SPRITE_NV */ + FALLBACK_IF(ctx->Point.PointSprite); + /* Fallback for rectangular texture */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) return R300_FALLBACK_TCL; - + return R300_FALLBACK_NONE; } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index f32e86065dd..576b18953f9 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -3,10 +3,12 @@ #include "enums.h" #include "program.h" +#include "tnl/tnl.h" #include "r300_context.h" #include "r300_fragprog.h" -static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) +static void +r300BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -15,7 +17,7 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) switch(target){ case GL_VERTEX_PROGRAM_ARB: - rmesa->curr_vp = (struct vertex_program *)vp; + rmesa->curr_vp = (struct gl_vertex_program *)vp; vp->ref_count++; #if 0 if((vp->ref_count % 1500) == 0) { @@ -32,7 +34,8 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) } } -static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) +static struct gl_program * +r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) { struct r300_vertex_program *vp; struct r300_fragment_program *fp; @@ -57,7 
+60,8 @@ static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) } -static void r300DeleteProgram(GLcontext *ctx, struct program *prog) +static void +r300DeleteProgram(GLcontext *ctx, struct gl_program *prog) { #if 0 r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -70,8 +74,8 @@ static void r300DeleteProgram(GLcontext *ctx, struct program *prog) _mesa_delete_program(ctx, prog); } -static void r300ProgramStringNotify(GLcontext *ctx, GLenum target, - struct program *prog) +static void +r300ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) { struct r300_vertex_program *vp=(void *)prog; struct r300_fragment_program *fp = (struct r300_fragment_program *) prog; @@ -79,16 +83,19 @@ static void r300ProgramStringNotify(GLcontext *ctx, GLenum target, switch(target) { case GL_VERTEX_PROGRAM_ARB: vp->translated = GL_FALSE; - memset(&vp->translated, 0, sizeof(struct r300_vertex_program) - sizeof(struct vertex_program)); + memset(&vp->translated, 0, sizeof(struct r300_vertex_program) - sizeof(struct gl_vertex_program)); /*r300_translate_vertex_shader(vp);*/ break; case GL_FRAGMENT_PROGRAM_ARB: fp->translated = GL_FALSE; break; } + /* need this for tcl fallbacks */ + _tnl_program_string(ctx, target, prog); } -static GLboolean r300IsProgramNative(GLcontext *ctx, GLenum target, struct program *prog) +static GLboolean +r300IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) { //struct r300_vertex_program *vp=(void *)prog; //r300ContextPtr rmesa = R300_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 70d42c2ea3b..bac1bef9681 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -64,50 +64,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "drirenderbuffer.h" -static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int pp_misc = rmesa->hw.at.cmd[R300_AT_ALPHA_TEST]; - GLubyte refByte; - - CLAMPED_FLOAT_TO_UBYTE(refByte, ref); - - R300_STATECHANGE(rmesa, at); - - pp_misc &= ~(R300_ALPHA_TEST_OP_MASK | R300_REF_ALPHA_MASK); - pp_misc |= (refByte & R300_REF_ALPHA_MASK); - - switch (func) { - case GL_NEVER: - pp_misc |= R300_ALPHA_TEST_FAIL; - break; - case GL_LESS: - pp_misc |= R300_ALPHA_TEST_LESS; - break; - case GL_EQUAL: - pp_misc |= R300_ALPHA_TEST_EQUAL; - break; - case GL_LEQUAL: - pp_misc |= R300_ALPHA_TEST_LEQUAL; - break; - case GL_GREATER: - pp_misc |= R300_ALPHA_TEST_GREATER; - break; - case GL_NOTEQUAL: - pp_misc |= R300_ALPHA_TEST_NEQUAL; - break; - case GL_GEQUAL: - pp_misc |= R300_ALPHA_TEST_GEQUAL; - break; - case GL_ALWAYS: - pp_misc |= R300_ALPHA_TEST_PASS; - //pp_misc &= ~R300_ALPHA_TEST_ENABLE; - break; - } - - rmesa->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; -} - static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { GLubyte color[4]; @@ -144,55 +100,54 @@ static int blend_factor(GLenum factor, GLboolean is_src) switch (factor) { case GL_ZERO: - func = R200_BLEND_GL_ZERO; + func = R300_BLEND_GL_ZERO; break; case GL_ONE: - func = R200_BLEND_GL_ONE; + func = R300_BLEND_GL_ONE; break; case GL_DST_COLOR: - func = R200_BLEND_GL_DST_COLOR; + func = R300_BLEND_GL_DST_COLOR; break; case GL_ONE_MINUS_DST_COLOR: - func = R200_BLEND_GL_ONE_MINUS_DST_COLOR; + func = R300_BLEND_GL_ONE_MINUS_DST_COLOR; break; case GL_SRC_COLOR: - func = R200_BLEND_GL_SRC_COLOR; + func = R300_BLEND_GL_SRC_COLOR; break; case GL_ONE_MINUS_SRC_COLOR: - func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR; + func = R300_BLEND_GL_ONE_MINUS_SRC_COLOR; break; case GL_SRC_ALPHA: - func = R200_BLEND_GL_SRC_ALPHA; + func = R300_BLEND_GL_SRC_ALPHA; break; case GL_ONE_MINUS_SRC_ALPHA: - func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA; + func = 
R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; break; case GL_DST_ALPHA: - func = R200_BLEND_GL_DST_ALPHA; + func = R300_BLEND_GL_DST_ALPHA; break; case GL_ONE_MINUS_DST_ALPHA: - func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA; + func = R300_BLEND_GL_ONE_MINUS_DST_ALPHA; break; case GL_SRC_ALPHA_SATURATE: - func = - (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE : - R200_BLEND_GL_ZERO; + func = (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE : + R300_BLEND_GL_ZERO; break; case GL_CONSTANT_COLOR: - func = R200_BLEND_GL_CONST_COLOR; + func = R300_BLEND_GL_CONST_COLOR; break; case GL_ONE_MINUS_CONSTANT_COLOR: - func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR; + func = R300_BLEND_GL_ONE_MINUS_CONST_COLOR; break; case GL_CONSTANT_ALPHA: - func = R200_BLEND_GL_CONST_ALPHA; + func = R300_BLEND_GL_CONST_ALPHA; break; case GL_ONE_MINUS_CONSTANT_ALPHA: - func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA; + func = R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; break; default: fprintf(stderr, "unknown blend factor %x\n", factor); - func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO; + func = (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO; } return func; } @@ -202,10 +157,10 @@ static int blend_factor(GLenum factor, GLboolean is_src) * This is done in a single * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) * change the interpretation of the blend function. - * Also, make sure that blend function and blend equation are set to their default - * value if color blending is not enabled, since at least blend equations GL_MIN - * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for - * unknown reasons. + * Also, make sure that blend function and blend equation are set to their + * default value if color blending is not enabled, since at least blend + * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results + * otherwise for unknown reasons. 
*/ /* helper function */ @@ -244,12 +199,12 @@ static void r300_set_blend_cntl(r300ContextPtr r300, int func, int eqn, int cbit static void r300_set_blend_state(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT); - int eqn = R200_COMB_FCN_ADD_CLAMP; - int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT); - int eqnA = R200_COMB_FCN_ADD_CLAMP; + int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqn = R300_COMB_FCN_ADD_CLAMP; + int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqnA = R300_COMB_FCN_ADD_CLAMP; if (ctx->Color._LogicOpEnabled || !ctx->Color.BlendEnabled) { r300_set_blend_cntl(r300, @@ -258,8 +213,8 @@ static void r300_set_blend_state(GLcontext * ctx) return; } - func = (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << R200_SRC_BLEND_SHIFT) | - (blend_factor(ctx->Color.BlendDstRGB, GL_FALSE) << R200_DST_BLEND_SHIFT); + func = (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << R300_SRC_BLEND_SHIFT) | + (blend_factor(ctx->Color.BlendDstRGB, GL_FALSE) << R300_DST_BLEND_SHIFT); switch (ctx->Color.BlendEquationRGB) { case GL_FUNC_ADD: @@ -271,19 +226,19 @@ static void r300_set_blend_state(GLcontext * ctx) break; case GL_FUNC_REVERSE_SUBTRACT: - eqn = R200_COMB_FCN_RSUB_CLAMP; + eqn = R300_COMB_FCN_RSUB_CLAMP; break; case GL_MIN: - eqn = R200_COMB_FCN_MIN; - func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + eqn = R300_COMB_FCN_MIN; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); break; case GL_MAX: - eqn = R200_COMB_FCN_MAX; - func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + eqn = R300_COMB_FCN_MAX; + func = 
(R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); break; default: @@ -294,8 +249,8 @@ static void r300_set_blend_state(GLcontext * ctx) } - funcA = (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << R200_SRC_BLEND_SHIFT) | - (blend_factor(ctx->Color.BlendDstA, GL_FALSE) << R200_DST_BLEND_SHIFT); + funcA = (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << R300_SRC_BLEND_SHIFT) | + (blend_factor(ctx->Color.BlendDstA, GL_FALSE) << R300_DST_BLEND_SHIFT); switch (ctx->Color.BlendEquationA) { case GL_FUNC_ADD: @@ -307,19 +262,19 @@ static void r300_set_blend_state(GLcontext * ctx) break; case GL_FUNC_REVERSE_SUBTRACT: - eqnA = R200_COMB_FCN_RSUB_CLAMP; + eqnA = R300_COMB_FCN_RSUB_CLAMP; break; case GL_MIN: - eqnA = R200_COMB_FCN_MIN; - funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + eqnA = R300_COMB_FCN_MIN; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); break; case GL_MAX: - eqnA = R200_COMB_FCN_MAX; - funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | - (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + eqnA = R300_COMB_FCN_MAX; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); break; default: @@ -371,7 +326,7 @@ static void r300UpdateCulling(GLcontext* ctx) r300->hw.cul.cmd[R300_CUL_CULL] = val; } -static void update_early_z(GLcontext* ctx) +static void update_early_z(GLcontext *ctx) { /* updates register 0x4f14 if depth test is not enabled it should be 0x00000000 @@ -381,11 +336,11 @@ static void update_early_z(GLcontext* ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, unk4F10); - if (ctx->Color.AlphaEnabled) + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ r300->hw.unk4F10.cmd[2] = 0x00000000; else { - if (ctx->Depth.Test) + if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) /* enable early Z */ 
r300->hw.unk4F10.cmd[2] = 0x00000001; else @@ -394,6 +349,109 @@ static void update_early_z(GLcontext* ctx) } } +static void update_alpha(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLubyte refByte; + uint32_t pp_misc = 0x0; + GLboolean really_enabled = ctx->Color.AlphaEnabled; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef); + + switch (ctx->Color.AlphaFunc) { + case GL_NEVER: + pp_misc |= R300_ALPHA_TEST_FAIL; + break; + case GL_LESS: + pp_misc |= R300_ALPHA_TEST_LESS; + break; + case GL_EQUAL: + pp_misc |= R300_ALPHA_TEST_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= R300_ALPHA_TEST_LEQUAL; + break; + case GL_GREATER: + pp_misc |= R300_ALPHA_TEST_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= R300_ALPHA_TEST_NEQUAL; + break; + case GL_GEQUAL: + pp_misc |= R300_ALPHA_TEST_GEQUAL; + break; + case GL_ALWAYS: + /*pp_misc |= R300_ALPHA_TEST_PASS;*/ + really_enabled = GL_FALSE; + break; + } + + if (really_enabled) { + pp_misc |= R300_ALPHA_TEST_ENABLE; + pp_misc |= (refByte & R300_REF_ALPHA_MASK); + } else { + pp_misc = 0x0; + } + + + R300_STATECHANGE(r300, at); + r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + update_early_z(ctx); +} + +static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +{ + (void) func; + (void) ref; + update_alpha(ctx); +} + +static int translate_func(int func) +{ + switch (func) { + case GL_NEVER: + return R300_ZS_NEVER; + case GL_LESS: + return R300_ZS_LESS; + case GL_EQUAL: + return R300_ZS_EQUAL; + case GL_LEQUAL: + return R300_ZS_LEQUAL; + case GL_GREATER: + return R300_ZS_GREATER; + case GL_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case GL_GEQUAL: + return R300_ZS_GEQUAL; + case GL_ALWAYS: + return R300_ZS_ALWAYS; + } + return 0; +} + +static void update_depth(GLcontext* ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, zs); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << 
R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + + if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { + if (ctx->Depth.Mask) + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST_AND_WRITE; + else + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST; + + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth.Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + } + + update_early_z(ctx); +} + /** * Handle glEnable()/glDisable(). * @@ -436,15 +494,7 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state) break; case GL_ALPHA_TEST: - R300_STATECHANGE(r300, at); - if (state) { - r300->hw.at.cmd[R300_AT_ALPHA_TEST] |= - R300_ALPHA_TEST_ENABLE; - } else { - r300->hw.at.cmd[R300_AT_ALPHA_TEST] &= - ~R300_ALPHA_TEST_ENABLE; - } - update_early_z(ctx); + update_alpha(ctx); break; case GL_BLEND: @@ -453,19 +503,7 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state) break; case GL_DEPTH_TEST: - R300_STATECHANGE(r300, zs); - - if (state) { - if (ctx->Depth.Mask) - newval = R300_RB3D_Z_TEST_AND_WRITE; - else - newval = R300_RB3D_Z_TEST; - } else - newval = R300_RB3D_Z_DISABLED_1; - - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= newval; - update_early_z(ctx); + update_depth(ctx); break; case GL_STENCIL_TEST: @@ -593,38 +631,8 @@ static void r300FrontFace(GLcontext* ctx, GLenum mode) */ static void r300DepthFunc(GLcontext* ctx, GLenum func) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - - R300_STATECHANGE(r300, zs); - - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); - - switch(func) { - case GL_NEVER: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NEVER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_LESS: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LESS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - 
case GL_EQUAL: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_EQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_LEQUAL: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_GREATER: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GREATER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_NOTEQUAL: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NOTEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_GEQUAL: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - case GL_ALWAYS: - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; - break; - } + (void) func; + update_depth(ctx); } @@ -635,7 +643,8 @@ static void r300DepthFunc(GLcontext* ctx, GLenum func) */ static void r300DepthMask(GLcontext* ctx, GLboolean mask) { - r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); + (void) mask; + update_depth(ctx); } @@ -793,29 +802,6 @@ static void r300PolygonMode(GLcontext *ctx, GLenum face, GLenum mode) * Stencil */ -static int translate_stencil_func(int func) -{ - switch (func) { - case GL_NEVER: - return R300_ZS_NEVER; - case GL_LESS: - return R300_ZS_LESS; - case GL_EQUAL: - return R300_ZS_EQUAL; - case GL_LEQUAL: - return R300_ZS_LEQUAL; - case GL_GREATER: - return R300_ZS_GREATER; - case GL_NOTEQUAL: - return R300_ZS_NOTEQUAL; - case GL_GEQUAL: - return R300_ZS_GEQUAL; - case GL_ALWAYS: - return R300_ZS_ALWAYS; - } - return 0; -} - static int translate_stencil_op(int op) { switch (op) { @@ -877,7 +863,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= ~((R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_REF_SHIFT) | (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT)); - flag = translate_stencil_func(ctx->Stencil.Function[0]); + flag = translate_func(ctx->Stencil.Function[0]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | (flag << 
R300_RB3D_ZS1_BACK_FUNC_SHIFT); @@ -987,15 +973,15 @@ void r300UpdateViewportOffset( GLcontext *ctx ) GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y; - if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != r300PackFloat32(tx) || - rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != r300PackFloat32(ty)) + if ( rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { /* Note: this should also modify whatever data the context reset * code uses... */ R300_STATECHANGE( rmesa, vpt ); - rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = r300PackFloat32(tx); - rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = r300PackFloat32(ty); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); } @@ -1316,19 +1302,20 @@ void r300_setup_rs_unit(GLcontext *ctx) R300_STATECHANGE(r300, rr); fp_reg = in_texcoords = col_interp_nr = high_rr = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + for (i=0;i<ctx->Const.MaxTextureUnits;i++) { r300->hw.ri.cmd[R300_RI_INTERP_0+i] = 0 | R300_RS_INTERP_USED | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) | interp_magic[i]; + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0<<i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0 - | R300_RS_ROUTE_ENABLE + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= + R300_RS_ROUTE_ENABLE | i /* source INTERP */ | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); high_rr = fp_reg; @@ -1525,26 +1512,16 @@ static void r300GenerateSimpleVertexShader(r300ContextPtr r300) ) o_reg += 2; - if (RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) { - WRITE_OP( - EASY_VSF_OP(MUL, o_reg++, ALL, RESULT), - VSF_REG(r300->state.vap_reg.i_color[1]), - VSF_ATTR_UNITY(r300->state.vap_reg.i_color[1]), - 
VSF_UNITY(r300->state.vap_reg.i_color[1]) - ) - } - - /* Pass through texture coordinates, if any */ - for(i=0;i < r300->radeon.glCtx->Const.MaxTextureUnits;i++) - if (RENDERINPUTS_TEST( r300->state.render_inputs_bitset, _TNL_ATTRIB_TEX(i) )){ - // fprintf(stderr, "i_tex[%d]=%d\n", i, r300->state.vap_reg.i_tex[i]); + for (i = VERT_ATTRIB_COLOR1; i < VERT_ATTRIB_MAX; i++) + if (r300->state.sw_tcl_inputs[i] != -1) { WRITE_OP( EASY_VSF_OP(MUL, o_reg++ /* 2+i */, ALL, RESULT), - VSF_REG(r300->state.vap_reg.i_tex[i]), - VSF_ATTR_UNITY(r300->state.vap_reg.i_tex[i]), - VSF_UNITY(r300->state.vap_reg.i_tex[i]) + VSF_REG(r300->state.sw_tcl_inputs[i]), + VSF_ATTR_UNITY(r300->state.sw_tcl_inputs[i]), + VSF_UNITY(r300->state.sw_tcl_inputs[i]) ) - } + + } r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */ r300->state.vertex_shader.program.length=(r300->state.vertex_shader.program_end+1)*4; @@ -1675,18 +1652,24 @@ void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; struct r300_vertex_program *vp; + int i; ctx = rmesa->radeon.glCtx; - /* Disable tnl programs when doing software vertex programs. - I can only hope this actually disables it at the right time. 
*/ - ctx->_MaintainTnlProgram = hw_tcl_on; - if (rmesa->NewGLState && hw_tcl_on) { rmesa->NewGLState = 0; + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i]; + } + _tnl_UpdateFixedFunctionProgram(ctx); + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i]; + } + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); if (vp->translated == GL_FALSE) r300_translate_vertex_shader(vp); @@ -2202,12 +2185,12 @@ void r300InitState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; - depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z; + depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z; r300->state.stencil.clear = 0x00000000; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; - depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z; + depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z; r300->state.stencil.clear = 0x00ff0000; break; default: diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index c1d01020cdb..96973c00987 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -116,23 +116,23 @@ static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, */ switch (texFormat->TexelBytes) { case 1: - blit_format = R200_CP_COLOR_FORMAT_CI8; + blit_format = R300_CP_COLOR_FORMAT_CI8; srcPitch = t->image[0][0].width * texFormat->TexelBytes; dstPitch = t->image[0][0].width * texFormat->TexelBytes; break; case 2: - blit_format = R200_CP_COLOR_FORMAT_RGB565; + blit_format = R300_CP_COLOR_FORMAT_RGB565; srcPitch = t->image[0][0].width * texFormat->TexelBytes; dstPitch = t->image[0][0].width * texFormat->TexelBytes; break; case 4: - blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + blit_format = R300_CP_COLOR_FORMAT_ARGB8888; srcPitch = t->image[0][0].width * 
texFormat->TexelBytes; dstPitch = t->image[0][0].width * texFormat->TexelBytes; break; case 8: case 16: - blit_format = R200_CP_COLOR_FORMAT_CI8; + blit_format = R300_CP_COLOR_FORMAT_CI8; srcPitch = t->image[0][0].width * texFormat->TexelBytes; dstPitch = t->image[0][0].width * texFormat->TexelBytes; break; @@ -179,17 +179,17 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, switch (texFormat->TexelBytes) { case 1: - blit_format = R200_CP_COLOR_FORMAT_CI8; + blit_format = R300_CP_COLOR_FORMAT_CI8; break; case 2: - blit_format = R200_CP_COLOR_FORMAT_RGB565; + blit_format = R300_CP_COLOR_FORMAT_RGB565; break; case 4: - blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + blit_format = R300_CP_COLOR_FORMAT_ARGB8888; break; case 8: case 16: - blit_format = R200_CP_COLOR_FORMAT_CI8; + blit_format = R300_CP_COLOR_FORMAT_CI8; break; default: return; @@ -457,8 +457,8 @@ static void uploadSubImage( r300ContextPtr rmesa, r300TexObjPtr t, needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed so the kernel module reads the right amount of data. */ - tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (BLIT_WIDTH_BYTES / 64); + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); tex.height = (imageHeight + 3) / 4; tex.width = (imageWidth + 3) / 4; if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index b8055788229..c4a1bf01b4c 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -47,64 +47,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_tex.h" #include "r300_reg.h" -#define R200_TXFORMAT_A8 R200_TXFORMAT_I8 -#define R200_TXFORMAT_L8 R200_TXFORMAT_I8 -#define R200_TXFORMAT_AL88 R200_TXFORMAT_AI88 -#define R200_TXFORMAT_YCBCR R200_TXFORMAT_YVYU422 -#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422 -#define R200_TXFORMAT_RGB_DXT1 R200_TXFORMAT_DXT1 -#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1 -#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23 -#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45 - -#define _COLOR(f) \ - [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 } -#define _COLOR_REV(f) \ - [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 } -#define _ALPHA(f) \ - [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 } -#define _ALPHA_REV(f) \ - [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 } -#define _YUV(f) \ - [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB } -#define _INVALID(f) \ - [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 } -#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ - || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ - && tx_table[f].flag ) - -#define _ASSIGN(entry, format) \ - [ MESA_FORMAT_ ## entry ] = { format, 0, 1} -static const struct { - GLuint format, filter; -} tx_table0[] = { - _ALPHA(RGBA8888), - _ALPHA_REV(RGBA8888), - _ALPHA(ARGB8888), - _ALPHA_REV(ARGB8888), - _INVALID(RGB888), - _COLOR(RGB565), - _COLOR_REV(RGB565), - _ALPHA(ARGB4444), - _ALPHA_REV(ARGB4444), - _ALPHA(ARGB1555), - _ALPHA_REV(ARGB1555), - _ALPHA(AL88), - _ALPHA_REV(AL88), - _ALPHA(A8), - _COLOR(L8), - _ALPHA(I8), - _INVALID(CI8), - _YUV(YCBCR), - _YUV(YCBCR_REV), - _INVALID(RGB_FXT1), - _INVALID(RGBA_FXT1), - _COLOR(RGB_DXT1), - _ALPHA(RGBA_DXT1), - _ALPHA(RGBA_DXT3), - _ALPHA(RGBA_DXT5), - }; +#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ + || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ + (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ + && 
tx_table[f].flag ) + +#define _ASSIGN(entry, format) \ + [ MESA_FORMAT_ ## entry ] = { format, 0, 1} static const struct { GLuint format, filter, flag; @@ -156,9 +106,6 @@ static const struct { _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), }; -#undef _COLOR -#undef _ALPHA -#undef _INVALID #undef _ASSIGN @@ -186,17 +133,12 @@ static void r300SetTexImages(r300ContextPtr rmesa, /* Set the hardware texture format */ - - t->format &= ~(R200_TXFORMAT_FORMAT_MASK | - R200_TXFORMAT_ALPHA_IN_MAP); - - if (VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { + if (VALID_FORMAT(baseImage->TexFormat->MesaFormat) && + tx_table[baseImage->TexFormat->MesaFormat].flag) { t->format = tx_table[baseImage->TexFormat->MesaFormat].format; -#if 1 t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter; -#endif } else { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); @@ -207,7 +149,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, /* Compute which mipmap levels we really want to send to the hardware. */ - driCalculateTextureFirstLastLevel((driTextureObject *) t); log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; @@ -222,7 +163,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ curOffset = 0; - blitWidth = BLIT_WIDTH_BYTES; + blitWidth = R300_BLIT_WIDTH_BYTES; t->tile_bits = 0; /* figure out if this texture is suitable for tiling. 
*/ @@ -310,16 +251,16 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->image[0][i].width = MIN2(size / texelBytes, blitWidth); t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; } else { - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, R300_BLIT_WIDTH_BYTES); t->image[0][i].height = size / t->image[0][i].width; } #if 0 /* for debugging only and only applicable to non-rectangle targets */ assert(size % t->image[0][i].width == 0); assert(t->image[0][i].x == 0 - || (size < BLIT_WIDTH_BYTES + || (size < R300_BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); #endif @@ -366,7 +307,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->format |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); #endif - +#if 0 t->format_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); if (tObj->Target == GL_TEXTURE_3D) { t->format_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); @@ -385,6 +326,11 @@ static void r300SetTexImages(r300ContextPtr rmesa, (log2Width << R200_FACE_WIDTH_4_SHIFT) | (log2Height << R200_FACE_HEIGHT_4_SHIFT)); } +#endif + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->format |= R300_TX_FORMAT_CUBIC_MAP; + } t->size = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT) |((tObj->Image[0][t->base.firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)) @@ -451,11 +397,12 @@ static GLboolean enable_tex_3d(GLcontext * ctx, int unit) /* Need to load the 3d images associated with this unit. 
*/ +#if 0 if (t->format & R200_TXFORMAT_NON_POWER2) { t->format &= ~R200_TXFORMAT_NON_POWER2; t->base.dirty_images[0] = ~0; } - +#endif ASSERT(tObj->Target == GL_TEXTURE_3D); /* R100 & R200 do not support mipmaps for 3D textures. @@ -486,12 +433,13 @@ static GLboolean enable_tex_cube(GLcontext * ctx, int unit) /* Need to load the 2d images associated with this unit. */ +#if 0 if (t->format & R200_TXFORMAT_NON_POWER2) { t->format &= ~R200_TXFORMAT_NON_POWER2; for (face = 0; face < 6; face++) t->base.dirty_images[face] = ~0; } - +#endif ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); if (t->base.dirty_images[0] || t->base.dirty_images[1] || @@ -570,10 +518,6 @@ static GLboolean update_tex_common(GLcontext * ctx, int unit) driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ } -#if R200_MERGED - FALLBACK(&rmesa->radeon, RADEON_FALLBACK_BORDER_MODE, t->border_fallback); -#endif - return !t->border_fallback; } @@ -617,8 +561,4 @@ void r300UpdateTextureState(GLcontext * ctx) r300UpdateTextureUnit(ctx, 6) && r300UpdateTextureUnit(ctx, 7) ); - -#if R200_MERGED - FALLBACK(&rmesa->radeon, RADEON_FALLBACK_TEXTURE, !ok); -#endif } diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 9e32cfcfef9..cc932b86d99 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -39,56 +39,68 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_program.h" #include "program_instruction.h" +#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ + SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ + SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ + SWIZZLE_W != VSF_IN_COMPONENT_W || \ + SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ + SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ + WRITEMASK_X != VSF_FLAG_X || \ + WRITEMASK_Y != VSF_FLAG_Y || \ + WRITEMASK_Z != VSF_FLAG_Z || \ + WRITEMASK_W != VSF_FLAG_W +#error Cannot change these! 
+#endif + #define SCALAR_FLAG (1<<31) #define FLAG_MASK (1<<31) #define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip, op) {#operator, OPCODE_##operator, ip, op} +#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} static struct{ char *name; int opcode; unsigned long ip; /* number of input operands and flags */ - unsigned long op; }op_names[]={ - OPN(ABS, 1, 1), - OPN(ADD, 2, 1), - OPN(ARL, 1, 1|SCALAR_FLAG), - OPN(DP3, 2, 3|SCALAR_FLAG), - OPN(DP4, 2, 3|SCALAR_FLAG), - OPN(DPH, 2, 3|SCALAR_FLAG), - OPN(DST, 2, 1), - OPN(EX2, 1|SCALAR_FLAG, 4|SCALAR_FLAG), - OPN(EXP, 1|SCALAR_FLAG, 1), - OPN(FLR, 1, 1), - OPN(FRC, 1, 1), - OPN(LG2, 1|SCALAR_FLAG, 4|SCALAR_FLAG), - OPN(LIT, 1, 1), - OPN(LOG, 1|SCALAR_FLAG, 1), - OPN(MAD, 3, 1), - OPN(MAX, 2, 1), - OPN(MIN, 2, 1), - OPN(MOV, 1, 1), - OPN(MUL, 2, 1), - OPN(POW, 2|SCALAR_FLAG, 4|SCALAR_FLAG), - OPN(RCP, 1|SCALAR_FLAG, 4|SCALAR_FLAG), - OPN(RSQ, 1|SCALAR_FLAG, 4|SCALAR_FLAG), - OPN(SGE, 2, 1), - OPN(SLT, 2, 1), - OPN(SUB, 2, 1), - OPN(SWZ, 1, 1), - OPN(XPD, 2, 1), - OPN(RCC, 0, 0), //extra - OPN(PRINT, 0, 0), - OPN(END, 0, 0), + OPN(ABS, 1), + OPN(ADD, 2), + OPN(ARL, 1|SCALAR_FLAG), + OPN(DP3, 2), + OPN(DP4, 2), + OPN(DPH, 2), + OPN(DST, 2), + OPN(EX2, 1|SCALAR_FLAG), + OPN(EXP, 1|SCALAR_FLAG), + OPN(FLR, 1), + OPN(FRC, 1), + OPN(LG2, 1|SCALAR_FLAG), + OPN(LIT, 1), + OPN(LOG, 1|SCALAR_FLAG), + OPN(MAD, 3), + OPN(MAX, 2), + OPN(MIN, 2), + OPN(MOV, 1), + OPN(MUL, 2), + OPN(POW, 2|SCALAR_FLAG), + OPN(RCP, 1|SCALAR_FLAG), + OPN(RSQ, 1|SCALAR_FLAG), + OPN(SGE, 2), + OPN(SLT, 2), + OPN(SUB, 2), + OPN(SWZ, 1), + OPN(XPD, 2), + OPN(RCC, 0), //extra + OPN(PRINT, 0), + OPN(END, 0), }; #undef OPN int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp, float *dst) { int pi; - struct vertex_program *mesa_vp=(void *)vp; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; float *dst_o=dst; - struct program_parameter_list *paramList; + struct 
gl_program_parameter_list *paramList; if (mesa_vp->IsNVProgram) { _mesa_init_vp_per_primitive_registers(ctx); @@ -134,14 +146,8 @@ int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp, f static unsigned long t_dst_mask(GLuint mask) { - unsigned long flags=0; - - if(mask & WRITEMASK_X) flags |= VSF_FLAG_X; - if(mask & WRITEMASK_Y) flags |= VSF_FLAG_Y; - if(mask & WRITEMASK_Z) flags |= VSF_FLAG_Z; - if(mask & WRITEMASK_W) flags |= VSF_FLAG_W; - - return flags; + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; } static unsigned long t_dst_class(enum register_file file) @@ -211,19 +217,10 @@ static unsigned long t_src_class(enum register_file file) } } -static unsigned long t_swizzle(GLubyte swizzle) +static __inline unsigned long t_swizzle(GLubyte swizzle) { - switch(swizzle){ - case SWIZZLE_X: return VSF_IN_COMPONENT_X; - case SWIZZLE_Y: return VSF_IN_COMPONENT_Y; - case SWIZZLE_Z: return VSF_IN_COMPONENT_Z; - case SWIZZLE_W: return VSF_IN_COMPONENT_W; - case SWIZZLE_ZERO: return VSF_IN_COMPONENT_ZERO; - case SWIZZLE_ONE: return VSF_IN_COMPONENT_ONE; - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - exit(0); - } +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; } #if 0 @@ -263,20 +260,26 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src return vp->inputs[src->Index]; }else{ + if (src->Index < 0) { + fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); + return 0; + } return src->Index; } } static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { - + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
+ */ return MAKE_VSF_SOURCE(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 1)), t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); + src->NegateBase) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) @@ -288,13 +291,14 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_sr t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } static unsigned long t_opcode(enum prog_opcode opcode) { switch(opcode){ + case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; case OPCODE_DST: return R300_VPI_OUT_OP_DST; case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; @@ -332,41 +336,41 @@ static unsigned long op_operands(enum prog_opcode opcode) } /* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) (a.Index != b.Index && \ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ -#define ZERO_SRC_0 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) -#define ZERO_SRC_1 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, 
SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) - -#define ZERO_SRC_2 MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) -#define ONE_SRC_0 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) -#define ONE_SRC_1 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) - -#define ONE_SRC_2 MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) /* DP4 version seems to trigger some hw peculiarity */ //#define PREFER_DP4 @@ -382,7 +386,7 @@ static unsigned long op_operands(enum prog_opcode opcode) void r300_translate_vertex_shader(struct r300_vertex_program *vp) { - struct vertex_program *mesa_vp=(void *)vp; + struct gl_vertex_program 
*mesa_vp= &vp->mesa_program; struct prog_instruction *vpi; int i, cur_reg=0; VERTEX_SHADER_INSTRUCTION *o_inst; @@ -395,6 +399,9 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; struct prog_src_register src[3]; + if (mesa_vp->Base.NumInstructions == 0) + return; + if (getenv("R300_VP_SAFETY")) { WARN_ONCE("R300_VP_SAFETY enabled.\n"); @@ -429,7 +436,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) } if (mesa_vp->IsPositionInvariant) { - struct program_parameter_list *paramList; + struct gl_program_parameter_list *paramList; GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX }; #ifdef PREFER_DP4 @@ -576,7 +583,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2].File), VSF_FLAG_NONE); + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); o_inst->src2=ZERO_SRC_2; o_inst->src3=ZERO_SRC_2; @@ -584,6 +591,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) src[2].File=PROGRAM_TEMPORARY; src[2].Index=u_temp_i; + src[2].RelAddr=0; u_temp_i--; } @@ -597,7 +605,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0].File), VSF_FLAG_NONE); + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=ZERO_SRC_0; o_inst->src3=ZERO_SRC_0; @@ -605,20 +613,13 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) src[0].File=PROGRAM_TEMPORARY; src[0].Index=u_temp_i; + src[0].RelAddr=0; u_temp_i--; } } /* These ops need special handling. 
*/ switch(vpi->Opcode){ - case OPCODE_ARL: - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ARL, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src_scalar(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - goto next; - case OPCODE_POW: o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -628,6 +629,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) goto next; case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + case OPCODE_SWZ: #if 1 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -640,12 +642,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE, - t_src_class(src[0].File), VSF_FLAG_NONE); - - + o_inst->src2=ONE_SRC_0; o_inst->src3=ZERO_SRC_0; #endif @@ -705,7 +702,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), @@ -713,7 +710,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE); + src[1].NegateBase ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src3=ZERO_SRC_1; goto next; @@ -726,17 +723,14 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE, - t_src_class(src[0].File), VSF_FLAG_NONE); + o_inst->src2=ONE_SRC_0; o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE); + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); #else o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -748,7 +742,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE); + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src3=0; #endif goto next; @@ -764,7 +758,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE); + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src3=0; goto next; @@ -791,7 +785,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ - (!src[1].NegateBase) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src3=ZERO_SRC_0; u_temp_i--; @@ -807,7 +801,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=ZERO_SRC_0; o_inst->src3=ZERO_SRC_0; goto next; @@ -822,21 +816,21 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) VSF_IN_COMPONENT_ZERO, // z t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w VSF_IN_COMPONENT_ZERO, // z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x VSF_IN_COMPONENT_ZERO, // z t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); goto next; case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} @@ -849,7 +843,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE); + src[0].NegateBase ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=t_src(vp, &src[1]); o_inst->src3=ZERO_SRC_1; goto next; @@ -869,7 +863,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z @@ -877,7 +871,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src3=ZERO_SRC_1; o_inst++; @@ -892,7 +886,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE); + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z @@ -900,7 +894,7 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE); + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, @@ -916,16 +910,6 @@ void r300_translate_vertex_shader(struct r300_vertex_program *vp) fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode); exit(-1); break; - case OPCODE_SWZ: - hw_op=(src[0].File == PROGRAM_TEMPORARY) ? 
R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ONE_SRC_0; - o_inst->src3=ZERO_SRC_0; - - goto next; case OPCODE_END: break; default: diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h new file mode 120000 index 00000000000..eba99001ff8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_chipset.h @@ -0,0 +1 @@ +../radeon/radeon_chipset.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 0147c6492e5..e824a923aca 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vblank.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -#define DRIVER_DATE "20040924" +#define DRIVER_DATE "20060815" /* Return various strings for glGetString(). @@ -175,17 +175,15 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - radeon->do_irqs = (radeon->dri.drmMinor >= 6 && - fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); if (!radeon->do_irqs) fprintf(stderr, - "IRQ's not enabled, falling back to %s: %d %d %d\n", + "IRQ's not enabled, falling back to %s: %d %d\n", radeon->do_usleeps ? "usleeps" : "busy waits", - radeon->dri.drmMinor, fthrottle_mode, radeon->radeonScreen->irq); radeon->vblank_flags = (radeon->radeonScreen->irq != 0) @@ -203,8 +201,13 @@ GLboolean radeonInitContext(radeonContextPtr radeon, */ void radeonCleanupContext(radeonContextPtr radeon) { + /* _mesa_destroy_context() might result in calls to functions that + * depend on the DriverCtx, so don't set it to NULL before. 
+ * + * radeon->glCtx->DriverCtx = NULL; + */ + /* free the Mesa context */ - radeon->glCtx->DriverCtx = NULL; _mesa_destroy_context(radeon->glCtx); if (radeon->state.scissor.pClipRects) { diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 4eeb4edcfd4..0ec6466e441 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" #include "drm.h" #include "dri_util.h" +#include "colormac.h" struct radeon_context; typedef struct radeon_context radeonContextRec; diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c index 798e83c0107..d5b53194189 100644 --- a/src/mesa/drivers/dri/r300/radeon_ioctl.c +++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c @@ -122,7 +122,7 @@ static void radeonWaitIrq(radeonContextPtr radeon) do { ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT, &radeon->iw, sizeof(radeon->iw)); - } while (ret && (errno == EINTR || errno == EAGAIN)); + } while (ret && (errno == EINTR || errno == EBUSY)); if (ret) { fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, @@ -183,10 +183,7 @@ void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, (void *)radeon->glCtx); } - if (IS_R200_CLASS(radeon->radeonScreen)) - R200_FIREVERTICES((r200ContextPtr)radeon); - else - r300Flush(radeon->glCtx); + r300Flush(radeon->glCtx); LOCK_HARDWARE(radeon); @@ -283,10 +280,7 @@ void radeonPageFlip(const __DRIdrawablePrivate * dPriv) radeon->sarea->pfCurrentPage); } - if (IS_R200_CLASS(radeon->radeonScreen)) - R200_FIREVERTICES((r200ContextPtr)radeon); - else - r300Flush(radeon->glCtx); + r300Flush(radeon->glCtx); LOCK_HARDWARE(radeon); if (!dPriv->numClipRects) { diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h index b53767510e8..3a80d36c622 100644 --- 
a/src/mesa/drivers/dri/r300/radeon_ioctl.h +++ b/src/mesa/drivers/dri/r300/radeon_ioctl.h @@ -41,7 +41,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xf86drm.h" #include "drm.h" +#if 0 #include "r200_context.h" +#endif #include "radeon_drm.h" extern void radeonCopyBuffer(const __DRIdrawablePrivate * drawable, diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c index 893b4b4e7ef..edf26183148 100644 --- a/src/mesa/drivers/dri/r300/radeon_lock.c +++ b/src/mesa/drivers/dri/r300/radeon_lock.c @@ -153,6 +153,8 @@ void radeonGetLock(radeonContextPtr radeon, GLuint flags) __DRIdrawablePrivate *dPriv = radeon->dri.drawable; __DRIscreenPrivate *sPriv = radeon->dri.screen; drm_radeon_sarea_t *sarea = radeon->sarea; + + assert (dPriv != NULL); drmGetLock(radeon->dri.fd, radeon->dri.hwContext, flags); diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h index c7c75ae862a..632329fde1e 100644 --- a/src/mesa/drivers/dri/r300/radeon_lock.h +++ b/src/mesa/drivers/dri/r300/radeon_lock.h @@ -35,7 +35,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#ifndef __RADEON_LOCK_H__ #define __RADEON_LOCK_H__ +#if 0 #include "r200_ioctl.h" +#endif +#include "radeon_context.h" extern void radeonGetLock(radeonContextPtr radeon, GLuint flags); diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c index 55aa5df01a6..7595d2144f7 100644 --- a/src/mesa/drivers/dri/r300/radeon_mm.c +++ b/src/mesa/drivers/dri/r300/radeon_mm.c @@ -69,6 +69,15 @@ void radeon_mm_init(r300ContextPtr rmesa) resize_u_list(rmesa); } +void radeon_mm_destroy(r300ContextPtr rmesa) +{ + _mesa_free(rmesa->rmm->u_list); + rmesa->rmm->u_list = NULL; + + _mesa_free(rmesa->rmm); + rmesa->rmm = NULL; +} + void *radeon_mm_ptr(r300ContextPtr rmesa, int id) { assert(id <= rmesa->rmm->u_last); @@ -96,7 +105,7 @@ int radeon_mm_find(r300ContextPtr rmesa, void *ptr) int radeon_mm_alloc(r300ContextPtr rmesa, int alignment, int size) { drm_radeon_mem_alloc_t alloc; - int offset, ret; + int offset = 0, ret; int i, free=-1; int done_age; drm_radeon_mem_free_t memfree; @@ -230,7 +239,9 @@ int radeon_mm_alloc(r300ContextPtr rmesa, int alignment, int size) #include "r300_emit.h" static void emit_lin_cp(r300ContextPtr rmesa, unsigned long dst, unsigned long src, unsigned long size) { - LOCAL_VARS + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; int cp_size; diff --git a/src/mesa/drivers/dri/r300/radeon_mm.h b/src/mesa/drivers/dri/r300/radeon_mm.h index 9df5224ddeb..81f89917e60 100644 --- a/src/mesa/drivers/dri/r300/radeon_mm.h +++ b/src/mesa/drivers/dri/r300/radeon_mm.h @@ -26,6 +26,7 @@ struct radeon_memory_manager { }; extern void radeon_mm_init(r300ContextPtr rmesa); +extern void radeon_mm_destroy(r300ContextPtr rmesa); extern void *radeon_mm_ptr(r300ContextPtr rmesa, int id); extern int radeon_mm_find(r300ContextPtr rmesa, void *ptr); extern int radeon_mm_alloc(r300ContextPtr rmesa, int alignment, int size); diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h 
b/src/mesa/drivers/dri/r300/radeon_screen.h new file mode 120000 index 00000000000..23bb6bd4598 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_screen.h @@ -0,0 +1 @@ +../radeon/radeon_screen.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c index c95ae8df4f0..a6375ff878f 100644 --- a/src/mesa/drivers/dri/r300/radeon_state.c +++ b/src/mesa/drivers/dri/r300/radeon_state.c @@ -140,10 +140,7 @@ static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h if (ctx->Scissor.Enabled) { /* We don't pipeline cliprect changes */ - if (IS_R200_CLASS(radeon->radeonScreen)) - R200_FIREVERTICES((r200ContextPtr)radeon); - else - r300Flush(ctx); + r300Flush(ctx); radeonUpdateScissor(ctx); } @@ -196,10 +193,7 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) switch(cap) { case GL_SCISSOR_TEST: /* We don't pipeline cliprect & scissor changes */ - if (IS_R200_CLASS(radeon->radeonScreen)) - R200_FIREVERTICES((r200ContextPtr)radeon); - else - r300Flush(ctx); + r300Flush(ctx); radeon->state.scissor.enabled = state; radeonUpdateScissor(ctx); diff --git a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c index 1044973b001..72c03c53ad9 100644 --- a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c +++ b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c @@ -44,6 +44,7 @@ #include "vtxfmt.h" #include "api_validate.h" #include "state.h" +#include "image.h" #define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \ rvb->AttribPtr[(a)].type = GL_FLOAT, \ @@ -73,6 +74,9 @@ void radeon_vb_to_rvb(r300ContextPtr rmesa, struct radeon_vertex_buffer *rvb, st for (i=0; i < ctx->Const.MaxTextureCoordUnits; i++) CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]); + + for (i=0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++) + CONV_VB(VERT_ATTRIB_GENERIC0 + i, AttribPtr[VERT_ATTRIB_GENERIC0 + i]); rvb->Primitive = vb->Primitive; rvb->PrimitiveCount = vb->PrimitiveCount; @@ -84,16 +88,18 @@ void radeon_vb_to_rvb(r300ContextPtr rmesa, struct radeon_vertex_buffer *rvb, st extern void _tnl_array_init( GLcontext *ctx ); -#define CONV(a, b) do { \ - if 
(ctx->Array.b.Enabled) { \ - rmesa->state.VB.AttribPtr[(a)].size = ctx->Array.b.Size; \ - rmesa->state.VB.AttribPtr[(a)].data = ctx->Array.b.BufferObj->Name ? \ - (void *)ADD_POINTERS(ctx->Array.b.Ptr, ctx->Array.b.BufferObj->Data) : (void *)ctx->Array.b.Ptr; \ - rmesa->state.VB.AttribPtr[(a)].stride = ctx->Array.b.StrideB; \ - rmesa->state.VB.AttribPtr[(a)].type = ctx->Array.b.Type; \ - enabled |= 1 << (a); \ - } \ - } while (0) +#define CONV(a, b) \ + do { \ + if (ctx->Array.ArrayObj->b.Enabled) { \ + rmesa->state.VB.AttribPtr[(a)].size = ctx->Array.ArrayObj->b.Size; \ + rmesa->state.VB.AttribPtr[(a)].data = ctx->Array.ArrayObj->b.BufferObj->Name \ + ? (void *)ADD_POINTERS(ctx->Array.ArrayObj->b.Ptr, ctx->Array.ArrayObj->b.BufferObj->Data) \ + : (void *)ctx->Array.ArrayObj->b.Ptr; \ + rmesa->state.VB.AttribPtr[(a)].stride = ctx->Array.ArrayObj->b.StrideB; \ + rmesa->state.VB.AttribPtr[(a)].type = ctx->Array.ArrayObj->b.Type; \ + enabled |= 1 << (a); \ + } \ + } while (0) static int setup_arrays(r300ContextPtr rmesa, GLint start) { @@ -136,15 +142,13 @@ static int setup_arrays(r300ContextPtr rmesa, GLint start) for(i=0; i < VERT_ATTRIB_MAX; i++){ if(rmesa->state.VB.AttribPtr[i].type != GL_UNSIGNED_BYTE && - rmesa->state.VB.AttribPtr[i].type != GL_FLOAT){ +#if MESA_LITTLE_ENDIAN + rmesa->state.VB.AttribPtr[i].type != GL_SHORT && +#endif + rmesa->state.VB.AttribPtr[i].type != GL_FLOAT){ WARN_ONCE("Unsupported format %d at index %d\n", rmesa->state.VB.AttribPtr[i].type, i); return R300_FALLBACK_TCL; } - if(rmesa->state.VB.AttribPtr[i].type == GL_UNSIGNED_BYTE && - rmesa->state.VB.AttribPtr[i].size != 4){ - WARN_ONCE("Unsupported component count for ub colors\n"); - return R300_FALLBACK_TCL; - } /*fprintf(stderr, "%d: ", i); @@ -298,47 +302,15 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G if (ctx->NewState) _mesa_update_state( ctx ); - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = 
TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i]; - } r300UpdateShaders(rmesa); - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i]; - } - if (rmesa->state.VB.LockCount) { - if (rmesa->state.VB.lock_uptodate == GL_FALSE) { - if (setup_arrays(rmesa, rmesa->state.VB.LockFirst)) - return; - - rmesa->state.VB.Count = rmesa->state.VB.LockCount; - - r300ReleaseArrays(ctx); - r300EmitArrays(ctx, GL_FALSE); - - rmesa->state.VB.lock_uptodate = GL_TRUE; - } - - if (min < rmesa->state.VB.LockFirst) { - WARN_ONCE("Out of range min %d vs %d!\n", min, rmesa->state.VB.LockFirst); - return; - } - - if (max >= rmesa->state.VB.LockFirst + rmesa->state.VB.LockCount) { - WARN_ONCE("Out of range max %d vs %d!\n", max, rmesa->state.VB.LockFirst + - rmesa->state.VB.LockCount); - return; - } - } else { - if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) { - r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); - goto fallback; - } - - rmesa->state.VB.Count = max - min + 1; + if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) { + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + goto fallback; } + rmesa->state.VB.Count = max - min + 1; + r300UpdateShaderStates(rmesa); rmesa->state.VB.Primitive = &prim; @@ -354,7 +326,10 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G rmesa->state.VB.Elts = ptr; rmesa->state.VB.elt_size = elt_size; - r300_run_vb_render(ctx, NULL); + if (r300_run_vb_render(ctx, NULL)) { + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + goto fallback; + } if(rvb.buf) radeon_mm_use(rmesa, rvb.buf->id); @@ -382,6 +357,22 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei const GLvoid *indices = c_indices; if (count > 65535) { + /* TODO */ + if (mode == GL_POINTS || + mode == GL_LINES || + mode == GL_QUADS || + mode == GL_TRIANGLES) { + + while (count) { + i = r300_get_num_verts(rmesa, MIN2(count, 65535), 
mode); + + radeonDrawRangeElements(mode, min, max, i, type, indices); + + indices += i * _mesa_sizeof_type(type); + count -= i; + } + return ; + } WARN_ONCE("Too many verts!\n"); goto fallback; } @@ -482,46 +473,14 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei if (ctx->NewState) _mesa_update_state( ctx ); - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i]; - } r300UpdateShaders(rmesa); - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i]; - } - if (rmesa->state.VB.LockCount) { - if (rmesa->state.VB.lock_uptodate == GL_FALSE) { - if (setup_arrays(rmesa, rmesa->state.VB.LockFirst)) - goto fallback; - - rmesa->state.VB.Count = rmesa->state.VB.LockCount; - - r300ReleaseArrays(ctx); - r300EmitArrays(ctx, GL_FALSE); - - rmesa->state.VB.lock_uptodate = GL_TRUE; - } - - if (min < rmesa->state.VB.LockFirst) { - WARN_ONCE("Out of range min %d vs %d!\n", min, rmesa->state.VB.LockFirst); - goto fallback; - } - - /*if (max >= rmesa->state.VB.LockFirst + rmesa->state.VB.LockCount) { - WARN_ONCE("Out of range max %d vs %d!\n", max, rmesa->state.VB.LockFirst + - rmesa->state.VB.LockCount); - return; - }*/ - } else { - if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) { - r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); - goto fallback; - } - - rmesa->state.VB.Count = max - min + 1; + if (setup_arrays(rmesa, min) >= R300_FALLBACK_TCL) { + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + goto fallback; } + + rmesa->state.VB.Count = max - min + 1; r300UpdateShaderStates(rmesa); @@ -540,7 +499,10 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei rmesa->state.VB.elt_min = min; rmesa->state.VB.elt_max = max; - r300_run_vb_render(ctx, NULL); + if (r300_run_vb_render(ctx, NULL)) { + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + 
goto fallback; + } if(rvb.buf) radeon_mm_use(rmesa, rvb.buf->id); @@ -561,7 +523,6 @@ static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) GET_CURRENT_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); struct tnl_prim prim; - int i; if (count > 65535) { WARN_ONCE("Too many verts!\n"); @@ -578,44 +539,12 @@ static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) /* XXX: setup_arrays before state update? */ - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i]; - } r300UpdateShaders(rmesa); - for (i=_TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i]; - } - if (rmesa->state.VB.LockCount) { - if (rmesa->state.VB.lock_uptodate == GL_FALSE) { - if (setup_arrays(rmesa, rmesa->state.VB.LockFirst)) - return; - - rmesa->state.VB.Count = rmesa->state.VB.LockCount; - - r300ReleaseArrays(ctx); - r300EmitArrays(ctx, GL_FALSE); - - rmesa->state.VB.lock_uptodate = GL_TRUE; - } - - if (start < rmesa->state.VB.LockFirst) { - WARN_ONCE("Out of range min %d vs %d!\n", start, rmesa->state.VB.LockFirst); - goto fallback; - } - - if (start + count - 1 >= rmesa->state.VB.LockFirst + rmesa->state.VB.LockCount) { /* XXX */ - WARN_ONCE("Out of range max %d vs %d!\n", start + count - 1, rmesa->state.VB.LockFirst + - rmesa->state.VB.LockCount); - goto fallback; - } - } else { - if (setup_arrays(rmesa, start) >= R300_FALLBACK_TCL) - goto fallback; - - rmesa->state.VB.Count = count; - } + if (setup_arrays(rmesa, start) >= R300_FALLBACK_TCL) + goto fallback; + + rmesa->state.VB.Count = count; r300UpdateShaderStates(rmesa); @@ -634,7 +563,8 @@ static void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) rmesa->state.VB.elt_min = 0; rmesa->state.VB.elt_max = 0; - r300_run_vb_render(ctx, NULL); + if (r300_run_vb_render(ctx, NULL)) + goto fallback; return ; @@ -663,41 +593,6 @@ 
void radeon_init_vtxfmt_a(r300ContextPtr rmesa) #ifdef HW_VBOS -#if 0 -static void radeonLockArraysEXT(GLcontext *ctx, GLint first, GLsizei count) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - /* Only when CB_DPATH is defined. - r300Clear tampers over the aos setup without it. - (r300ResetHwState cannot call r300EmitArrays) - */ -#ifndef CB_DPATH - first = 0; count = 0; -#endif - - if (first < 0 || count <= 0) { - rmesa->state.VB.LockFirst = 0; - rmesa->state.VB.LockCount = 0; - rmesa->state.VB.lock_uptodate = GL_FALSE; - return ; - } - - rmesa->state.VB.LockFirst = first; - rmesa->state.VB.LockCount = count; - rmesa->state.VB.lock_uptodate = GL_FALSE; -} - -static void radeonUnlockArraysEXT(GLcontext *ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - rmesa->state.VB.LockFirst = 0; - rmesa->state.VB.LockCount = 0; - rmesa->state.VB.lock_uptodate = GL_FALSE; -} -#endif - static struct gl_buffer_object * r300NewBufferObject(GLcontext *ctx, GLuint name, GLenum target ) { @@ -854,15 +749,14 @@ void r300_evict_vbos(GLcontext *ctx, int amount) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct _mesa_HashTable *hash = ctx->Shared->BufferObjects; GLuint k = _mesa_HashFirstEntry(hash); - struct gl_buffer_object *obj; - struct r300_buffer_object *r300_obj; - GLvoid *data; while (amount > 0 && k) { - obj = (struct gl_buffer_object *) _mesa_HashLookup(hash, k); - r300_obj = (struct r300_buffer_object *) obj; + struct gl_buffer_object *obj = _mesa_lookup_bufferobj(ctx, k); + struct r300_buffer_object *r300_obj + = (struct r300_buffer_object *) obj; if (obj->OnCard && obj->Size) { + GLvoid *data; obj->Data = _mesa_malloc(obj->Size); data = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_R); @@ -889,9 +783,6 @@ void r300_init_vbo_funcs(struct dd_function_table *functions) functions->MapBuffer = r300MapBuffer; functions->UnmapBuffer = r300UnmapBuffer; functions->DeleteBuffer = r300DeleteBuffer; - - /*functions->LockArraysEXT = radeonLockArraysEXT; - 
functions->UnlockArraysEXT = radeonUnlockArraysEXT;*/ } #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 21161d2f69d..2167e7afe82 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -157,7 +157,6 @@ const struct dri_extension card_extensions[] = { NULL, NULL } }; -extern const struct tnl_pipeline_stage _radeon_texrect_stage; extern const struct tnl_pipeline_stage _radeon_render_stage; extern const struct tnl_pipeline_stage _radeon_tcl_stage; @@ -176,10 +175,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = { &_tnl_texgen_stage, &_tnl_texture_transform_stage, - /* Scale texture rectangle to 0..1. - */ - &_radeon_texrect_stage, - &_radeon_render_stage, &_tnl_render_stage, /* FALLBACK: */ NULL, diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index 53f6f57057b..aee849e13bf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -521,17 +521,15 @@ void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is require void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ) { - if (rmesa->dri.drmMinor >= 6) { - drm_radeon_cmd_header_t *cmd; + drm_radeon_cmd_header_t *cmd; - assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int), - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].wait.cmd_type = RADEON_CMD_WAIT; - cmd[0].wait.flags = flags; - } + assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int), + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].wait.cmd_type = RADEON_CMD_WAIT; + cmd[0].wait.flags = flags; } @@ -782,25 +780,15 @@ void radeonAllocDmaRegion( radeonContextPtr rmesa, static u_int32_t radeonGetLastFrame (radeonContextPtr rmesa) { - unsigned char *RADEONMMIO 
= rmesa->radeonScreen->mmio.map; + drm_radeon_getparam_t gp; int ret; u_int32_t frame; - if (rmesa->dri.screen->drmMinor >= 4) { - drm_radeon_getparam_t gp; + gp.param = RADEON_PARAM_LAST_FRAME; + gp.value = (int *)&frame; + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp) ); - gp.param = RADEON_PARAM_LAST_FRAME; - gp.value = (int *)&frame; - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, - &gp, sizeof(gp) ); - } - else - ret = -EINVAL; - - if ( ret == -EINVAL ) { - frame = INREG( RADEON_LAST_FRAME_REG ); - ret = 0; - } if ( ret ) { fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); exit(1); @@ -831,7 +819,7 @@ static void radeonWaitIrq( radeonContextPtr rmesa ) do { ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, &rmesa->iw, sizeof(rmesa->iw) ); - } while (ret && (errno == EINTR || errno == EAGAIN)); + } while (ret && (errno == EINTR || errno == EBUSY)); if ( ret ) { fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); @@ -1039,7 +1027,6 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all, radeonContextPtr rmesa = RADEON_CONTEXT(ctx); __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; drm_radeon_sarea_t *sarea = rmesa->sarea; - unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map; u_int32_t clear; GLuint flags = 0; GLuint color_mask = 0; @@ -1111,29 +1098,17 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all, */ while ( 1 ) { int ret; + drm_radeon_getparam_t gp; - if (rmesa->dri.screen->drmMinor >= 4) { - drm_radeon_getparam_t gp; - - gp.param = RADEON_PARAM_LAST_CLEAR; - gp.value = (int *)&clear; - ret = drmCommandWriteRead( rmesa->dri.fd, - DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); - } else - ret = -EINVAL; + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&clear; + ret = drmCommandWriteRead( rmesa->dri.fd, + DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); - if ( ret == -EINVAL ) { - clear = INREG( 
RADEON_LAST_CLEAR_REG ); - ret = 0; - } if ( ret ) { fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); exit(1); } - if ( RADEON_DEBUG & DEBUG_IOCTL ) { - fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear ); - if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" ); - } if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) { break; diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h index 335ed77c9ad..11a7d02b1b7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h @@ -192,8 +192,6 @@ static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa, { if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) radeonFlushCmdBuf( rmesa, __FUNCTION__ ); - - assert(rmesa->dri.drmMinor >= 3); { char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used; diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c index 9ca9ebe84e0..557057784c3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_sanity.c +++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c @@ -156,6 +156,7 @@ static struct { { 0, 8, "R200_PP_TXCTLALL_3"}, { 0, 8, "R200_PP_TXCTLALL_4"}, { 0, 8, "R200_PP_TXCTLALL_5"}, + { 0, 2, "R200_VAP_PVS_CNTL"}, }; struct reg_names { diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 3a0a8d208b2..140d848d9ae 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -121,11 +121,10 @@ DRI_CONF_BEGIN DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_SOFTWARE - DRI_CONF_ARB_VERTEX_PROGRAM(false) DRI_CONF_NV_VERTEX_PROGRAM(false) DRI_CONF_SECTION_END DRI_CONF_END; -static const GLuint __driNConfigOptions = 17; +static const GLuint __driNConfigOptions = 16; extern const struct dri_extension blend_extensions[]; extern const struct dri_extension ARB_vp_extension[]; @@ -153,6 
+152,17 @@ DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ DRI_CONF_OPT_END +#define DRI_CONF_DISABLE_S3TC(def) \ +DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \ + DRI_CONF_DESC(en,"Disable S3TC compression") \ +DRI_CONF_OPT_END + +#define DRI_CONF_DISABLE_FALLBACK(def) \ +DRI_CONF_OPT_BEGIN(disable_lowimpact_fallback,bool,def) \ + DRI_CONF_DESC(en,"Disable Low-impact fallback") \ +DRI_CONF_OPT_END + + const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -162,12 +172,14 @@ DRI_CONF_BEGIN DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8) DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8) DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) + DRI_CONF_DISABLE_FALLBACK(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) DRI_CONF_DEF_MAX_ANISOTROPY(1.0, "1.0,2.0,4.0,8.0,16.0") DRI_CONF_NO_NEG_LOD_BIAS(false) DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_DISABLE_S3TC(false) DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) @@ -176,7 +188,7 @@ DRI_CONF_BEGIN DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_END; -static const GLuint __driNConfigOptions = 14; +static const GLuint __driNConfigOptions = 16; #ifndef RADEON_DEBUG int RADEON_DEBUG = 0; @@ -372,6 +384,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->drmSupportsFragShader = (sPriv->drmMinor >= 18); screen->drmSupportsPointSprites = (sPriv->drmMinor >= 13); screen->drmSupportsCubeMapsR100 = (sPriv->drmMinor >= 15); + screen->drmSupportsVertexProgram = (sPriv->drmMinor >= 25); } screen->mmio.handle = dri_priv->registerHandle; @@ -635,13 +648,10 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) dri_priv->deviceID); return NULL; } - if (screen->chip_family == CHIP_FAMILY_R350 || - screen->chip_family == CHIP_FAMILY_R300) { - if (getenv("R300_FORCE_R300") == NULL) { - fprintf(stderr, 
"Radeon 9500/9700/9800 cards are not currently stable.\n"); - fprintf(stderr, "More details can be found at https://bugs.freedesktop.org/show_bug.cgi?id=6318\n"); - return NULL; - } + if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && + sPriv->ddxMinor < 2) { + fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); + return NULL; } if (screen->chip_family <= CHIP_FAMILY_RS200) diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 3e0f9454e7f..25e6fcf399a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -98,6 +98,7 @@ typedef struct { GLboolean drmSupportsFragShader; /* need radeon kernel module >= 1.18 */ GLboolean drmSupportsPointSprites; /* need radeon kernel module >= 1.13 */ GLboolean drmSupportsCubeMapsR100; /* need radeon kernel module >= 1.15 */ + GLboolean drmSupportsVertexProgram; /* need radeon kernel module >= 1.25 */ GLboolean depthHasSurface; /* Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 307c1f341b4..449c63eb7a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -135,7 +135,8 @@ static void radeonBlendEquationSeparate( GLcontext *ctx, if ( !fallback ) { RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b; - if ( ctx->Color._LogicOpEnabled ) { + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -1739,7 +1740,8 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE; } - if ( 
ctx->Color._LogicOpEnabled ) { + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -1877,7 +1879,8 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_COLOR_LOGIC_OP: RADEON_STATECHANGE( rmesa, ctx ); - if ( ctx->Color._LogicOpEnabled ) { + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -2017,26 +2020,6 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) * Deferred state management - matrices, textures, other? */ -static void texmat_set_texrect( radeonContextPtr rmesa, - struct gl_texture_object *tObj, GLuint unit ) -{ - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - _math_matrix_set_identity( &rmesa->tmpmat[unit] ); - rmesa->tmpmat[unit].m[0] = 1.0 / baseImage->Width; - rmesa->tmpmat[unit].m[5] = 1.0 / baseImage->Height; - -} - -static void texmat_fixup_texrect( radeonContextPtr rmesa, - struct gl_texture_object *tObj, GLuint unit ) -{ - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLuint i; - for (i = 0; i < 4; i++) { - rmesa->tmpmat[unit].m[i] = rmesa->tmpmat[unit].m[i] / baseImage->Width; - rmesa->tmpmat[unit].m[i+4] = rmesa->tmpmat[unit].m[i+4] / baseImage->Height; - }} - void radeonUploadTexMatrix( radeonContextPtr rmesa, int unit, GLboolean swapcols ) @@ -2177,15 +2160,6 @@ static void update_texturematrix( GLcontext *ctx ) _math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] ); needMatrix = GL_TRUE; } - if (ctx->Texture.Unit[unit]._ReallyEnabled == TEXTURE_RECT_BIT) { - texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE | - RADEON_TEXMAT_0_ENABLE) << unit; - if 
(needMatrix) - texmat_fixup_texrect( rmesa, ctx->Texture.Unit[unit]._Current, unit ); - else - texmat_set_texrect( rmesa, ctx->Texture.Unit[unit]._Current, unit ); - needMatrix = GL_TRUE; - } if (needMatrix) { rmesa->NeedTexMatrix |= 1 << unit; radeonUploadTexMatrix( rmesa, unit, diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 4d5bbbd1f18..e36a710d33a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -246,7 +246,11 @@ void radeonChooseVertexState( GLcontext *ctx ) radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint se_coord_fmt; + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0 | + RADEON_VTX_W0_IS_NOT_1_OVER_W0); /* We must ensure that we don't do _tnl_need_projected_coords while in a * rasterization fallback. As this function will be called again when we @@ -263,14 +267,12 @@ void radeonChooseVertexState( GLcontext *ctx ) !RENDERINPUTS_TEST( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) || (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) { rmesa->swtcl.needproj = GL_TRUE; - se_coord_fmt = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | - RADEON_VTX_Z_PRE_MULT_1_OVER_W0 | - RADEON_TEX1_W_ROUTING_USE_Q1); + se_coord_fmt |= (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0); } else { rmesa->swtcl.needproj = GL_FALSE; - se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 | - RADEON_TEX1_W_ROUTING_USE_Q1); + se_coord_fmt |= (RADEON_VTX_W0_IS_NOT_1_OVER_W0); } _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj ); @@ -458,7 +460,6 @@ static GLboolean radeon_run_render( GLcontext *ctx, - const struct tnl_pipeline_stage _radeon_render_stage = { "radeon render", @@ -472,113 +473,6 @@ const struct tnl_pipeline_stage _radeon_render_stage = 
/**************************************************************************/ -/* Radeon texture rectangle expects coords in 0..1 range, not 0..dimension - * as in the extension spec. Need to translate here. - * - * Note that swrast expects 0..dimension, so if a fallback is active, - * don't do anything. (Maybe need to configure swrast to match hw) - */ -struct texrect_stage_data { - GLvector4f texcoord[MAX_TEXTURE_UNITS]; -}; - -#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr) - - -static GLboolean run_texrect_stage( GLcontext *ctx, - struct tnl_pipeline_stage *stage ) -{ - struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - - if (rmesa->Fallback) - return GL_TRUE; - - for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) { - struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect; - struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel]; - const GLfloat iw = 1.0/texImage->Width; - const GLfloat ih = 1.0/texImage->Height; - GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data; - GLint instride = VB->TexCoordPtr[i]->stride; - GLfloat (*out)[4] = store->texcoord[i].data; - GLint j; - - store->texcoord[i].size = VB->TexCoordPtr[i]->size; - for (j = 0 ; j < VB->Count ; j++) { - switch (VB->TexCoordPtr[i]->size) { - case 4: - out[j][3] = in[3]; - /* fallthrough */ - case 3: - out[j][2] = in[2]; - /* fallthrough */ - default: - out[j][0] = in[0] * iw; - out[j][1] = in[1] * ih; - } - in = (GLfloat *)((GLubyte *)in + instride); - } - - VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i]; - } - } - - return GL_TRUE; -} - - -/* Called the first time stage->run() is invoked. 
- */ -static GLboolean alloc_texrect_data( GLcontext *ctx, - struct tnl_pipeline_stage *stage ) -{ - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; - struct texrect_stage_data *store; - GLuint i; - - stage->privatePtr = CALLOC(sizeof(*store)); - store = TEXRECT_STAGE_DATA(stage); - if (!store) - return GL_FALSE; - - for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) - _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 ); - - return GL_TRUE; -} - -static void free_texrect_data( struct tnl_pipeline_stage *stage ) -{ - struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); - GLuint i; - - if (store) { - for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++) - if (store->texcoord[i].data) - _mesa_vector4f_free( &store->texcoord[i] ); - FREE( store ); - stage->privatePtr = NULL; - } -} - -const struct tnl_pipeline_stage _radeon_texrect_stage = -{ - "radeon texrect stage", /* name */ - NULL, - alloc_texrect_data, - free_texrect_data, - NULL, - run_texrect_stage -}; - - -/**************************************************************************/ - static const GLuint reduced_hw_prim[GL_POLYGON+1] = { RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index cd421567b61..ffd49b525b8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -493,8 +493,12 @@ static void transition_to_hwtnl( GLcontext *ctx ) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 | - RADEON_TEX1_W_ROUTING_USE_Q1); + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0 | + RADEON_VTX_W0_IS_NOT_1_OVER_W0); + se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0; if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) { RADEON_STATECHANGE( rmesa, set ); diff --git 
a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 46ca86bde26..edaea6c209c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -311,7 +311,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_UNSIGNED_SHORT_1_5_5_5_REV: return _dri_texformat_argb1555; default: - return do32bpt ? _dri_texformat_rgba8888 : _dri_texformat_argb4444; + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444; } case 3: @@ -328,7 +328,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_UNSIGNED_SHORT_5_6_5_REV: return _dri_texformat_rgb565; default: - return do32bpt ? _dri_texformat_rgba8888 : _dri_texformat_rgb565; + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; } case GL_RGBA8: @@ -336,7 +336,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_RGBA12: case GL_RGBA16: return !force16bpt ? - _dri_texformat_rgba8888 : _dri_texformat_argb4444; + _dri_texformat_argb8888 : _dri_texformat_argb4444; case GL_RGBA4: case GL_RGBA2: @@ -349,7 +349,7 @@ radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_RGB10: case GL_RGB12: case GL_RGB16: - return !force16bpt ? _dri_texformat_rgba8888 : _dri_texformat_rgb565; + return !force16bpt ? 
_dri_texformat_argb8888 : _dri_texformat_rgb565; case GL_RGB5: case GL_RGB4: diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 3467832a3b0..1e3a3951e2d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -847,7 +847,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa, int unit, radeonTexObjPtr texobj ) { - GLuint *cmd = RADEON_DB_STATE( tex[unit] ); +/* do not use RADEON_DB_STATE to avoid stale texture caches */ + GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + RADEON_STATECHANGE( rmesa, tex[unit] ); cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK; cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; @@ -856,31 +860,39 @@ static void import_tex_obj_state( radeonContextPtr rmesa, cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { - GLuint *cube_cmd = RADEON_DB_STATE( cube[unit] ); - GLuint bytesPerFace = texobj->base.totalSize / 6; - ASSERT(texobj->base.totalSize % 6 == 0); - - cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; - /* dont know if this setup conforms to OpenGL.. 
- * at least it matches the behavior of mesa software renderer - */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */ - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] ); - cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */ - } - else if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] ); + se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit; + } + else { + se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit); + + if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { + GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + GLuint bytesPerFace = texobj->base.totalSize / 6; + ASSERT(texobj->base.totalSize % 6 == 0); + + RADEON_STATECHANGE( rmesa, cube[unit] ); + cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + /* dont know if this setup conforms to OpenGL.. 
+ * at least it matches the behavior of mesa software renderer + */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */ + cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */ + } } - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] ); + if (se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT]) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; + } texobj->dirty_state &= ~(1<<unit); } @@ -1197,6 +1209,9 @@ static GLboolean update_tex_common( GLcontext *ctx, int unit ) fprintf(stderr, "%s: border\n", __FUNCTION__); return GL_FALSE; } + /* yuv conversion only works in first unit */ + if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB)) + return GL_FALSE; /* Update state if this is a different texture object to last * time. diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c index e9a76dc92eb..a5a9eb144b5 100644 --- a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c +++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c @@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_array_api.h" +#include "tnl/t_save_api.h" #include "radeon_context.h" #include "radeon_state.h" @@ -343,6 +344,7 @@ static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller ) _tnl_wakeup_exec( ctx ); ctx->Driver.FlushVertices = radeonFlushVertices; + ctx->Driver.NewList =_tnl_NewList; assert( rmesa->dma.flush == 0 ); rmesa->vb.fell_back = GL_TRUE; @@ -464,6 +466,12 @@ static void VFMT_FALLBACK( const char *caller ) } } +static void radeonNewList( GLcontext *ctx, GLuint list, GLenum mode ) +{ + VFMT_FALLBACK( __FUNCTION__ ); + _tnl_NewList( ctx, list, mode ); + return; +} static void wrap_buffer( void ) @@ -717,6 +725,7 @@ static void radeonVtxfmtValidate( GLcontext *ctx ) _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt ); ctx->Driver.FlushVertices = radeonVtxfmtFlushVertices; + ctx->Driver.NewList = radeonNewList; rmesa->vb.installed = GL_TRUE; } else if (RADEON_DEBUG & DEBUG_VFMT) @@ -731,6 +740,7 @@ static void radeonVtxfmtValidate( GLcontext *ctx ) rmesa->dma.flush( rmesa ); _tnl_wakeup_exec( ctx ); ctx->Driver.FlushVertices = radeonFlushVertices; + ctx->Driver.NewList =_tnl_NewList; rmesa->vb.installed = GL_FALSE; } } diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index a1883f644d1..4dcce638460 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1883,6 +1883,8 @@ # define RADEON_LIGHT_6_SHIFT 0 # define RADEON_LIGHT_7_SHIFT 16 +#define RADEON_SE_TCL_STATE_FLUSH 0x2284 + #define RADEON_SE_TCL_SHININESS 0x2250 #define RADEON_SE_TCL_TEXTURE_PROC_CTL 0x2268 diff --git a/src/mesa/drivers/dri/savage/savage_3d_reg.h b/src/mesa/drivers/dri/savage/savage_3d_reg.h index b7d1c3960a3..bc81d732967 100644 --- a/src/mesa/drivers/dri/savage/savage_3d_reg.h +++ b/src/mesa/drivers/dri/savage/savage_3d_reg.h @@ -290,6 +290,14 @@ typedef union struct { unsigned dstAlphaMode : 3; + 
+ /** + * This bit enables \c GL_FUNC_SUBTRACT. Like most DirectX oriented + * hardware, there's no way to do \c GL_FUNC_REVERSE_SUBTRACT. + * + * \todo + * Add support for \c GL_FUNC_SUBTRACT! + */ unsigned dstMinusSrc : 1; unsigned srcAlphaMode : 3; unsigned binaryFinalAlpha : 1; @@ -327,7 +335,11 @@ typedef union unsigned texBlendCtrl : 3; unsigned flushPdDestWrites : 1; unsigned flushPdZbufWrites : 1; - /* havn't found an equivalent for Savage4. Utah-driver sets it to 0. */ + + /** + * Disable perspective correct interpolation for vertex color, vertex + * fog, and vertex alpha. For OpenGL, this should \b always be zero. + */ unsigned interpMode : 1; }ni; u_int32_t ui; diff --git a/src/mesa/drivers/dri/savage/savagetex.c b/src/mesa/drivers/dri/savage/savagetex.c index f0fe0c52594..719e50f964c 100644 --- a/src/mesa/drivers/dri/savage/savagetex.c +++ b/src/mesa/drivers/dri/savage/savagetex.c @@ -527,6 +527,51 @@ savageAllocTexObj( struct gl_texture_object *texObj ) * formats that promote to ARGB8888 or ARGB4444 and set the color * components to white. This way we get the correct result. 
*/ + +static GLboolean +_savage_texstore_a1114444(TEXSTORE_PARAMS); + +static GLboolean +_savage_texstore_a1118888(TEXSTORE_PARAMS); + +static struct gl_texture_format _savage_texformat_a1114444 = { + MESA_FORMAT_ARGB4444, /* MesaFormat */ + GL_RGBA, /* BaseFormat */ + GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ + 4, /* RedBits */ + 4, /* GreenBits */ + 4, /* BlueBits */ + 4, /* AlphaBits */ + 0, /* LuminanceBits */ + 0, /* IntensityBits */ + 0, /* IndexBits */ + 0, /* DepthBits */ + 0, /* StencilBits */ + 2, /* TexelBytes */ + _savage_texstore_a1114444, /* StoreTexImageFunc */ + NULL, NULL, NULL, NULL, NULL, NULL /* FetchTexel* filled in by + * savageDDInitTextureFuncs */ +}; +static struct gl_texture_format _savage_texformat_a1118888 = { + MESA_FORMAT_ARGB8888, /* MesaFormat */ + GL_RGBA, /* BaseFormat */ + GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ + 8, /* RedBits */ + 8, /* GreenBits */ + 8, /* BlueBits */ + 8, /* AlphaBits */ + 0, /* LuminanceBits */ + 0, /* IntensityBits */ + 0, /* IndexBits */ + 0, /* DepthBits */ + 0, /* StencilBits */ + 4, /* TexelBytes */ + _savage_texstore_a1118888, /* StoreTexImageFunc */ + NULL, NULL, NULL, NULL, NULL, NULL /* FetchTexel* filled in by + * savageDDInitTextureFuncs */ +}; + + static GLboolean _savage_texstore_a1114444(TEXSTORE_PARAMS) { @@ -605,44 +650,6 @@ _savage_texstore_a1118888(TEXSTORE_PARAMS) } -static struct gl_texture_format _savage_texformat_a1114444 = { - MESA_FORMAT_ARGB4444, /* MesaFormat */ - GL_RGBA, /* BaseFormat */ - GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ - 4, /* RedBits */ - 4, /* GreenBits */ - 4, /* BlueBits */ - 4, /* AlphaBits */ - 0, /* LuminanceBits */ - 0, /* IntensityBits */ - 0, /* IndexBits */ - 0, /* DepthBits */ - 0, /* StencilBits */ - 2, /* TexelBytes */ - _savage_texstore_a1114444, /* StoreTexImageFunc */ - NULL, NULL, NULL, NULL, NULL, NULL /* FetchTexel* filled in by - * savageDDInitTextureFuncs */ -}; -static struct gl_texture_format _savage_texformat_a1118888 = { - 
MESA_FORMAT_ARGB8888, /* MesaFormat */ - GL_RGBA, /* BaseFormat */ - GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ - 8, /* RedBits */ - 8, /* GreenBits */ - 8, /* BlueBits */ - 8, /* AlphaBits */ - 0, /* LuminanceBits */ - 0, /* IntensityBits */ - 0, /* IndexBits */ - 0, /* DepthBits */ - 0, /* StencilBits */ - 4, /* TexelBytes */ - _savage_texstore_a1118888, /* StoreTexImageFunc */ - NULL, NULL, NULL, NULL, NULL, NULL /* FetchTexel* filled in by - * savageDDInitTextureFuncs */ -}; - - /* Called by the _mesa_store_teximage[123]d() functions. */ static const struct gl_texture_format * savageChooseTextureFormat( GLcontext *ctx, GLint internalFormat, diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c index 7e86c1127cb..a300a080ec0 100644 --- a/src/mesa/drivers/dri/sis/sis_context.c +++ b/src/mesa/drivers/dri/sis/sis_context.c @@ -272,7 +272,7 @@ sisCreateContext( const __GLcontextModes *glVisual, /* set AGP */ smesa->AGPSize = sisScreen->agp.size; smesa->AGPBase = sisScreen->agp.map; - smesa->AGPAddr = sisScreen->agp.handle; + smesa->AGPAddr = sisScreen->agpBaseOffset; /* Create AGP command buffer */ if (smesa->AGPSize != 0 && diff --git a/src/mesa/drivers/dri/sis/sis_dd.c b/src/mesa/drivers/dri/sis/sis_dd.c index 39c00a18af6..8fc7896b877 100644 --- a/src/mesa/drivers/dri/sis/sis_dd.c +++ b/src/mesa/drivers/dri/sis/sis_dd.c @@ -47,7 +47,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" -#define DRIVER_DATE "20051023" +#define DRIVER_DATE "20060710" /* Return the width and height of the given buffer. 
*/ diff --git a/src/mesa/drivers/dri/sis/sis_screen.c b/src/mesa/drivers/dri/sis/sis_screen.c index d4395a9cddf..4fd692ec240 100644 --- a/src/mesa/drivers/dri/sis/sis_screen.c +++ b/src/mesa/drivers/dri/sis/sis_screen.c @@ -163,6 +163,7 @@ sisCreateScreen( __DRIscreenPrivate *sPriv ) if (sisDRIPriv->agp.size) { sisScreen->agp.handle = sisDRIPriv->agp.handle; + sisScreen->agpBaseOffset = drmAgpBase(sPriv->fd); sisScreen->agp.size = sisDRIPriv->agp.size; if ( drmMap( sPriv->fd, sisScreen->agp.handle, sisScreen->agp.size, &sisScreen->agp.map ) ) @@ -349,10 +350,10 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, static const __DRIversion ddx_expected = {0, 8, 0}; static const __DRIversion dri_expected = {4, 0, 0}; static const __DRIversion drm_expected = {1, 0, 0}; - + static const char *driver_name = "SiS"; dri_interface = interface; - if (!driCheckDriDdxDrmVersions2("SiS", dri_version, &dri_expected, + if (!driCheckDriDdxDrmVersions2(driver_name, dri_version, &dri_expected, ddx_version, &ddx_expected, drm_version, &drm_expected)) { return NULL; diff --git a/src/mesa/drivers/dri/sis/sis_screen.h b/src/mesa/drivers/dri/sis/sis_screen.h index c3e9ef48764..d5b2101e980 100644 --- a/src/mesa/drivers/dri/sis/sis_screen.h +++ b/src/mesa/drivers/dri/sis/sis_screen.h @@ -41,6 +41,7 @@ typedef struct { typedef struct { sisRegionRec2 mmio; sisRegionRec2 agp; + unsigned long agpBaseOffset; unsigned int AGPCmdBufOffset; unsigned int AGPCmdBufSize; diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.c b/src/mesa/drivers/dri/tdfx/tdfx_context.c index 826a9f71dfe..dd40544d490 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.c @@ -50,6 +50,7 @@ #include "tdfx_texman.h" #include "extensions.h" #include "hash.h" +#include "texobj.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -602,7 +603,7 @@ tdfxDestroyContext( __DRIcontextPrivate *driContextPriv ) id; id = 
_mesa_HashNextEntry(textures, id)) { struct gl_texture_object *tObj - = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + = _mesa_lookup_texture(fxMesa->glCtx, id); tdfxTMFreeTexture(fxMesa, tObj); } } diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tex.c b/src/mesa/drivers/dri/tdfx/tdfx_tex.c index 04ddfaad24b..862c7dacf1f 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_tex.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_tex.c @@ -1364,8 +1364,7 @@ tdfxTexImage2D(GLcontext *ctx, GLenum target, GLint level, texImage->Data = _mesa_alloc_texmemory(texImage->CompressedSize); } else { dstRowStride = mml->width * texelBytes; - texImage->Data = _mesa_alloc_texmemory(mml->width * mml->height * - texelBytes); + texImage->Data = _mesa_alloc_texmemory(mml->width * mml->height * texelBytes); } if (!texImage->Data) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); diff --git a/src/mesa/drivers/dri/tdfx/tdfx_texman.c b/src/mesa/drivers/dri/tdfx/tdfx_texman.c index 63cae17e4e9..6f782f687f3 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_texman.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_texman.c @@ -38,6 +38,7 @@ #include "tdfx_context.h" #include "tdfx_tex.h" #include "tdfx_texman.h" +#include "texobj.h" #include "hash.h" @@ -82,8 +83,8 @@ VerifyFreeList(tdfxContextPtr fxMesa, FxU32 tmu) for (id = _mesa_HashFirstEntry(textures); id; id = _mesa_HashNextEntry(textures, id)) { - struct gl_texture_object *tObj; - tObj = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + struct gl_texture_object *tObj + = _mesa_lookup_texture(fxMesa->glCtx, id); tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj); if (ti) { if (ti->isInTM) { @@ -122,7 +123,7 @@ dump_texmem(tdfxContextPtr fxMesa) id; id = _mesa_HashNextEntry(textures, id)) { struct gl_texture_object *obj - = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + = _mesa_lookup_texture(fxMesa->glCtx, id); tdfxTexInfo *info = TDFX_TEXTURE_DATA(obj); if (info && info->isInTM) { @@ -404,7 +405,7 @@ FindOldestObject(tdfxContextPtr 
fxMesa, FxU32 tmu) id; id = _mesa_HashNextEntry(textures, id)) { struct gl_texture_object *obj - = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + = _mesa_lookup_texture(fxMesa->glCtx, id); tdfxTexInfo *info = TDFX_TEXTURE_DATA(obj); if (info && info->isInTM && @@ -460,7 +461,7 @@ FlushTexMemory(tdfxContextPtr fxMesa) id; id = _mesa_HashNextEntry(textures, id)) { struct gl_texture_object *obj - = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + = _mesa_lookup_texture(fxMesa->glCtx, id); if (obj->RefCount < 2) { /* don't flush currently bound textures */ tdfxTMMoveOutTM_NoLock(fxMesa, obj); @@ -972,7 +973,7 @@ void tdfxTMRestoreTextures_NoLock( tdfxContextPtr fxMesa ) id; id = _mesa_HashNextEntry(textures, id)) { struct gl_texture_object *tObj - = (struct gl_texture_object *) _mesa_HashLookup(textures, id); + = _mesa_lookup_texture(fxMesa->glCtx, id); tdfxTexInfo *ti = TDFX_TEXTURE_DATA( tObj ); if ( ti && ti->isInTM ) { int i; diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index 183f3d3479f..ffde1b66b76 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -68,7 +68,7 @@ #define need_GL_EXT_secondary_color #include "extension_helper.h" -#define DRIVER_DATE "20050526" +#define DRIVER_DATE "20060710" #include "vblank.h" #include "utils.h" @@ -336,7 +336,9 @@ calculate_buffer_parameters( struct via_context *vmesa, if( vmesa->viaScreen->width == vmesa->driDrawable->w && vmesa->viaScreen->height == vmesa->driDrawable->h ) { vmesa->doPageFlip = vmesa->allowPageFlip; - assert(vmesa->back.pitch == vmesa->front.pitch); + if (vmesa->hasBack) { + assert(vmesa->back.pitch == vmesa->front.pitch); + } } else vmesa->doPageFlip = GL_FALSE; diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c index 24c62198ac6..4f137c8ceb8 100644 --- a/src/mesa/drivers/dri/unichrome/via_screen.c +++ 
b/src/mesa/drivers/dri/unichrome/via_screen.c @@ -169,13 +169,8 @@ viaInitDriver(__DRIscreenPrivate *sPriv) __driUtilMessage("viaInitDriver: drmMap agp failed"); return GL_FALSE; } - - /* - * FIXME: This is an invalid assumption that works until handle is - * changed to mean something else than the 32-bit physical AGP address. - */ - viaScreen->agpBase = gDRIPriv->agp.handle; + viaScreen->agpBase = drmAgpBase(sPriv->fd); } else viaScreen->agpLinearStart = 0; @@ -440,13 +435,14 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, VIA_DRIDDX_VERSION_PATCH }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 2, 3, 0 }; + static const char *driver_name = "Unichrome"; dri_interface = interface; - if ( ! driCheckDriDdxDrmVersions2( "Unichrome", + if ( ! driCheckDriDdxDrmVersions2( driver_name, dri_version, & dri_expected, ddx_version, & ddx_expected, - drm_version, & drm_expected ) ) { + drm_version, & drm_expected) ) { return NULL; } @@ -470,7 +466,6 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, driInitExtensions( NULL, card_extensions, GL_FALSE ); } - fprintf(stderr, "%s - succeeded\n", __FUNCTION__); return (void *) psp; } diff --git a/src/mesa/drivers/fbdev/glfbdev.c b/src/mesa/drivers/fbdev/glfbdev.c index 01b178fb126..3a6b452065b 100644 --- a/src/mesa/drivers/fbdev/glfbdev.c +++ b/src/mesa/drivers/fbdev/glfbdev.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.1 * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -152,8 +152,8 @@ static void get_buffer_size( GLframebuffer *buffer, GLuint *width, GLuint *height ) { const GLFBDevBufferPtr fbdevbuffer = GLFBDEV_BUFFER(buffer); - *width = fbdevbuffer->var.xres_virtual; - *height = fbdevbuffer->var.yres_virtual; + *width = fbdevbuffer->var.xres; + *height = fbdevbuffer->var.yres; } @@ -171,7 +171,7 @@ viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* 24-bit BGR */ #define NAME(PREFIX) PREFIX##_B8G8R8 -#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -192,7 +192,7 @@ viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* 32-bit BGRA */ #define NAME(PREFIX) PREFIX##_B8G8R8A8 -#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -214,7 +214,7 @@ viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* 16-bit BGR (XXX implement dithering someday) */ #define NAME(PREFIX) PREFIX##_B5G6R5 -#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -233,7 +233,7 @@ viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* 15-bit BGR (XXX implement dithering someday) */ #define NAME(PREFIX) PREFIX##_B5G5R5 -#define FORMAT GL_RGBA8 +#define RB_TYPE GLubyte #define SPAN_VARS \ struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -252,7 +252,8 @@ viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* 8-bit color index */ #define NAME(PREFIX) PREFIX##_CI8 -#define FORMAT GL_COLOR_INDEX8_EXT 
+#define CI_MODE +#define RB_TYPE GLubyte #define SPAN_VARS \ struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb; #define INIT_PIXEL_PTR(P, X, Y) \ @@ -280,7 +281,7 @@ glFBDevGetString( int str ) case GLFBDEV_VENDOR: return "Mesa Project"; case GLFBDEV_VERSION: - return "1.0.0"; + return "1.0.1"; default: return NULL; } @@ -371,6 +372,10 @@ glFBDevCreateVisual( const struct fb_fix_screeninfo *fixInfo, case GLFBDEV_LEVEL: /* ignored for now */ break; + case GLFBDEV_MULTISAMPLE: + numSamples = attrib[1]; + attrib++; + break; default: /* unexpected token */ _mesa_free(vis); @@ -384,50 +389,37 @@ glFBDevCreateVisual( const struct fb_fix_screeninfo *fixInfo, blueBits = varInfo->blue.length; alphaBits = varInfo->transp.length; - if ((fixInfo->visual == FB_VISUAL_TRUECOLOR || - fixInfo->visual == FB_VISUAL_DIRECTCOLOR) - && varInfo->bits_per_pixel == 24 - && varInfo->red.offset == 16 - && varInfo->green.offset == 8 - && varInfo->blue.offset == 0) { - vis->pixelFormat = PF_B8G8R8; - } - else if ((fixInfo->visual == FB_VISUAL_TRUECOLOR || - fixInfo->visual == FB_VISUAL_DIRECTCOLOR) - && varInfo->bits_per_pixel == 32 - && varInfo->red.offset == 16 - && varInfo->green.offset == 8 - && varInfo->blue.offset == 0 - && varInfo->transp.offset == 24) { - vis->pixelFormat = PF_B8G8R8A8; - } - else if ((fixInfo->visual == FB_VISUAL_TRUECOLOR || - fixInfo->visual == FB_VISUAL_DIRECTCOLOR) - && varInfo->bits_per_pixel == 16 - && varInfo->red.offset == 11 - && varInfo->green.offset == 5 - && varInfo->blue.offset == 0) { - vis->pixelFormat = PF_B5G6R5; - } - else if ((fixInfo->visual == FB_VISUAL_TRUECOLOR || - fixInfo->visual == FB_VISUAL_DIRECTCOLOR) - && varInfo->bits_per_pixel == 16 - && varInfo->red.offset == 10 - && varInfo->green.offset == 5 - && varInfo->blue.offset == 0) { - vis->pixelFormat = PF_B5G5R5; - } - else { - _mesa_problem(NULL, "Unsupported fbdev RGB visual/bitdepth!\n"); - /* - printf("fixInfo->visual = 0x%x\n", fixInfo->visual); - 
printf("varInfo->bits_per_pixel = %d\n", varInfo->bits_per_pixel); - printf("varInfo->red.offset = %d\n", varInfo->red.offset); - printf("varInfo->green.offset = %d\n", varInfo->green.offset); - printf("varInfo->blue.offset = %d\n", varInfo->blue.offset); - */ - _mesa_free(vis); - return NULL; + if (fixInfo->visual == FB_VISUAL_TRUECOLOR || + fixInfo->visual == FB_VISUAL_DIRECTCOLOR) { + if(varInfo->bits_per_pixel == 24 + && varInfo->red.offset == 16 + && varInfo->green.offset == 8 + && varInfo->blue.offset == 0) + vis->pixelFormat = PF_B8G8R8; + + else if(varInfo->bits_per_pixel == 32 + && varInfo->red.offset == 16 + && varInfo->green.offset == 8 + && varInfo->blue.offset == 0) + vis->pixelFormat = PF_B8G8R8A8; + + else if(varInfo->bits_per_pixel == 16 + && varInfo->red.offset == 11 + && varInfo->green.offset == 5 + && varInfo->blue.offset == 0) + vis->pixelFormat = PF_B5G6R5; + + else if(varInfo->bits_per_pixel == 16 + && varInfo->red.offset == 10 + && varInfo->green.offset == 5 + && varInfo->blue.offset == 0) + vis->pixelFormat = PF_B5G5R5; + + else { + _mesa_problem(NULL, "Unsupported fbdev RGB visual/bitdepth!\n"); + _mesa_free(vis); + return NULL; + } } } else { @@ -500,11 +492,13 @@ renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb, static struct GLFBDevRenderbufferRec * -new_glfbdev_renderbuffer(void *bufferStart, int pixelFormat) +new_glfbdev_renderbuffer(void *bufferStart, const GLFBDevVisualPtr visual) { struct GLFBDevRenderbufferRec *rb = CALLOC_STRUCT(GLFBDevRenderbufferRec); if (rb) { GLuint name = 0; + int pixelFormat = visual->pixelFormat; + _mesa_init_renderbuffer(&rb->Base, name); rb->Base.Delete = delete_renderbuffer; @@ -565,11 +559,24 @@ new_glfbdev_renderbuffer(void *bufferStart, int pixelFormat) } rb->Base.DataType = GL_UNSIGNED_BYTE; rb->Base.Data = bufferStart; + + rb->rowStride = visual->var.xres_virtual * visual->var.bits_per_pixel / 8; + rb->bottom = (GLubyte *) bufferStart + + (visual->var.yres - 1) * rb->rowStride; + + 
rb->Base.Width = visual->var.xres; + rb->Base.Height = visual->var.yres; + + rb->Base.RedBits = visual->var.red.length; + rb->Base.GreenBits = visual->var.green.length; + rb->Base.BlueBits = visual->var.blue.length; + rb->Base.AlphaBits = visual->var.transp.length; + + rb->Base.InternalFormat = pixelFormat; } return rb; } - GLFBDevBufferPtr glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo, const struct fb_var_screeninfo *varInfo, @@ -583,6 +590,11 @@ glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo, ASSERT(frontBuffer); ASSERT(size > 0); + /* this is to update the visual if there was a resize and the + buffer is created again */ + visual->var = *varInfo; + visual->fix = *fixInfo; + if (visual->fix.visual != fixInfo->visual || visual->fix.type != fixInfo->type || visual->var.bits_per_pixel != varInfo->bits_per_pixel || @@ -602,12 +614,26 @@ glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo, /* basic framebuffer setup */ _mesa_initialize_framebuffer(&buf->glframebuffer, &visual->glvisual); /* add front renderbuffer */ - frontrb = new_glfbdev_renderbuffer(frontBuffer, visual->pixelFormat); + frontrb = new_glfbdev_renderbuffer(frontBuffer, visual); _mesa_add_renderbuffer(&buf->glframebuffer, BUFFER_FRONT_LEFT, &frontrb->Base); /* add back renderbuffer */ if (visual->glvisual.doubleBufferMode) { - backrb = new_glfbdev_renderbuffer(backBuffer, visual->pixelFormat); + int malloced = !backBuffer; + if (malloced) { + /* malloc a back buffer */ + backBuffer = _mesa_malloc(size); + if (!backBuffer) { + _mesa_free_framebuffer_data(&buf->glframebuffer); + _mesa_free(buf); + return NULL; + } + } + + backrb = new_glfbdev_renderbuffer(backBuffer, visual); + if(malloced) + backrb->mallocedBuffer = GL_TRUE; + _mesa_add_renderbuffer(&buf->glframebuffer, BUFFER_BACK_LEFT, &backrb->Base); } @@ -620,36 +646,11 @@ glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo, GL_FALSE, /* alpha */ GL_FALSE /* aux bufs */); - - buf->fix = *fixInfo; /* 
struct assignment */ buf->var = *varInfo; /* struct assignment */ buf->visual = visual; /* ptr assignment */ buf->size = size; buf->bytesPerPixel = visual->var.bits_per_pixel / 8; - frontrb->rowStride = visual->var.xres_virtual * buf->bytesPerPixel; - frontrb->bottom = (GLubyte *) frontrb->Base.Data - + (visual->var.yres_virtual - 1) * frontrb->rowStride; - - if (visual->glvisual.doubleBufferMode) { - if (!backBuffer) { - /* malloc a back buffer */ - backrb->Base.Data = _mesa_malloc(size); - if (!backrb->Base.Data) { - _mesa_free_framebuffer_data(&buf->glframebuffer); - _mesa_free(buf); - return NULL; - } - backrb->mallocedBuffer = GL_TRUE; - } - backrb->rowStride = frontrb->rowStride; - backrb->bottom = (GLubyte *) backrb->Base.Data - + (visual->var.yres_virtual - 1) * backrb->rowStride; - } - else { - backrb->bottom = NULL; - backrb->rowStride = 0; - } return buf; } diff --git a/src/mesa/drivers/ggi/default/genkgi_mode.c b/src/mesa/drivers/ggi/default/genkgi_mode.c index a81f553f49b..938024789f9 100644 --- a/src/mesa/drivers/ggi/default/genkgi_mode.c +++ b/src/mesa/drivers/ggi/default/genkgi_mode.c @@ -1,4 +1,4 @@ -/* $Id: genkgi_mode.c,v 1.4 2000-01-07 08:34:44 jtaylor Exp $ +/* $Id: genkgi_mode.c,v 1.4 2000/01/07 08:34:44 jtaylor Exp $ ****************************************************************************** display-fbdev-kgicon-generic-mesa diff --git a/src/mesa/drivers/ggi/default/genkgi_visual.c b/src/mesa/drivers/ggi/default/genkgi_visual.c index 1b49a4ae528..17ef9679bb8 100644 --- a/src/mesa/drivers/ggi/default/genkgi_visual.c +++ b/src/mesa/drivers/ggi/default/genkgi_visual.c @@ -1,4 +1,4 @@ -/* $Id: genkgi_visual.c,v 1.7 2000-06-11 20:11:55 jtaylor Exp $ +/* $Id: genkgi_visual.c,v 1.7 2000/06/11 20:11:55 jtaylor Exp $ ****************************************************************************** genkgi_visual.c: visual handling for the generic KGI helper diff --git a/src/mesa/drivers/ggi/include/ggi/mesa/debug.h 
b/src/mesa/drivers/ggi/include/ggi/mesa/debug.h index e4c2a250454..35d11624c64 100644 --- a/src/mesa/drivers/ggi/include/ggi/mesa/debug.h +++ b/src/mesa/drivers/ggi/include/ggi/mesa/debug.h @@ -1,4 +1,4 @@ -/* $Id: debug.h,v 1.5 2003-09-22 15:18:51 brianp Exp $ +/* $Id: debug.h,v 1.5 2003/09/22 15:18:51 brianp Exp $ ****************************************************************************** GGIMesa debugging macros diff --git a/src/mesa/drivers/svga/svgamesa.c b/src/mesa/drivers/svga/svgamesa.c index ca79ebb6c74..09330e78ab5 100644 --- a/src/mesa/drivers/svga/svgamesa.c +++ b/src/mesa/drivers/svga/svgamesa.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa.c,v 1.26 2005-09-07 23:26:01 brianp Exp $ */ +/* $Id: svgamesa.c,v 1.26 2005/09/07 23:26:01 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa15.c b/src/mesa/drivers/svga/svgamesa15.c index 817da34ef1d..54b6c977a7b 100644 --- a/src/mesa/drivers/svga/svgamesa15.c +++ b/src/mesa/drivers/svga/svgamesa15.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa15.c,v 1.11 2002-11-11 18:42:39 brianp Exp $ */ +/* $Id: svgamesa15.c,v 1.11 2002/11/11 18:42:39 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa15.h b/src/mesa/drivers/svga/svgamesa15.h index 4a318d3aa73..3ed7db82ee5 100644 --- a/src/mesa/drivers/svga/svgamesa15.h +++ b/src/mesa/drivers/svga/svgamesa15.h @@ -1,4 +1,4 @@ -/* $Id: svgamesa15.h,v 1.7 2002-11-11 18:42:39 brianp Exp $ */ +/* $Id: svgamesa15.h,v 1.7 2002/11/11 18:42:39 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa16.c b/src/mesa/drivers/svga/svgamesa16.c index 483158ccdb4..72ac8183294 100644 --- a/src/mesa/drivers/svga/svgamesa16.c +++ b/src/mesa/drivers/svga/svgamesa16.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa16.c,v 1.11 2002-11-11 18:42:40 brianp Exp $ */ +/* $Id: svgamesa16.c,v 1.11 2002/11/11 18:42:40 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa16.h 
b/src/mesa/drivers/svga/svgamesa16.h index 91ac404ab77..247c1f40455 100644 --- a/src/mesa/drivers/svga/svgamesa16.h +++ b/src/mesa/drivers/svga/svgamesa16.h @@ -1,4 +1,4 @@ -/* $Id: svgamesa16.h,v 1.6 2002-11-11 18:42:41 brianp Exp $ */ +/* $Id: svgamesa16.h,v 1.6 2002/11/11 18:42:41 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa24.c b/src/mesa/drivers/svga/svgamesa24.c index ff3b24894e1..07491cc67ac 100644 --- a/src/mesa/drivers/svga/svgamesa24.c +++ b/src/mesa/drivers/svga/svgamesa24.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa24.c,v 1.12 2002-11-11 18:42:41 brianp Exp $ */ +/* $Id: svgamesa24.c,v 1.12 2002/11/11 18:42:41 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa24.h b/src/mesa/drivers/svga/svgamesa24.h index be41bd4abb5..54d1a8298ba 100644 --- a/src/mesa/drivers/svga/svgamesa24.h +++ b/src/mesa/drivers/svga/svgamesa24.h @@ -1,4 +1,4 @@ -/* $Id: svgamesa24.h,v 1.6 2002-11-11 18:42:41 brianp Exp $ */ +/* $Id: svgamesa24.h,v 1.6 2002/11/11 18:42:41 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa32.c b/src/mesa/drivers/svga/svgamesa32.c index a7829526dda..8a366998d6b 100644 --- a/src/mesa/drivers/svga/svgamesa32.c +++ b/src/mesa/drivers/svga/svgamesa32.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa32.c,v 1.12 2002-11-11 18:42:42 brianp Exp $ */ +/* $Id: svgamesa32.c,v 1.12 2002/11/11 18:42:42 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa32.h b/src/mesa/drivers/svga/svgamesa32.h index f622fbfa64b..f518e11ad5b 100644 --- a/src/mesa/drivers/svga/svgamesa32.h +++ b/src/mesa/drivers/svga/svgamesa32.h @@ -1,4 +1,4 @@ -/* $Id: svgamesa32.h,v 1.6 2002-11-11 18:42:42 brianp Exp $ */ +/* $Id: svgamesa32.h,v 1.6 2002/11/11 18:42:42 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa8.c b/src/mesa/drivers/svga/svgamesa8.c index 6f57ae7e4d6..fd880ef85ad 100644 --- 
a/src/mesa/drivers/svga/svgamesa8.c +++ b/src/mesa/drivers/svga/svgamesa8.c @@ -1,4 +1,4 @@ -/* $Id: svgamesa8.c,v 1.9 2005-05-04 20:11:39 brianp Exp $ */ +/* $Id: svgamesa8.c,v 1.9 2005/05/04 20:11:39 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgamesa8.h b/src/mesa/drivers/svga/svgamesa8.h index d9c1979f0e0..1aa25f93fce 100644 --- a/src/mesa/drivers/svga/svgamesa8.h +++ b/src/mesa/drivers/svga/svgamesa8.h @@ -1,4 +1,4 @@ -/* $Id: svgamesa8.h,v 1.4 2001-02-06 00:03:48 brianp Exp $ */ +/* $Id: svgamesa8.h,v 1.4 2001/02/06 00:03:48 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/svga/svgapix.h b/src/mesa/drivers/svga/svgapix.h index 00c1292bb3a..0b19551bf6e 100644 --- a/src/mesa/drivers/svga/svgapix.h +++ b/src/mesa/drivers/svga/svgapix.h @@ -1,4 +1,4 @@ -/* $Id: svgapix.h,v 1.5 2002-11-11 18:42:44 brianp Exp $ */ +/* $Id: svgapix.h,v 1.5 2002/11/11 18:42:44 brianp Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def index e784d98b4ce..f0b275730ca 100644 --- a/src/mesa/drivers/windows/gdi/mesa.def +++ b/src/mesa/drivers/windows/gdi/mesa.def @@ -500,8 +500,6 @@ EXPORTS glPolygonOffsetEXT glTexImage3DEXT glTexSubImage3DEXT - glGetTexFilterFuncSGIS - glTexFilterFuncSGIS glTexSubImage1DEXT glTexSubImage2DEXT glCopyTexImage1DEXT @@ -509,57 +507,12 @@ EXPORTS glCopyTexSubImage1DEXT glCopyTexSubImage2DEXT glCopyTexSubImage3DEXT - glGetHistogramEXT - glGetHistogramParameterfvEXT - glGetHistogramParameterivEXT - glGetMinmaxEXT - glGetMinmaxParameterfvEXT - glGetMinmaxParameterivEXT - glHistogramEXT - glMinmaxEXT - glResetHistogramEXT - glResetMinmaxEXT - glConvolutionFilter1DEXT - glConvolutionFilter2DEXT - glConvolutionParameterfEXT - glConvolutionParameterfvEXT - glConvolutionParameteriEXT - glConvolutionParameterivEXT - glCopyConvolutionFilter1DEXT - glCopyConvolutionFilter2DEXT - glGetConvolutionFilterEXT - 
glGetConvolutionParameterfvEXT - glGetConvolutionParameterivEXT - glGetSeparableFilterEXT - glSeparableFilter2DEXT - glColorTableSGI - glColorTableParameterfvSGI - glColorTableParameterivSGI - glCopyColorTableSGI - glGetColorTableSGI - glGetColorTableParameterfvSGI - glGetColorTableParameterivSGI - glPixelTexGenSGIX - glPixelTexGenParameteriSGIS - glPixelTexGenParameterivSGIS - glPixelTexGenParameterfSGIS - glPixelTexGenParameterfvSGIS - glGetPixelTexGenParameterivSGIS - glGetPixelTexGenParameterfvSGIS - glTexImage4DSGIS - glTexSubImage4DSGIS glAreTexturesResidentEXT glBindTextureEXT glDeleteTexturesEXT glGenTexturesEXT glIsTextureEXT glPrioritizeTexturesEXT - glDetailTexFuncSGIS - glGetDetailTexFuncSGIS - glSharpenTexFuncSGIS - glGetSharpenTexFuncSGIS - glSampleMaskSGIS - glSamplePatternSGIS glArrayElementEXT glColorPointerEXT glDrawArraysEXT @@ -570,63 +523,16 @@ EXPORTS glTexCoordPointerEXT glVertexPointerEXT glBlendEquationEXT - glSpriteParameterfSGIX - glSpriteParameterfvSGIX - glSpriteParameteriSGIX - glSpriteParameterivSGIX glPointParameterfEXT glPointParameterfvEXT glPointParameterfARB glPointParameterfvARB - glPointParameterfSGIS - glPointParameterfvSGIS - glGetInstrumentsSGIX - glInstrumentsBufferSGIX - glPollInstrumentsSGIX - glReadInstrumentsSGIX - glStartInstrumentsSGIX - glStopInstrumentsSGIX - glFrameZoomSGIX - glTagSampleBufferSGIX - glReferencePlaneSGIX - glFlushRasterSGIX - glColorSubTableEXT - glCopyColorSubTableEXT - glHintPGI glColorTableEXT glGetColorTableEXT glGetColorTableParameterivEXT glGetColorTableParameterfvEXT - glGetListParameterfvSGIX - glGetListParameterivSGIX - glListParameterfSGIX - glListParameterfvSGIX - glListParameteriSGIX - glListParameterivSGIX - glIndexMaterialEXT - glIndexFuncEXT glLockArraysEXT glUnlockArraysEXT - glCullParameterdvEXT - glCullParameterfvEXT - glFragmentColorMaterialSGIX - glFragmentLightfSGIX - glFragmentLightfvSGIX - glFragmentLightiSGIX - glFragmentLightivSGIX - glFragmentLightModelfSGIX - 
glFragmentLightModelfvSGIX - glFragmentLightModeliSGIX - glFragmentLightModelivSGIX - glFragmentMaterialfSGIX - glFragmentMaterialfvSGIX - glFragmentMaterialiSGIX - glFragmentMaterialivSGIX - glGetFragmentLightfvSGIX - glGetFragmentLightivSGIX - glGetFragmentMaterialfvSGIX - glGetFragmentMaterialivSGIX - glLightEnviSGIX glDrawRangeElementsEXT glSecondaryColor3bEXT glSecondaryColor3bvEXT @@ -653,10 +559,6 @@ EXPORTS glFogCoorddvEXT glFogCoordPointerEXT glBlendFuncSeparateEXT - glBlendFuncSeparateINGR - glVertexWeightfEXT - glVertexWeightfvEXT - glVertexWeightPointerEXT glFlushVertexArrayRangeNV glVertexArrayRangeNV glCombinerParameterfvNV @@ -697,11 +599,6 @@ EXPORTS glWindowPos4ivMESA glWindowPos4sMESA glWindowPos4svMESA - glMultiModeDrawArraysIBM - glMultiModeDrawElementsIBM - glTbufferMask3DFX - glSampleMaskEXT - glSamplePatternEXT glWindowPos2dARB glWindowPos2fARB glWindowPos2iARB @@ -784,7 +681,6 @@ EXPORTS glVertexAttribs4ubvNV glPointParameteriNV glPointParameterivNV - glBlendFuncSeparate glFogCoordf glFogCoordfv glFogCoordd @@ -829,14 +725,6 @@ EXPORTS glWindowPos3iv glWindowPos3s glWindowPos3sv - glActiveStencilFaceEXT - glDeleteFencesNV - glGenFencesNV - glIsFenceNV - glTestFenceNV - glGetFenceivNV - glFinishFenceNV - glSetFenceNV glVertexAttrib1sARB glVertexAttrib1fARB glVertexAttrib1dARB @@ -916,7 +804,6 @@ EXPORTS glIsBufferARB glMapBufferARB glUnmapBufferARB - glDepthBoundsEXT glGenQueriesARB glDeleteQueriesARB glIsQueryARB @@ -991,6 +878,7 @@ EXPORTS _mesa_compressed_texture_size _mesa_create_framebuffer _mesa_create_visual + _mesa_delete_array_object _mesa_delete_buffer_object _mesa_delete_program _mesa_delete_texture_object @@ -1016,14 +904,17 @@ EXPORTS _mesa_make_current _mesa_memcpy _mesa_memset + _mesa_new_array_object _mesa_new_buffer_object _mesa_new_framebuffer _mesa_new_program _mesa_new_query_object + _mesa_new_renderbuffer _mesa_new_soft_renderbuffer _mesa_new_texture_image _mesa_new_texture_object _mesa_problem + _mesa_remove_renderbuffer 
_mesa_render_texture _mesa_ResizeBuffersMESA _mesa_resize_framebuffer @@ -1041,6 +932,7 @@ EXPORTS _mesa_store_texsubimage3d _mesa_strcmp _mesa_test_proxy_teximage + _mesa_update_framebuffer_visual _mesa_Viewport _swrast_Accum _swrast_Bitmap @@ -1064,7 +956,6 @@ EXPORTS _swrast_DestroyContext _swrast_InvalidateState _swrast_ReadPixels -; _swrast_zbuffer_address _swsetup_Wakeup _swsetup_CreateContext _swsetup_DestroyContext diff --git a/src/mesa/drivers/windows/gdi/wgl.c b/src/mesa/drivers/windows/gdi/wgl.c index 2eef039d5d8..197de0743c7 100644 --- a/src/mesa/drivers/windows/gdi/wgl.c +++ b/src/mesa/drivers/windows/gdi/wgl.c @@ -1,4 +1,4 @@ -/* $Id: wgl.c,v 1.12 2006-03-30 07:58:24 kschultz Exp $ */ +/* $Id: wgl.c,v 1.12 2006/03/30 07:58:24 kschultz Exp $ */ /* * This library is free software; you can redistribute it and/or diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index 8e861da5081..78e3d9ec8d1 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -1321,7 +1321,7 @@ void WMesaMakeCurrent(WMesaContext c, HDC hdc) /* return if already current */ GET_CURRENT_CONTEXT(ctx); WMesaContext pwc = wmesa_context(ctx); - if (c == pwc && pwc->hDC == hdc) + if (pwc && c == pwc && pwc->hDC == hdc) return; } @@ -1397,3 +1397,53 @@ void WMesaSwapBuffers( HDC hdc ) _mesa_problem(NULL, "wmesa: can't swap non-current window"); } } + +/* This is hopefully a temporary hack to define some needed dispatch + * table entries. Hopefully, I'll find a better solution. The + * dispatch table generation scripts ought to be making these dummy + * stubs as well. 
*/ +void gl_dispatch_stub_543(void){}; +void gl_dispatch_stub_544(void){}; +void gl_dispatch_stub_545(void){}; +void gl_dispatch_stub_546(void){}; +void gl_dispatch_stub_547(void){}; +void gl_dispatch_stub_548(void){}; +void gl_dispatch_stub_549(void){}; +void gl_dispatch_stub_550(void){}; +void gl_dispatch_stub_551(void){}; +void gl_dispatch_stub_552(void){}; +void gl_dispatch_stub_553(void){}; +void gl_dispatch_stub_554(void){}; +void gl_dispatch_stub_555(void){}; +void gl_dispatch_stub_556(void){}; +void gl_dispatch_stub_557(void){}; +void gl_dispatch_stub_558(void){}; +void gl_dispatch_stub_559(void){}; +void gl_dispatch_stub_560(void){}; +void gl_dispatch_stub_561(void){}; +void gl_dispatch_stub_565(void){}; +void gl_dispatch_stub_566(void){}; +void gl_dispatch_stub_577(void){}; +void gl_dispatch_stub_578(void){}; +void gl_dispatch_stub_603(void){}; +void gl_dispatch_stub_645(void){}; +void gl_dispatch_stub_646(void){}; +void gl_dispatch_stub_647(void){}; +void gl_dispatch_stub_648(void){}; +void gl_dispatch_stub_649(void){}; +void gl_dispatch_stub_650(void){}; +void gl_dispatch_stub_651(void){}; +void gl_dispatch_stub_652(void){}; +void gl_dispatch_stub_653(void){}; +void gl_dispatch_stub_734(void){}; +void gl_dispatch_stub_735(void){}; +void gl_dispatch_stub_736(void){}; +void gl_dispatch_stub_737(void){}; +void gl_dispatch_stub_738(void){}; +void gl_dispatch_stub_745(void){}; +void gl_dispatch_stub_746(void){}; +void gl_dispatch_stub_760(void){}; +void gl_dispatch_stub_761(void){}; +void gl_dispatch_stub_766(void){}; +void gl_dispatch_stub_767(void){}; +void gl_dispatch_stub_768(void){}; diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_clip.c b/src/mesa/drivers/windows/gldirect/gld_debug_clip.c index 3000cd07d2f..1eb19ca84b0 100644 --- a/src/mesa/drivers/windows/gldirect/gld_debug_clip.c +++ b/src/mesa/drivers/windows/gldirect/gld_debug_clip.c @@ -1,4 +1,4 @@ -/* $Id: gld_debug_clip.c,v 1.1 2004-04-20 11:13:11 alanh Exp $ */ +/* $Id: 
gld_debug_clip.c,v 1.1 2004/04/20 11:13:11 alanh Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_norm.c b/src/mesa/drivers/windows/gldirect/gld_debug_norm.c index 8b7523ff5f3..00c428bd262 100644 --- a/src/mesa/drivers/windows/gldirect/gld_debug_norm.c +++ b/src/mesa/drivers/windows/gldirect/gld_debug_norm.c @@ -1,4 +1,4 @@ -/* $Id: gld_debug_norm.c,v 1.1 2004-04-20 11:13:11 alanh Exp $ */ +/* $Id: gld_debug_norm.c,v 1.1 2004/04/20 11:13:11 alanh Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_xform.c b/src/mesa/drivers/windows/gldirect/gld_debug_xform.c index 5cbea2617ea..d6e64b8ffdf 100644 --- a/src/mesa/drivers/windows/gldirect/gld_debug_xform.c +++ b/src/mesa/drivers/windows/gldirect/gld_debug_xform.c @@ -1,4 +1,4 @@ -/* $Id: gld_debug_xform.c,v 1.1 2004-04-20 11:13:11 alanh Exp $ */ +/* $Id: gld_debug_xform.c,v 1.1 2004/04/20 11:13:11 alanh Exp $ */ /* * Mesa 3-D graphics library diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 259a2950425..bc779a94a9e 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.1 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -80,7 +80,7 @@ "GLX_ARB_get_proc_address " \ "GLX_EXT_visual_info " \ "GLX_EXT_visual_rating " \ - "GLX_SGI_video_sync " \ + /*"GLX_SGI_video_sync "*/ \ "GLX_SGIX_fbconfig " \ "GLX_SGIX_pbuffer " @@ -916,6 +916,81 @@ choose_x_overlay_visual( Display *dpy, int scr, GLboolean rgbFlag, /**********************************************************************/ +/*** Display-related functions ***/ +/**********************************************************************/ + + +/** + * Free all XMesaVisuals which are associated with the given display. 
+ */ +static void +destroy_visuals_on_display(Display *dpy) +{ + int i; + for (i = 0; i < NumVisuals; i++) { + if (VisualTable[i]->display == dpy) { + /* remove this visual */ + int j; + free(VisualTable[i]); + for (j = i; j < NumVisuals - 1; j++) + VisualTable[j] = VisualTable[j + 1]; + NumVisuals--; + } + } +} + + +/** + * Called from XCloseDisplay() to let us free our display-related data. + */ +static int +close_display_callback(Display *dpy, XExtCodes *codes) +{ + destroy_visuals_on_display(dpy); + xmesa_destroy_buffers_on_display(dpy); + return 0; +} + + +/** + * Look for the named extension on given display and return a pointer + * to the _XExtension data, or NULL if extension not found. + */ +static _XExtension * +lookup_extension(Display *dpy, const char *extName) +{ + _XExtension *ext; + for (ext = dpy->ext_procs; ext; ext = ext->next) { + if (ext->name && strcmp(ext->name, extName) == 0) { + return ext; + } + } + return NULL; +} + + +/** + * Whenever we're given a new Display pointer, call this function to + * register our close_display_callback function. 
+ */ +static void +register_with_display(Display *dpy) +{ + const char *extName = "MesaGLX"; + _XExtension *ext; + + ext = lookup_extension(dpy, extName); + if (!ext) { + XExtCodes *c = XAddExtension(dpy); + ext = dpy->ext_procs; /* new extension is at head of list */ + assert(c->extension == ext->codes.extension); + ext->name = _mesa_strdup(extName); + ext->close_display = close_display_callback; + } +} + + +/**********************************************************************/ /*** Begin Fake GLX API Functions ***/ /**********************************************************************/ @@ -1264,7 +1339,12 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) static XVisualInfo * Fake_glXChooseVisual( Display *dpy, int screen, int *list ) { - XMesaVisual xmvis = choose_visual(dpy, screen, list, GL_FALSE); + XMesaVisual xmvis; + + /* register ourselves as an extension on this display */ + register_with_display(dpy); + + xmvis = choose_visual(dpy, screen, list, GL_FALSE); if (xmvis) { #if 0 return xmvis->vishandle; @@ -1298,7 +1378,9 @@ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo, return 0; /* deallocate unused windows/buffers */ +#if 0 XMesaGarbageCollect(); +#endif xmvis = find_glx_visual( dpy, visinfo ); if (!xmvis) { @@ -2274,19 +2356,26 @@ Fake_glXSwapIntervalSGI(int interval) /*** GLX_SGI_video_sync ***/ +static unsigned int FrameCounter = 0; + static int Fake_glXGetVideoSyncSGI(unsigned int *count) { - (void) count; + /* this is a bogus implementation */ + *count = FrameCounter++; return 0; } static int Fake_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) { - (void) divisor; - (void) remainder; - (void) count; + if (divisor <= 0 || remainder < 0) + return GLX_BAD_VALUE; + /* this is a bogus implementation */ + FrameCounter++; + while (FrameCounter % divisor != remainder) + FrameCounter++; + *count = FrameCounter; return 0; } diff --git a/src/mesa/drivers/x11/glxapi.c 
b/src/mesa/drivers/x11/glxapi.c index 7ce607a69c0..4f3dca8f484 100644 --- a/src/mesa/drivers/x11/glxapi.c +++ b/src/mesa/drivers/x11/glxapi.c @@ -588,8 +588,8 @@ glXGetVideoSyncSGI(unsigned int *count) struct _glxapi_table *t; Display *dpy = glXGetCurrentDisplay(); GET_DISPATCH(dpy, t); - if (!t) - return 0; + if (!t || !glXGetCurrentContext()) + return GLX_BAD_CONTEXT; return (t->GetVideoSyncSGI)(count); } @@ -599,8 +599,8 @@ glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) struct _glxapi_table *t; Display *dpy = glXGetCurrentDisplay(); GET_DISPATCH(dpy, t); - if (!t) - return 0; + if (!t || !glXGetCurrentContext()) + return GLX_BAD_CONTEXT; return (t->WaitVideoSyncSGI)(divisor, remainder, count); } @@ -614,7 +614,7 @@ glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXConte struct _glxapi_table *t; GET_DISPATCH(dpy, t); if (!t) - return 0; + return False; return (t->MakeCurrentReadSGI)(dpy, draw, read, ctx); } diff --git a/src/mesa/drivers/x11/glxheader.h b/src/mesa/drivers/x11/glxheader.h index 34b613a1bc4..844a7838da3 100644 --- a/src/mesa/drivers/x11/glxheader.h +++ b/src/mesa/drivers/x11/glxheader.h @@ -1,9 +1,8 @@ - /* * Mesa 3-D graphics library - * Version: 4.1 + * Version: 6.5.1 * - * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -38,12 +37,12 @@ # include "resource.h" # include "windowstr.h" # include "gcstruct.h" -# include "GL/xf86glx.h" # include "xf86glx_util.h" #else # include <X11/Xlib.h> +# include <X11/Xlibint.h> # include <X11/Xutil.h> # ifdef USE_XSHM /* was SHM */ # include <sys/ipc.h> diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index d2e2c7137b5..dea23fcf59b 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.1 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * @@ -2553,6 +2553,20 @@ XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d ) } +/** + * Free/destroy all XMesaBuffers associated with given display. + */ +void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy) +{ + XMesaBuffer b, next; + for (b = XMesaBufferList; b; b = next) { + next = b->Next; + if (b->display == dpy) { + free_xmesa_buffer(0, b); + } + } +} + /* * Look for XMesaBuffers whose X window has been destroyed. diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index d339ac6bc39..11d323082e6 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.1 * * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 
* @@ -620,6 +620,7 @@ xmesa_DrawPixels_8R8G8B( GLcontext *ctx, _swrast_validate_derived( ctx ); if (xrb->pixmap && + xrb->Base.AlphaBits == 0 && format == GL_BGRA && type == GL_UNSIGNED_BYTE && (swrast->_RasterMask & ~CLIP_BIT) == 0 && /* no blend, z-test, etc */ diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index bcac0557b98..4db0872621f 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -28,7 +28,6 @@ #ifdef XFree86Server -# include "GL/xf86glx.h" # include "xf86glx_util.h" #elif defined(USE_XSHM) # include <X11/extensions/XShm.h> @@ -521,6 +520,8 @@ extern void xmesa_set_renderbuffer_funcs(struct xmesa_renderbuffer *xrb, enum pixel_format pixelformat, GLint depth); +extern void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy); + /** * Using a function instead of an ordinary cast is safer. |