diff options
-rw-r--r-- | src/mesa/drivers/dri/r300/Makefile | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_cmdbuf.c | 46 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.c | 96 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 57 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_ioctl.c | 124 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_maos.c | 396 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_maos.h | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_reg.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_render.c | 280 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_texstate.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertexprog.c | 18 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_mm.c | 339 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_mm.h | 35 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c | 610 |
16 files changed, 1898 insertions, 156 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 02ef3214776..29dd20860c8 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -25,6 +25,8 @@ DRIVER_SOURCES = \ radeon_lock.c \ radeon_span.c \ radeon_state.c \ + radeon_mm.c \ + radeon_vtxfmt_a.c \ \ r300_context.c \ r300_ioctl.c \ diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index f24ebe1a91c..94c7031baae 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -128,9 +128,10 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char* caller) } -static void print_state_atom(struct r300_state_atom *state, int dwords) +void r300_print_state_atom(r300ContextPtr r300, struct r300_state_atom *state) { int i; + int dwords = (*state->check)(r300, state); fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords, state->cmd_size); @@ -159,7 +160,7 @@ static __inline__ void r300DoEmitState(r300ContextPtr r300, GLboolean dirty) int dwords = (*atom->check)(r300, atom); if (dwords) - print_state_atom(atom, dwords); + r300_print_state_atom(r300, atom); else fprintf(stderr, " skip state %s\n", atom->name); @@ -567,7 +568,7 @@ void r300EmitBlit(r300ContextPtr rmesa, GLint srcx, GLint srcy, GLint dstx, GLint dsty, GLuint w, GLuint h) { - drm_radeon_cmd_header_t *cmd; + drm_r300_cmd_header_t *cmd; if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, @@ -582,14 +583,13 @@ void r300EmitBlit(r300ContextPtr rmesa, assert(w < (1 << 16)); assert(h < (1 << 16)); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, + cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); cmd[0].header.cmd_type = R300_CMD_PACKET3; cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; - cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16); - cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + cmd[1].u = R200_CP_CMD_BITBLT_MULTI | (5 << 16); + cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL | RADEON_GMC_BRUSH_NONE | (color_fmt << 8) | @@ -598,28 +598,24 @@ void r300EmitBlit(r300ContextPtr rmesa, RADEON_DP_SRC_SOURCE_MEMORY | RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); - cmd[3].i = ((src_pitch / 64) << 22) | (src_offset >> 10); - cmd[4].i = ((dst_pitch / 64) << 22) | (dst_offset >> 10); - cmd[5].i = (srcx << 16) | srcy; - cmd[6].i = (dstx << 16) | dsty; /* dst */ - cmd[7].i = (w << 16) | h; + cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); + cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); + cmd[5].u = (srcx << 16) | srcy; + cmd[6].u = (dstx << 16) | dsty; /* dst */ + cmd[7].u = (w << 16) | h; } void r300EmitWait(r300ContextPtr rmesa, GLuint flags) { - if (rmesa->radeon.dri.drmMinor >= 6) { - drm_radeon_cmd_header_t *cmd; - - assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); - - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, - 1, - __FUNCTION__); - cmd[0].i = 0; - cmd[0].wait.cmd_type = R300_CMD_WAIT; - cmd[0].wait.flags = flags; - } + drm_r300_cmd_header_t *cmd; + + assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); + + cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, + __FUNCTION__); + cmd[0].u = 0; + cmd[0].wait.cmd_type = R300_CMD_WAIT; + cmd[0].wait.flags = flags; } void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index db8b0a6e11c..07eaaedfbe8 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -59,6 +59,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" +#ifdef USER_BUFFERS +#include "radeon_mm.h" +#endif + #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ @@ -83,8 +87,7 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_multitexture", NULL}, {"GL_ARB_texture_border_clamp", NULL}, {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, -/* disable until we support it, fixes a few things in ut2004 */ -/* {"GL_ARB_texture_cube_map", NULL}, */ + {"GL_ARB_texture_cube_map", NULL}, {"GL_ARB_texture_env_add", NULL}, {"GL_ARB_texture_env_combine", NULL}, {"GL_ARB_texture_env_crossbar", NULL}, @@ -155,75 +158,6 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { 0, }; -static void r300BufferData(GLcontext *ctx, GLenum target, GLsizeiptrARB size, - const GLvoid *data, GLenum usage, struct gl_buffer_object *obj) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - drm_radeon_mem_alloc_t alloc; - int offset, ret; - - /* Free previous buffer */ - if (obj->OnCard) { - drm_radeon_mem_free_t memfree; - - memfree.region = RADEON_MEM_REGION_GART; - memfree.region_offset = (char *)obj->Data - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - - ret = drmCommandWrite(rmesa->radeon.radeonScreen->driScreen->fd, - DRM_RADEON_FREE, &memfree, sizeof(memfree)); - - if (ret) { - WARN_ONCE("Failed to free GART memroy!\n"); - } - obj->OnCard = GL_FALSE; - } - - alloc.region = RADEON_MEM_REGION_GART; - alloc.alignment = 4; - alloc.size = size; - alloc.region_offset = &offset; - - ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); - if (ret) { - WARN_ONCE("Ran out of GART memory!\n"); - obj->Data = NULL; - _mesa_buffer_data(ctx, target, size, data, usage, obj); - return ; - } - obj->Data = ((GLubyte *)rmesa->radeon.radeonScreen->gartTextures.map) + offset; - - if (data) - memcpy(obj->Data, data, size); - - obj->Size = size; - obj->Usage = usage; - obj->OnCard = GL_TRUE; -#if 0 - fprintf(stderr, "allocated %d bytes at %p, offset=%d\n", size, obj->Data, offset); -#endif -} - -static void r300DeleteBuffer(GLcontext *ctx, struct gl_buffer_object *obj) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if(r300IsGartMemory(rmesa, obj->Data, obj->Size)){ - drm_radeon_mem_free_t memfree; - int ret; - - memfree.region = RADEON_MEM_REGION_GART; - memfree.region_offset = (char *)obj->Data - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - - ret = drmCommandWrite(rmesa->radeon.radeonScreen->driScreen->fd, - DRM_RADEON_FREE, &memfree, sizeof(memfree)); - - if(ret){ - WARN_ONCE("Failed to free GART memroy!\n"); - } - obj->Data = NULL; - } - _mesa_delete_buffer_object(ctx, obj); -} /* Create the device specific rendering context. */ @@ -263,13 +197,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); -#if 0 /* Needs various Mesa changes... */ +#ifdef USER_BUFFERS + radeon_mm_init(r300); +#endif +#ifdef HW_VBOS if (hw_tcl_on) { - functions.BufferData = r300BufferData; - functions.DeleteBuffer = r300DeleteBuffer; + r300_init_vbo_funcs(&functions); } -#endif - +#endif if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { FREE(r300); @@ -331,6 +266,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MinLineWidthAA = 1.0; ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + +#ifdef USER_BUFFERS + /* Needs further modifications */ + //ctx->Const.MaxArrayLockSize = (/*512*/RADEON_BUFFER_SIZE*16*1024) / (4*4); +#endif /* Initialize the software rasterizer and helper modules. */ @@ -391,6 +331,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs(ctx); r300InitCmdBuf(r300); r300InitState(r300); + +#ifdef RADEON_VTXFMT_A + radeon_init_vtxfmt_a(r300); +#endif #if 0 /* plug in a few more device driver functions */ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index b4ba612af6b..cb2e4bb3218 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -48,6 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" #define USE_ARB_F_P 1 +//#define USER_BUFFERS +//#define RADEON_VTXFMT_A +//#define HW_VBOS +//#define OPTIMIZE_ELTS struct r300_context; typedef struct r300_context r300ContextRec; @@ -109,12 +113,15 @@ static __inline__ uint32_t r300PackFloat32(float fl) struct r300_dma_buffer { int refcount; /* the number of retained regions in buf */ drmBufPtr buf; + int id; }; - +#ifdef USER_BUFFERS +#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) +#else #define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ (rvb)->address - rmesa->dma.buf0_address + \ (rvb)->start) - +#endif /* A retained region, eg vertices for indexed vertices. */ struct r300_dma_region { @@ -738,6 +745,30 @@ struct r300_pixel_shader_state { #define REG_COLOR0 1 #define REG_TEX0 2 +#ifdef USER_BUFFERS +struct dt { + GLint size; + GLenum type; + GLsizei stride; + void *data; +}; + +struct radeon_vertex_buffer { + int Count; + void *Elts; + int elt_size; + int elt_min, elt_max; /* debug */ + + struct dt AttribPtr[VERT_ATTRIB_MAX]; + + struct tnl_prim *Primitive; + GLuint PrimitiveCount; + GLint LockFirst; + GLsizei LockCount; + int lock_uptodate; +}; +#endif + struct r300_aos_rec { GLuint offset; int element_size; /* in dwords */ @@ -761,6 +792,9 @@ struct r300_state { #endif struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; +#ifdef USER_BUFFERS + struct radeon_vertex_buffer VB; +#endif GLuint *Elts; struct r300_dma_region elt_dma; @@ -815,8 +849,19 @@ struct r300_context { GLuint TexGenInputs; GLuint TexGenCompSel; GLmatrix tmpmat; +#ifdef USER_BUFFERS + key_t mm_ipc_key; + int mm_shm_id; + int mm_sem_id; + struct radeon_memory_manager *rmm; +#endif }; +struct r300_buffer_object { + struct gl_buffer_object mesa_obj; + int id; +}; + #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) static __inline GLuint r300PackColor( GLuint cpp, @@ -841,4 +886,12 @@ void translate_vertex_shader(struct r300_vertex_program *vp); extern void r300InitShaderFuncs(struct dd_function_table *functions); extern void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp); +#ifdef RADEON_VTXFMT_A +extern void radeon_init_vtxfmt_a(r300ContextPtr rmesa); +#endif + +#ifdef HW_VBOS +extern void r300_init_vbo_funcs(struct dd_function_table *functions); +#endif + #endif /* __R300_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 82379ebb371..56a5ec9473a 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -71,6 +71,8 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) r300ContextPtr rmesa=r300; LOCAL_VARS; #else + r300ContextPtr rmesa=r300; + LOCAL_VARS; int i; #endif @@ -261,6 +263,11 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) r300EmitState(r300); #else +#if 1 + cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + end_3d(PASS_PREFIX_VOID); +#endif + R300_STATECHANGE(r300, cb); reg_start(R300_RB3D_COLOROFFSET0, 0); e32(cboffset); @@ -270,6 +277,9 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) else cbpitch |= R300_COLOR_FORMAT_RGB565; + if (r300->radeon.sarea->tiling_enabled) + cbpitch |= R300_COLOR_TILE_ENABLE; + reg_start(R300_RB3D_COLORPITCH0, 0); e32(cbpitch); @@ -352,6 +362,15 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); +#if 1 + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + e32(0x0000000a); + + + reg_start(0x4f18,0); + e32(0x00000003); + cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); +#endif } #ifdef CB_DPATH @@ -422,7 +441,7 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, rc); /* The second constant is needed to get glxgears display anything .. */ reg_start(R300_RS_CNTL_0, 1); - e32(R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18); + e32((1 << R300_RS_CNTL_CI_CNT_SHIFT) | R300_RS_CNTL_0_UNKNOWN_18); e32(0); R300_STATECHANGE(r300, rr); @@ -477,6 +496,8 @@ static void r300EmitClearState(GLcontext * ctx) e32(VP_ZERO()); e32(0); + /*reg_start(0x4500,0); + e32(2560-1);*/ } #endif @@ -561,6 +582,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask, GLboolean all, #endif } + void r300Flush(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -572,6 +594,104 @@ void r300Flush(GLcontext * ctx) r300FlushCmdBuf(r300, __FUNCTION__); } +#ifdef USER_BUFFERS +#include "radeon_mm.h" + +void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +{ + struct r300_dma_buffer *dmabuf; + int fd = rmesa->radeon.dri.fd; + int index = 0; + int size = 0; + drmDMAReq dma; + int ret; + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa); + } + + if (rmesa->dma.current.buf) + r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); + + if (rmesa->dma.nr_released_bufs > 4) + r300FlushCmdBuf(rmesa, __FUNCTION__); + + dmabuf = CALLOC_STRUCT(r300_dma_buffer); + dmabuf->buf = (void *)1; /* hack */ + dmabuf->refcount = 1; + + dmabuf->id = radeon_mm_alloc(rmesa, 4, RADEON_BUFFER_SIZE*16); + + rmesa->dma.current.buf = dmabuf; + rmesa->dma.current.address = radeon_mm_ptr(rmesa, dmabuf->id); + rmesa->dma.current.end = RADEON_BUFFER_SIZE*16; + rmesa->dma.current.start = 0; + rmesa->dma.current.ptr = 0; +} + +void r300ReleaseDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, const char *caller) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (!region->buf) + return; + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (--region->buf->refcount == 0) { + radeon_mm_free(rmesa, region->buf->id); + FREE(region->buf); + rmesa->dma.nr_released_bufs++; + } + + region->buf = 0; + region->start = 0; +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void r300AllocDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, + int bytes, int alignment) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (region->buf) + r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); + + alignment--; + rmesa->dma.current.start = rmesa->dma.current.ptr = + (rmesa->dma.current.ptr + alignment) & ~alignment; + + if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) + r300RefillCurrentDmaRegion(rmesa); + + region->start = rmesa->dma.current.start; + region->ptr = rmesa->dma.current.start; + region->end = rmesa->dma.current.start + bytes; + region->address = rmesa->dma.current.address; + region->buf = rmesa->dma.current.buf; + region->buf->refcount++; + + rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + rmesa->dma.current.start = + rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; + + assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +} + +#else void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) { struct r300_dma_buffer *dmabuf; @@ -714,6 +834,8 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); } +#endif + /* Called via glXGetMemoryOffsetMESA() */ GLuint r300GetMemoryOffsetMESA(__DRInativeDisplay * dpy, int scrn, const GLvoid * pointer) diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c index 1908ac07acd..52827c2d5dc 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.c +++ b/src/mesa/drivers/dri/r300/r300_maos.c @@ -50,6 +50,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_maos.h" #include "r300_ioctl.h" +#ifdef USER_BUFFERS +#include "radeon_mm.h" +#endif + #define DEBUG_ALL DEBUG_VERTS @@ -173,15 +177,11 @@ static void emit_vector(GLcontext * ctx, fprintf(stderr, "%s count %d size %d stride %d\n", __FUNCTION__, count, size, stride); - if(r300IsGartMemory(rmesa, data, size*stride)){ - rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; - rvb->start = (char *)data - rvb->address; + if(r300IsGartMemory(rmesa, data, /*(count-1)*stride */ 4)){ + rvb->address = data; + rvb->start = 0; rvb->aos_offset = r300GartOffsetFromVirtual(rmesa, data); - - if(stride == 0) - rvb->aos_stride = 0; - else - rvb->aos_stride = stride / 4; + rvb->aos_stride = stride / 4 ; rvb->aos_size = size; return; @@ -226,26 +226,30 @@ static void emit_vector(GLcontext * ctx, } -void r300EmitElts(GLcontext * ctx, GLuint *elts, unsigned long n_elts) +void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_size) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_dma_region *rvb=&rmesa->state.elt_dma; - unsigned short int *out; - int i; + void *out; - if(r300IsGartMemory(rmesa, elts, n_elts*sizeof(unsigned short int))){ + assert(elt_size == 2 || elt_size == 4); + + if(r300IsGartMemory(rmesa, elts, n_elts * elt_size)){ rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; - rvb->start = (char *)elts - rvb->address; + rvb->start = ((char *)elts) - rvb->address; rvb->aos_offset = rmesa->radeon.radeonScreen->gart_texture_offset + rvb->start; + return ; + }else if(r300IsGartMemory(rmesa, elts, 1)){ + WARN_ONCE("Pointer not within GART memory!\n"); + exit(1); } - r300AllocDmaRegion(rmesa, rvb, n_elts*sizeof(unsigned short int), 2); + r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size); + rvb->aos_offset = GET_START(rvb); - out = (unsigned short int *)(rvb->address + rvb->start); - - for(i=0; i < n_elts; i++) - out[i]=(unsigned short int)elts[i]; + out = rvb->address + rvb->start; + memcpy(out, elts, n_elts * elt_size); } /* Emit vertex data to GART memory (unless immediate mode) @@ -549,6 +553,362 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd) rmesa->state.aos_count = nr; } +#ifdef RADEON_VTXFMT_A +void r300EmitArraysVtx(GLcontext * ctx, GLboolean immd) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = rmesa; + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + //struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + GLuint nr = 0; + GLuint count = VB->Count; + GLuint dw,mask; + GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */ + GLuint aa_vap_reg = 0; /* VAP register assignment */ + GLuint i; + GLuint inputs = 0; + + +#define CONFIGURE_AOS(r, f, v, sz, cn) { \ + if (RADEON_DEBUG & DEBUG_STATE) \ + fprintf(stderr, "Enabling "#v "\n"); \ + if (++nr >= R300_MAX_AOS_ARRAYS) { \ + fprintf(stderr, "Aieee! AOS array count exceeded!\n"); \ + exit(-1); \ + } \ + \ + if (hw_tcl_on == GL_FALSE) \ + rmesa->state.aos[nr-1].aos_reg = aa_vap_reg++; \ + rmesa->state.aos[nr-1].aos_format = f; \ + if (immd) { \ + rmesa->state.aos[nr-1].aos_size = 4; \ + rmesa->state.aos[nr-1].aos_stride = 4; \ + rmesa->state.aos[nr-1].aos_offset = 0; \ + } else { \ + emit_vector(ctx, \ + &rmesa->state.aos[nr-1], \ + v.data, \ + sz, \ + v.stride, \ + cn); \ + rmesa->state.vap_reg.r=rmesa->state.aos[nr-1].aos_reg; \ + } \ +} + + if (hw_tcl_on) { + GLuint InputsRead = CURRENT_VERTEX_SHADER(ctx)->InputsRead; + struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if (InputsRead & (1<<VERT_ATTRIB_POS)) { + inputs |= _TNL_BIT_POS; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_POS]; + } + if (InputsRead & (1<<VERT_ATTRIB_NORMAL)) { + inputs |= _TNL_BIT_NORMAL; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_NORMAL]; + } + if (InputsRead & (1<<VERT_ATTRIB_COLOR0)) { + inputs |= _TNL_BIT_COLOR0; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_COLOR0]; + } + if (InputsRead & (1<<VERT_ATTRIB_COLOR1)) { + inputs |= _TNL_BIT_COLOR1; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_COLOR1]; + } + if (InputsRead & (1<<VERT_ATTRIB_FOG)) { + inputs |= _TNL_BIT_FOG; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_FOG]; + } + if(ctx->Const.MaxTextureUnits > 8) { /* Not sure if this can even happen... */ + fprintf(stderr, "%s: Cant handle that many inputs\n", __FUNCTION__); + exit(-1); + } + for (i=0;i<ctx->Const.MaxTextureUnits;i++) { + if (InputsRead & (1<<(VERT_ATTRIB_TEX0+i))) { + inputs |= _TNL_BIT_TEX0<<i; + rmesa->state.aos[nr++].aos_reg = prog->inputs[VERT_ATTRIB_TEX0+i]; + } + } + nr = 0; + } else { + inputs = TNL_CONTEXT(ctx)->render_inputs; + } + rmesa->state.render_inputs = inputs; + + if (inputs & _TNL_BIT_POS) { + CONFIGURE_AOS(i_coords, AOS_FORMAT_FLOAT, + VB->AttribPtr[VERT_ATTRIB_POS], + immd ? 4 : VB->AttribPtr[VERT_ATTRIB_POS].size, + count); + + vic_1 |= R300_INPUT_CNTL_POS; + } + + if (inputs & _TNL_BIT_NORMAL) { + CONFIGURE_AOS(i_normal, AOS_FORMAT_FLOAT, + VB->AttribPtr[VERT_ATTRIB_NORMAL], + immd ? 4 : VB->AttribPtr[VERT_ATTRIB_NORMAL].size, + count); + + vic_1 |= R300_INPUT_CNTL_NORMAL; + } + + if (inputs & _TNL_BIT_COLOR0) { + int emitsize=4; + + if (!immd) { + if (VB->AttribPtr[VERT_ATTRIB_COLOR0].size == 4 && + (VB->AttribPtr[VERT_ATTRIB_COLOR0].stride != 0 || + ((float*)VB->AttribPtr[VERT_ATTRIB_COLOR0].data)[3] != 1.0)) { + emitsize = 4; + } else { + emitsize = 3; + }//emitsize = VB->AttribPtr[VERT_ATTRIB_COLOR0].size; + } + if(VB->AttribPtr[VERT_ATTRIB_COLOR0].type == GL_UNSIGNED_BYTE) + emitsize = 1; + + CONFIGURE_AOS(i_color[0], VB->AttribPtr[VERT_ATTRIB_COLOR0].type == GL_UNSIGNED_BYTE ? AOS_FORMAT_UBYTE : AOS_FORMAT_FLOAT_COLOR, + VB->AttribPtr[VERT_ATTRIB_COLOR0], + immd ? 4 : emitsize, + count); + + vic_1 |= R300_INPUT_CNTL_COLOR; + } + + if (inputs & _TNL_BIT_COLOR1) { + CONFIGURE_AOS(i_color[1], AOS_FORMAT_FLOAT_COLOR, + VB->AttribPtr[VERT_ATTRIB_COLOR1], + immd ? 4 : VB->AttribPtr[VERT_ATTRIB_COLOR1].size, + count); + } + +#if 0 + if (inputs & _TNL_BIT_FOG) { + CONFIGURE_AOS( AOS_FORMAT_FLOAT, + VB->FogCoordPtr, + immd ? 4 : VB->FogCoordPtr->size, + count); + } +#endif + + r300->state.texture.tc_count = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (inputs & (_TNL_BIT_TEX0 << i)) { + CONFIGURE_AOS(i_tex[i], AOS_FORMAT_FLOAT, + VB->AttribPtr[VERT_ATTRIB_TEX0+i], + immd ? 4 : VB->AttribPtr[VERT_ATTRIB_TEX0+i].size, + count); + + vic_1 |= R300_INPUT_CNTL_TC0 << i; + r300->state.texture.tc_count++; + } + } + for(i=0; i < nr; i++) + if(r300->state.aos[i].aos_format == 2){ + assert(r300->state.aos[i].aos_size == 1); + r300->state.aos[i].aos_size=5; + } + +#define SHOW_INFO(n) do { \ + if (RADEON_DEBUG & DEBUG_ALL) { \ + fprintf(stderr, "RR[%d] - sz=%d, reg=%d, fmt=%d -- st=%d, of=0x%08x\n", \ + n, \ + r300->state.aos[n].aos_size, \ + r300->state.aos[n].aos_reg, \ + r300->state.aos[n].aos_format, \ + r300->state.aos[n].aos_stride, \ + r300->state.aos[n].aos_offset); \ + } \ +} while(0); + + /* setup INPUT_ROUTE */ + R300_STATECHANGE(r300, vir[0]); + for(i=0;i+1<nr;i+=2){ + SHOW_INFO(i) + SHOW_INFO(i+1) + dw=(r300->state.aos[i].aos_size-1) + | ((r300->state.aos[i].aos_reg)<<8) + | (r300->state.aos[i].aos_format<<14) + | (((r300->state.aos[i+1].aos_size-1) + | ((r300->state.aos[i+1].aos_reg)<<8) + | (r300->state.aos[i+1].aos_format<<14))<<16); + + if(i+2==nr){ + dw|=(1<<(13+16)); + } + r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw; + } + if(nr & 1){ + SHOW_INFO(nr-1) + dw=(r300->state.aos[nr-1].aos_size-1) + | (r300->state.aos[nr-1].aos_format<<14) + | ((r300->state.aos[nr-1].aos_reg)<<8) + | (1<<13); + r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; + //fprintf(stderr, "vir0 dw=%08x\n", dw); + } + /* Set the rest of INPUT_ROUTE_0 to 0 */ + //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0); + ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = (nr+1)>>1; + + + /* Mesa assumes that all missing components are from (0, 0, 0, 1) */ +#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT)) + +#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \ + | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT)) + + R300_STATECHANGE(r300, vir[1]); + + for(i=0; i < nr; i++) + if(r300->state.aos[i].aos_format == 2){ + assert(r300->state.aos[i].aos_size == 5); + r300->state.aos[i].aos_size=/*3*/4; /* XXX */ + } + + + for(i=0;i+1<nr;i+=2){ + /* do i first.. */ + mask=(1<<(r300->state.aos[i].aos_size*3))-1; + dw=(ALL_COMPONENTS & mask) + | (ALL_DEFAULT & ~mask) + | R300_INPUT_ROUTE_ENABLE; + + /* i+1 */ + mask=(1<<(r300->state.aos[i+1].aos_size*3))-1; + dw|=( + (ALL_COMPONENTS & mask) + | (ALL_DEFAULT & ~mask) + | R300_INPUT_ROUTE_ENABLE + )<<16; + + //fprintf(stderr, "vir1 dw=%08x\n", dw); + r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw; + } + if(nr & 1){ + mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1; + dw=(ALL_COMPONENTS & mask) + | (ALL_DEFAULT & ~mask) + | R300_INPUT_ROUTE_ENABLE; + r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; + //fprintf(stderr, "vir1 dw=%08x\n", dw); + } + /* Set the rest of INPUT_ROUTE_1 to 0 */ + //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0; + ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = (nr+1)>>1; + + /* Set up input_cntl */ + /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */ + R300_STATECHANGE(r300, vic); + r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555; /* Hard coded value, no idea what it means */ + r300->hw.vic.cmd[R300_VIC_CNTL_1]=vic_1; + + for(i=0; i < nr; i++) + if(r300->state.aos[i].aos_format == 2){ + assert(r300->state.aos[i].aos_size == /*3*/4); /* XXX */ + r300->state.aos[i].aos_size=1; + } +#if 0 + r300->hw.vic.cmd[R300_VIC_CNTL_1]=0; + + if(r300->state.render_inputs & _TNL_BIT_POS) + r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS; + + if(r300->state.render_inputs & _TNL_BIT_NORMAL) + r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL; + + if(r300->state.render_inputs & _TNL_BIT_COLOR0) + r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR; + + for(i=0;i < ctx->Const.MaxTextureUnits;i++) + if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i)) + r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i); +#endif + + /* Stage 3: VAP output */ + + R300_STATECHANGE(r300, vof); + + r300->hw.vof.cmd[R300_VOF_CNTL_0]=0; + r300->hw.vof.cmd[R300_VOF_CNTL_1]=0; + if (hw_tcl_on){ + GLuint OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->OutputsWritten; + + if(OutputsWritten & (1<<VERT_RESULT_HPOS)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + if(OutputsWritten & (1<<VERT_RESULT_COL0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; + /*if(OutputsWritten & (1<<VERT_RESULT_COL1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + if(OutputsWritten & (1<<VERT_RESULT_BFC0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; + if(OutputsWritten & (1<<VERT_RESULT_BFC1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;*/ + //if(OutputsWritten & (1<<VERT_RESULT_FOGC)) + + if(OutputsWritten & (1<<VERT_RESULT_PSIZ)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + for(i=0;i < ctx->Const.MaxTextureUnits;i++) + if(OutputsWritten & (1<<(VERT_RESULT_TEX0+i))) + r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4<<(3*i)); + } else { + if(inputs & _TNL_BIT_POS) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + if(inputs & _TNL_BIT_COLOR0) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; + if(inputs & _TNL_BIT_COLOR1) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + for(i=0;i < ctx->Const.MaxTextureUnits;i++) + if(inputs & (_TNL_BIT_TEX0<<i)) + r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i)); + } + + rmesa->state.aos_count = nr; +} +#endif + +#ifdef USER_BUFFERS +void r300UseArrays(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + + if(rmesa->state.elt_dma.buf) + radeon_mm_use(rmesa, rmesa->state.elt_dma.buf->id); + + for (i=0; i < rmesa->state.aos_count;i++) { + if (rmesa->state.aos[i].buf) + radeon_mm_use(rmesa, rmesa->state.aos[i].buf->id); + } + +#ifdef HW_VBOS + +#define USE_VBO(a) if (ctx->Array.a.BufferObj->Name && ctx->Array.a.Enabled) \ + radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.a.BufferObj)->id) + + if (ctx->Array.ElementArrayBufferObj->Name && ctx->Array.ElementArrayBufferObj->OnCard) + radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.ElementArrayBufferObj)->id); + + USE_VBO(Vertex); + USE_VBO(Normal); + USE_VBO(Color); + USE_VBO(SecondaryColor); + USE_VBO(FogCoord); + + for (i=0; i < MAX_TEXTURE_COORD_UNITS; i++) + USE_VBO(TexCoord[i]); +#endif + +} +#endif + void r300ReleaseArrays(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h index f76c94a2191..c75589085bf 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.h +++ b/src/mesa/drivers/dri/r300/r300_maos.h @@ -40,8 +40,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" -extern void r300EmitElts(GLcontext * ctx, GLuint *elts, unsigned long n_elts); +extern void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_size); extern void r300EmitArrays(GLcontext * ctx, GLboolean immd); + +#ifdef RADEON_VTXFMT_A +extern void r300EmitArraysVtx(GLcontext * ctx, GLboolean immd); +#endif + +#ifdef USER_BUFFERS +void r300UseArrays(GLcontext * ctx); +#endif + extern void r300ReleaseArrays(GLcontext * ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 07c453027ba..32cac350fa5 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -728,6 +728,7 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ /* 0x16 - some 16 bit green format.. ?? */ # define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ +# define R300_TX_FORMAT_CUBIC_MAP (1 << 26) /* gap */ /* Floating point formats */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index a0769e27f89..8f74007faa9 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -417,27 +417,81 @@ static GLboolean r300_run_immediate_render(GLcontext *ctx, /* vertex buffer implementation */ -static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type) +static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type, int elt_size) { LOCAL_VARS unsigned long addr_a; + unsigned long t_addr; + unsigned long magic_1, magic_2; + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; - if(addr & 1){ + assert(elt_size == 2 || elt_size == 4); + + if(addr & (elt_size-1)){ WARN_ONCE("Badly aligned buffer\n"); return ; } - addr_a = 0; /*addr & 0x1c;*/ +#ifdef OPTIMIZE_ELTS + addr_a = 0; + + magic_1 = (addr % 32) / 4; + t_addr = addr & (~0x1d); + magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; + + check_space(6); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + } + + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); + if(elt_size == 4){ + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr /*& 0xffffffe3*/); + } else { + e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); + e32(t_addr); + } + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32(magic_2); /* Total number of dwords needed? */ + } + //cp_delay(PASS_PREFIX 1); +#if 0 + fprintf(stderr, "magic_1 %d\n", magic_1); + fprintf(stderr, "t_addr %x\n", t_addr); + fprintf(stderr, "magic_2 %d\n", magic_2); + exit(1); +#endif +#else + addr_a = 0; check_space(6); start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - /* TODO: R300_VAP_VF_CNTL__INDEX_SIZE_32bit . */ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + } start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); - e32(R300_EB_UNK1 | (addr_a << 16) | R300_EB_UNK2); + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); e32(addr /*& 0xffffffe3*/); - e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ + } + //cp_delay(PASS_PREFIX 1); +#endif } static void r300_render_vb_primitive(r300ContextPtr rmesa, @@ -476,8 +530,8 @@ static void r300_render_vb_primitive(r300ContextPtr rmesa, WARN_ONCE("Too many elts\n"); return; } - r300EmitElts(ctx, rmesa->state.Elts+start, num_verts); - fire_EB(PASS_PREFIX GET_START(&(rmesa->state.elt_dma)), num_verts, type); + r300EmitElts(ctx, rmesa->state.Elts+start, num_verts, 4); + fire_EB(PASS_PREFIX GET_START(&(rmesa->state.elt_dma)), num_verts, type, 4); #endif }else{ r300EmitAOS(rmesa, rmesa->state.aos_count, start); @@ -500,7 +554,7 @@ static GLboolean r300_run_vb_render(GLcontext *ctx, r300ReleaseArrays(ctx); r300EmitArrays(ctx, GL_FALSE); - + // LOCK_HARDWARE(&(rmesa->radeon)); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); @@ -526,6 +580,9 @@ static GLboolean r300_run_vb_render(GLcontext *ctx, reg_start(0x4f18,0); e32(0x00000003); +#ifdef USER_BUFFERS + r300UseArrays(ctx); +#endif // end_3d(PASS_PREFIX_VOID); /* Flush state - we are done drawing.. */ @@ -536,6 +593,186 @@ static GLboolean r300_run_vb_render(GLcontext *ctx, return GL_FALSE; } +#ifdef RADEON_VTXFMT_A + +static void r300_render_vb_primitive_vtxfmt_a(r300ContextPtr rmesa, + GLcontext *ctx, + int start, + int end, + int prim) +{ + int type, num_verts; + radeonScreenPtr rsp=rmesa->radeon.radeonScreen; + LOCAL_VARS + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + int i; + + type=r300_get_primitive_type(rmesa, ctx, prim); + num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); + + if(type<0 || num_verts <= 0)return; + + if(rmesa->state.VB.Elts){ + r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); +#if 0 + start_index32_packet(num_verts, type); + for(i=0; i < num_verts; i++) + e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */ +#else + WARN_ONCE("Rendering with elt buffers\n"); + if(num_verts == 1){ + //start_index32_packet(num_verts, type); + //e32(rmesa->state.Elts[start]); + return; + } + + if(num_verts > 65535){ /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + + r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); + fire_EB(PASS_PREFIX rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); +#endif + }else{ + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + fire_AOS(PASS_PREFIX num_verts, type); + } +} + +void dump_array(struct r300_dma_region *rvb, int count) +{ + int *out = (int *)(rvb->address + rvb->start); + int i, ci; + + for (i=0; i < count; i++) { + fprintf(stderr, "{"); + if (rvb->aos_format == AOS_FORMAT_FLOAT) + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); + + out += rvb->aos_stride; + } + + fprintf(stderr, "\n"); +} + +void dump_dt(struct dt *dt, int count) +{ + int *out = dt->data; + int i, ci; + + fprintf(stderr, "base at %p ", out); + + for (i=0; i < count; i++){ + fprintf(stderr, "{"); + if (dt->type == GL_FLOAT) + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); + + out = (char *)out + dt->stride; + } + + fprintf(stderr, "\n"); +} + +/*static */GLboolean r300_run_vb_render_vtxfmt_a(GLcontext *ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + //TNLcontext *tnl = TNL_CONTEXT(ctx); + struct radeon_vertex_buffer *VB = &rmesa->state.VB; //&tnl->vb; + int i, j; + LOCAL_VARS + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->state.VB.LockCount == 0) { + r300ReleaseArrays(ctx); + r300EmitArraysVtx(ctx, GL_FALSE); + } else { + /* TODO: Figure out why do we need these. */ + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vic); + R300_STATECHANGE(rmesa, vof); + +#if 0 + fprintf(stderr, "dt:\n"); + for(i=0; i < VERT_ATTRIB_MAX; i++){ + fprintf(stderr, "dt %d:", i); + dump_dt(&rmesa->state.VB.AttribPtr[i], VB->Count); + } + + fprintf(stderr, "before:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif +#if 0 + r300ReleaseArrays(ctx); + r300EmitArraysVtx(ctx, GL_FALSE); + + fprintf(stderr, "after:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif + } + +// LOCK_HARDWARE(&(rmesa->radeon)); + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + e32(0x0000000a); + + reg_start(0x4f18,0); + e32(0x00000003); +#if 0 + reg_start(R300_VAP_PVS_WAITIDLE,0); + e32(0x00000000); +#endif + r300EmitState(rmesa); + + for(i=0; i < VB->PrimitiveCount; i++){ + GLuint prim = VB->Primitive[i].mode; + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + r300_render_vb_primitive_vtxfmt_a(rmesa, ctx, start, start + length, prim); + } + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + e32(0x0000000a/*0x2*/); + + reg_start(0x4f18,0); + e32(0x00000003/*0x1*/); + +#ifdef USER_BUFFERS + r300UseArrays(ctx); +#endif +// end_3d(PASS_PREFIX_VOID); + + /* Flush state - we are done drawing.. */ +// r300FlushCmdBufLocked(rmesa, __FUNCTION__); +// radeonWaitForIdleLocked(&(rmesa->radeon)); + +// UNLOCK_HARDWARE(&(rmesa->radeon)); + return GL_FALSE; +} +#endif + /** * Called by the pipeline manager to render a batch of primitives. * We can return true to pass on to the next stage (i.e. software @@ -678,26 +915,9 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx, fprintf(stderr, "%s\n", __FUNCTION__); if(hw_tcl_on == GL_FALSE) return GL_TRUE; - if(ctx->VertexProgram._Enabled == GL_FALSE){ - _tnl_UpdateFixedFunctionProgram(ctx); - } - vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - if(vp->translated == GL_FALSE) - translate_vertex_shader(vp); - if(vp->translated == GL_FALSE){ - fprintf(stderr, "Failing back to sw-tcl\n"); - debug_vp(ctx, &vp->mesa_program); - hw_tcl_on=future_hw_tcl_on=0; - r300ResetHwState(rmesa); - return GL_TRUE; - } - - r300_setup_textures(ctx); - r300_setup_rs_unit(ctx); - - r300SetupVertexShader(rmesa); - r300SetupPixelShader(rmesa); - + + r300UpdateShaderStates(rmesa); + return r300_run_vb_render(ctx, stage); } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c0b4dcc8cab..e0f357d8c45 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1553,6 +1553,36 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) #endif } +extern int future_hw_tcl_on; +void r300UpdateShaderStates(r300ContextPtr rmesa) +{ + GLcontext *ctx; + struct r300_vertex_program *vp; + + ctx = rmesa->radeon.glCtx; + + if(ctx->VertexProgram._Enabled == GL_FALSE){ + _tnl_UpdateFixedFunctionProgram(ctx); + } + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if(vp->translated == GL_FALSE) + translate_vertex_shader(vp); + if(vp->translated == GL_FALSE){ + fprintf(stderr, "Failing back to sw-tcl\n"); + debug_vp(ctx, &vp->mesa_program); + hw_tcl_on=future_hw_tcl_on=0; + r300ResetHwState(rmesa); + + return ; + } + + r300_setup_textures(ctx); + r300_setup_rs_unit(ctx); + + r300SetupVertexShader(rmesa); + r300SetupPixelShader(rmesa); +} + /* This is probably wrong for some values, I need to test this * some more. Range checking would be a good idea also.. * diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 131834fa782..3e157943711 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -43,6 +43,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. r300->hw.is_dirty = GL_TRUE; \ } while(0) +#define R300_PRINT_STATE(r300, atom) \ + r300_print_state_atom(r300, &r300->hw.atom) + /* Fire the buffered vertices no matter what. TODO: This has not been implemented yet */ @@ -65,5 +68,6 @@ extern void r300SetupPixelShader(r300ContextPtr rmesa); extern void r300_setup_textures(GLcontext *ctx); extern void r300_setup_rs_unit(GLcontext *ctx); +extern void r300UpdateShaderStates(r300ContextPtr rmesa); #endif /* __R300_STATE_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 9d929516c96..359a0b1b8af 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -341,9 +341,8 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->format_x |= R200_TEXCOORD_VOLUME; } else if (tObj->Target == GL_TEXTURE_CUBE_MAP) { ASSERT(log2Width == log2Height); - t->format |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | - (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) - | (R200_TXFORMAT_CUBIC_MAP_ENABLE)); + t->format |= R300_TX_FORMAT_CUBIC_MAP; + t->format_x |= R200_TEXCOORD_CUBIC_ENV; t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | (log2Height << R200_FACE_HEIGHT_1_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 717832048cf..10fa22d5615 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -597,6 +597,7 @@ void translate_vertex_shader(struct r300_vertex_program *vp) Ops that need temp vars should probably be given reg indexes starting at the end of tmp area. */ switch(vpi->Opcode){ case VP_OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} +#if 1 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); o_inst->src1=t_src(vp, &src[0]); @@ -606,6 +607,23 @@ void translate_vertex_shader(struct r300_vertex_program *vp) t_src_class(src[0].File), VSF_FLAG_NONE); o_inst->src3=0; +#else + hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_ONE, SWIZZLE_ONE, + SWIZZLE_ONE, SWIZZLE_ONE, + t_src_class(src[0].File), VSF_FLAG_NONE); + + + o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_ZERO, SWIZZLE_ZERO, + SWIZZLE_ZERO, SWIZZLE_ZERO, + t_src_class(src[0].File), VSF_FLAG_NONE); +#endif goto next; diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c new file mode 100644 index 00000000000..b69929e6642 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mm.c @@ -0,0 +1,339 @@ +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/sem.h> + +#include "r300_context.h" +#include "r300_cmdbuf.h" +#include "radeon_mm.h" + +#ifdef USER_BUFFERS +void radeon_mm_reset(r300ContextPtr rmesa) +{ + drm_r300_cmd_header_t *cmd; + int ret; + + memset(rmesa->rmm, 0, sizeof(struct radeon_memory_manager)); + + rmesa->rmm->u_size = 1024; //2048; + //rmesa->radeon.radeonScreen->scratch[2] = rmesa->rmm->vb_age; +#if 0 /* FIXME */ + cmd = r300AllocCmdBuf(rmesa, 4, __FUNCTION__); + cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; + cmd[0].scratch.reg = 2; + cmd[0].scratch.n_bufs = 1; + cmd[0].scratch.flags = 0; + cmd[1].u = (unsigned long)(&rmesa->rmm->vb_age); + cmd[2].u = (unsigned long)(&rmesa->rmm->u_list[0].age); + cmd[3].u = /*id*/0; + + /* Protect from DRM. */ + LOCK_HARDWARE(&rmesa->radeon); + rmesa->rmm->u_list[0].h_pending ++; + ret = r300FlushCmdBufLocked(rmesa, __FUNCTION__); + UNLOCK_HARDWARE(&rmesa->radeon); + + if (ret) { + WARN_ONCE("r300FlushCmdBufLocked\n"); + exit(1); + } +#endif +} + +void radeon_mm_init(r300ContextPtr rmesa) +{ + + rmesa->mm_ipc_key = 0xdeadbeed; //ftok("/tmp/.r300.mm_lock", "x"); + if(rmesa->mm_ipc_key == -1){ + perror("ftok"); + exit(1); + } + + rmesa->mm_shm_id = shmget(rmesa->mm_ipc_key, sizeof(struct radeon_memory_manager), 0644); + if (rmesa->mm_shm_id == -1) { + rmesa->mm_shm_id = shmget(rmesa->mm_ipc_key, sizeof(struct radeon_memory_manager), 0644 | IPC_CREAT); + + rmesa->rmm = shmat(rmesa->mm_shm_id, (void *)0, 0); + if (rmesa->rmm == (char *)(-1)) { + perror("shmat"); + exit(1); + } + + radeon_mm_reset(rmesa); + + rmesa->mm_sem_id = semget(rmesa->mm_ipc_key, 2, 0666 | IPC_CREAT); + if (rmesa->mm_sem_id == -1) { + perror("semget"); + exit(1); + } + + return ; + } + + rmesa->rmm = shmat(rmesa->mm_shm_id, (void *)0, 0); + if (rmesa->rmm == (char *)(-1)) { + perror("shmat"); + exit(1); + } + /* FIXME */ + radeon_mm_reset(rmesa); + + rmesa->mm_sem_id = semget(rmesa->mm_ipc_key, 2, 0666); + if (rmesa->mm_sem_id == -1) { + perror("semget"); + exit(1); + } +} + +static void radeon_mm_lock(r300ContextPtr rmesa, int sem) +{ + struct sembuf sb = { 0, 1, 0 }; + + sb.sem_num = sem; + + if (semop(rmesa->mm_sem_id, &sb, 1) == -1) { + perror("semop"); + exit(1); + } +} + +static void radeon_mm_unlock(r300ContextPtr rmesa, int sem) +{ + struct sembuf sb = { 0, -1, 0 }; + + sb.sem_num = sem; + + if (semop(rmesa->mm_sem_id, &sb, 1) == -1) { + perror("semop"); + exit(1); + } +} + +void *radeon_mm_ptr(r300ContextPtr rmesa, int id) +{ + return rmesa->rmm->u_list[id].ptr; +} + +//#define MM_DEBUG +int radeon_mm_alloc(r300ContextPtr rmesa, int alignment, int size) +{ + drm_radeon_mem_alloc_t alloc; + int offset, ret; + int i, end, free=-1; + int done_age; + drm_radeon_mem_free_t memfree; + int tries=0, tries2=0; + + memfree.region = RADEON_MEM_REGION_GART; + + radeon_mm_lock(rmesa, RADEON_MM_UL); + + again: + + done_age = rmesa->radeon.radeonScreen->scratch[2]; + + i = 1; //rmesa->rmm->u_head + 1; + //i &= rmesa->rmm->u_size - 1; + + end = i + rmesa->rmm->u_size; + //end &= rmesa->rmm->u_size - 1; + + for (; i != end; i ++/*, i &= rmesa->rmm->u_size-1*/) { + if (rmesa->rmm->u_list[i].ptr == NULL){ + free = i; + continue; + } + + if (rmesa->rmm->u_list[i].h_pending == 0 && + rmesa->rmm->u_list[i].pending && rmesa->rmm->u_list[i].age <= done_age) { + memfree.region_offset = (char *)rmesa->rmm->u_list[i].ptr - + (char *)rmesa->radeon.radeonScreen->gartTextures.map; + + ret = drmCommandWrite(rmesa->radeon.radeonScreen->driScreen->fd, + DRM_RADEON_FREE, &memfree, sizeof(memfree)); + + if (ret) { + //fprintf(stderr, "Failed to free at %p\n", rmesa->rmm->u_list[i].ptr); + //fprintf(stderr, "ret = %s\n", strerror(-ret)); + + //radeon_mm_unlock(rmesa, RADEON_MM_UL); + //exit(1); + } else { +#ifdef MM_DEBUG + fprintf(stderr, "really freed %d at age %x\n", i, rmesa->radeon.radeonScreen->scratch[2]); +#endif + rmesa->rmm->u_list[i].pending = 0; + rmesa->rmm->u_list[i].ptr = NULL; + free = i; + } + } + } + done: + rmesa->rmm->u_head = i; + + if (free == -1) { + //usleep(100); + r300FlushCmdBuf(rmesa, __FUNCTION__); + tries++; + if(tries>100){ + WARN_ONCE("Ran out of slots!\n"); + exit(1); + } + goto again; + } + + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = alignment; + alloc.size = size; + alloc.region_offset = &offset; + + ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); + if (ret) { + r300FlushCmdBuf(rmesa, __FUNCTION__); + //usleep(100); + tries2++; + tries = 0; + if(tries2>100){ + WARN_ONCE("Ran out of GART memory!\n"); + exit(1); + } + goto again; + } + + i = free; + rmesa->rmm->u_list[i].ptr = ((GLubyte *)rmesa->radeon.radeonScreen->gartTextures.map) + offset; + rmesa->rmm->u_list[i].size = size; + rmesa->rmm->u_list[i].age = 0; + +#ifdef MM_DEBUG + fprintf(stderr, "allocated %d at age %x\n", i, rmesa->radeon.radeonScreen->scratch[2]); +#endif + + radeon_mm_unlock(rmesa, RADEON_MM_UL); + + return i; +} + +void radeon_mm_use(r300ContextPtr rmesa, int id) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, rmesa->radeon.radeonScreen->scratch[2]); +#endif + drm_r300_cmd_header_t *cmd; + + if(id == 0) + return; + + radeon_mm_lock(rmesa, RADEON_MM_UL); + + cmd = r300AllocCmdBuf(rmesa, 4, __FUNCTION__); + cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; + cmd[0].scratch.reg = 2; + cmd[0].scratch.n_bufs = 1; + cmd[0].scratch.flags = 0; + cmd[1].u = (unsigned long)(&rmesa->rmm->vb_age); + cmd[2].u = (unsigned long)(&rmesa->rmm->u_list[id].age); + cmd[3].u = /*id*/0; + + LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */ + rmesa->rmm->u_list[id].h_pending ++; + UNLOCK_HARDWARE(&rmesa->radeon); + + radeon_mm_unlock(rmesa, RADEON_MM_UL); +} + +void *radeon_mm_map(r300ContextPtr rmesa, int id, int access) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, rmesa->radeon.radeonScreen->scratch[2]); +#endif + void *ptr; + int tries = 0; + + if (access == RADEON_MM_R) { + radeon_mm_lock(rmesa, RADEON_MM_UL); + + if(rmesa->rmm->u_list[id].mapped == 1) + WARN_ONCE("buffer %d already mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 1; + ptr = radeon_mm_ptr(rmesa, id); + + radeon_mm_unlock(rmesa, RADEON_MM_UL); + + return ptr; + } + + radeon_mm_lock(rmesa, RADEON_MM_UL); + + if (rmesa->rmm->u_list[id].h_pending) + r300FlushCmdBuf(rmesa, __FUNCTION__); + + if (rmesa->rmm->u_list[id].h_pending) { + radeon_mm_unlock(rmesa, RADEON_MM_UL); + return NULL; + } + + while(rmesa->rmm->u_list[id].age > rmesa->radeon.radeonScreen->scratch[2] && tries++ < 1000) + usleep(10); + + if (tries >= 1000) { + fprintf(stderr, "Idling failed (%x vs %x)\n", + rmesa->rmm->u_list[id].age, rmesa->radeon.radeonScreen->scratch[2]); + radeon_mm_unlock(rmesa, RADEON_MM_UL); + return NULL; + } + + if(rmesa->rmm->u_list[id].mapped == 1) + WARN_ONCE("buffer %d already mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 1; + ptr = radeon_mm_ptr(rmesa, id); + + radeon_mm_unlock(rmesa, RADEON_MM_UL); + + return ptr; +} + +void radeon_mm_unmap(r300ContextPtr rmesa, int id) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, rmesa->radeon.radeonScreen->scratch[2]); +#endif + + radeon_mm_lock(rmesa, RADEON_MM_UL); + + if(rmesa->rmm->u_list[id].mapped == 0) + WARN_ONCE("buffer %d not mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 0; + + radeon_mm_unlock(rmesa, RADEON_MM_UL); +} + +void radeon_mm_free(r300ContextPtr rmesa, int id) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, rmesa->radeon.radeonScreen->scratch[2]); +#endif + + if(id == 0) + return; + + radeon_mm_lock(rmesa, RADEON_MM_UL); + if(rmesa->rmm->u_list[id].ptr == NULL){ + radeon_mm_unlock(rmesa, RADEON_MM_UL); + WARN_ONCE("Not allocated!\n"); + return ; + } + + if(rmesa->rmm->u_list[id].pending){ + radeon_mm_unlock(rmesa, RADEON_MM_UL); + WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); + return ; + } + + rmesa->rmm->u_list[id].pending = 1; + radeon_mm_unlock(rmesa, RADEON_MM_UL); +} +#endif diff --git a/src/mesa/drivers/dri/r300/radeon_mm.h b/src/mesa/drivers/dri/r300/radeon_mm.h new file mode 100644 index 00000000000..637ecb1ddaf --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mm.h @@ -0,0 +1,35 @@ +#ifndef __RADEON_MM_H__ +#define __RADEON_MM_H__ + +//#define RADEON_MM_PDL 0 +#define RADEON_MM_UL 1 + +#define RADEON_MM_R 1 +#define RADEON_MM_W 2 +#define RADEON_MM_RW (RADEON_MM_R | RADEON_MM_W) + +struct radeon_memory_manager { + uint32_t vb_age; + /*uint32_t ages[1024];*/ + + struct { + void *ptr; + uint32_t size; + uint32_t age; + uint32_t h_pending; + int pending; + int mapped; + } u_list[/*4096*/2048]; + int u_head, u_tail, u_size; + +}; + +extern void radeon_mm_init(r300ContextPtr rmesa); +extern void *radeon_mm_ptr(r300ContextPtr rmesa, int id); +extern int radeon_mm_alloc(r300ContextPtr rmesa, int alignment, int size); +extern void radeon_mm_use(r300ContextPtr rmesa, int id); +extern void *radeon_mm_map(r300ContextPtr rmesa, int id, int access); +extern void radeon_mm_unmap(r300ContextPtr rmesa, int id); +extern void radeon_mm_free(r300ContextPtr rmesa, int id); + +#endif diff --git a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c new file mode 100644 index 00000000000..d1f90abfb24 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c @@ -0,0 +1,610 @@ +#include "context.h" +#include "r300_context.h" +#include "r300_cmdbuf.h" +#include "radeon_mm.h" + +#ifdef RADEON_VTXFMT_A + +#define CONV(a, b) rmesa->state.VB.AttribPtr[(a)].size = ctx->Array.b.Size, \ + rmesa->state.VB.AttribPtr[(a)].data = ctx->Array.b.BufferObj->Name ? \ + ADD_POINTERS(ctx->Array.b.Ptr, ctx->Array.b.BufferObj->Data) : ctx->Array.b.Ptr, \ + rmesa->state.VB.AttribPtr[(a)].stride = ctx->Array.b.StrideB, \ + rmesa->state.VB.AttribPtr[(a)].type = ctx->Array.b.Type + +static int setup_arrays(r300ContextPtr rmesa, GLint start) +{ + int i; + struct dt def = { 4, GL_FLOAT, 0, NULL }; + GLcontext *ctx; + GLuint enabled = 0; + + ctx = rmesa->radeon.glCtx; + + memset(rmesa->state.VB.AttribPtr, 0, VERT_ATTRIB_MAX*sizeof(struct dt)); + + CONV(VERT_ATTRIB_POS, Vertex); + if (ctx->Array.Vertex.Enabled) + enabled |= 1 << VERT_ATTRIB_POS; + + CONV(VERT_ATTRIB_NORMAL, Normal); + if (ctx->Array.Normal.Enabled) + enabled |= 1 << VERT_ATTRIB_NORMAL; + + CONV(VERT_ATTRIB_COLOR0, Color); + if (ctx->Array.Color.Enabled) + enabled |= 1 << VERT_ATTRIB_COLOR0; + + CONV(VERT_ATTRIB_COLOR1, SecondaryColor); + if (ctx->Array.SecondaryColor.Enabled) + enabled |= 1 << VERT_ATTRIB_COLOR1; + + CONV(VERT_ATTRIB_FOG, FogCoord); + if (ctx->Array.FogCoord.Enabled) + enabled |= 1 << VERT_ATTRIB_FOG; + + for (i=0; i < MAX_TEXTURE_COORD_UNITS; i++) { + CONV(VERT_ATTRIB_TEX0 + i, TexCoord[i]); + + if(ctx->Array.TexCoord[i].Enabled) { + enabled |= 1 << (VERT_ATTRIB_TEX0+i); + } + + } + + for (i=0; i < VERT_ATTRIB_MAX; i++) { + if (enabled & (1 << i)) { + rmesa->state.VB.AttribPtr[i].data += rmesa->state.VB.AttribPtr[i].stride * start; + } else { + def.data = ctx->Current.Attrib[i]; + memcpy(&rmesa->state.VB.AttribPtr[i], &def, sizeof(struct dt)); + } + + /*if(rmesa->state.VB.AttribPtr[i].data == ctx->Current.Attrib[i]) + fprintf(stderr, "%d is default coord\n", i);*/ + } + + for(i=0; i < VERT_ATTRIB_MAX; i++){ + if(rmesa->state.VB.AttribPtr[i].type != GL_UNSIGNED_BYTE && + rmesa->state.VB.AttribPtr[i].type != GL_FLOAT){ + WARN_ONCE("Unsupported format %d at index %d\n", rmesa->state.VB.AttribPtr[i].type, i); + return -1; + } + if(rmesa->state.VB.AttribPtr[i].type == GL_UNSIGNED_BYTE && + rmesa->state.VB.AttribPtr[i].size != 4){ + WARN_ONCE("Unsupported component count for ub colors\n"); + return -1; + } + + /*fprintf(stderr, "%d: ", i); + + switch(rmesa->state.VB.AttribPtr[i].type){ + case GL_BYTE: fprintf(stderr, "byte "); break; + case GL_UNSIGNED_BYTE: fprintf(stderr, "u byte "); break; + case GL_SHORT: fprintf(stderr, "short "); break; + case GL_UNSIGNED_SHORT: fprintf(stderr, "u short "); break; + case GL_INT: fprintf(stderr, "int "); break; + case GL_UNSIGNED_INT: fprintf(stderr, "u int "); break; + case GL_FLOAT: fprintf(stderr, "float "); break; + case GL_2_BYTES: fprintf(stderr, "2 bytes "); break; + case GL_3_BYTES: fprintf(stderr, "3 bytes "); break; + case GL_4_BYTES: fprintf(stderr, "4 bytes "); break; + case GL_DOUBLE: fprintf(stderr, "double "); break; + default: fprintf(stderr, "unknown "); break; + } + + fprintf(stderr, "Size %d ", rmesa->state.VB.AttribPtr[i].size); + fprintf(stderr, "Ptr %p ", rmesa->state.VB.AttribPtr[i].data); + fprintf(stderr, "Stride %d ", rmesa->state.VB.AttribPtr[i].stride); + fprintf(stderr, "\n");*/ + } + return 0; +} + +void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid *indices ) +{ + GET_CURRENT_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int elt_size; + int i; + unsigned int min = ~0, max = 0; + struct tnl_prim prim; + static void *ptr = NULL; + static struct r300_dma_region rvb; + + if (ctx->Array.ElementArrayBufferObj->Name) { + /* use indices in the buffer object */ + if (!ctx->Array.ElementArrayBufferObj->Data) { + _mesa_warning(ctx, "DrawRangeElements with empty vertex elements buffer!"); + return; + } + /* actual address is the sum of pointers */ + indices = (const GLvoid *) + ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Data, (const GLubyte *) indices); + } + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices )) + return; + + FLUSH_CURRENT( ctx, 0 ); + /* + fprintf(stderr, "dt at %s:\n", __FUNCTION__); + for(i=0; i < VERT_ATTRIB_MAX; i++){ + fprintf(stderr, "dt %d:", i); + dump_dt(&rmesa->state.VB.AttribPtr[i], rmesa->state.VB.Count); + }*/ + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + + switch (type) { + case GL_UNSIGNED_BYTE: + elt_size = 2; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for (i=0; i < count; i++) { + if(((unsigned char *)indices)[i] < min) + min = ((unsigned char *)indices)[i]; + if(((unsigned char *)indices)[i] > max) + max = ((unsigned char *)indices)[i]; + } + + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min; + break; + + case GL_UNSIGNED_SHORT: + elt_size = 2; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for (i=0; i < count; i++) { + if(((unsigned short int *)indices)[i] < min) + min = ((unsigned short int *)indices)[i]; + if(((unsigned short int *)indices)[i] > max) + max = ((unsigned short int *)indices)[i]; + } + + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; + break; + + case GL_UNSIGNED_INT: + elt_size = 4; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for (i=0; i < count; i++) { + if(((unsigned int *)indices)[i] < min) + min = ((unsigned int *)indices)[i]; + if(((unsigned int *)indices)[i] > max) + max = ((unsigned int *)indices)[i]; + } + + for (i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min; + break; + + default: + fprintf(stderr, "Unknown elt type!\n"); + return; + + + } + + if (ctx->NewState) + _mesa_update_state( ctx ); + + r300UpdateShaderStates(rmesa); + + if (rmesa->state.VB.LockCount) { + if (rmesa->state.VB.lock_uptodate == GL_FALSE) { + if (setup_arrays(rmesa, rmesa->state.VB.LockFirst)) + return; + + rmesa->state.VB.Count = rmesa->state.VB.LockCount; + + r300ReleaseArrays(ctx); + r300EmitArraysVtx(ctx, GL_FALSE); + + rmesa->state.VB.lock_uptodate = GL_TRUE; + } + + if (min < rmesa->state.VB.LockFirst) { + WARN_ONCE("Out of range min %d vs %d!\n", min, rmesa->state.VB.LockFirst); + return; + } + + if (max >= rmesa->state.VB.LockFirst + rmesa->state.VB.LockCount) { + WARN_ONCE("Out of range max %d vs %d!\n", max, rmesa->state.VB.LockFirst + + rmesa->state.VB.LockCount); + return; + } + } else { + if (setup_arrays(rmesa, min)) + return; + rmesa->state.VB.Count = max - min + 1; + } + + rmesa->state.VB.Primitive = &prim; + rmesa->state.VB.PrimitiveCount = 1; + + prim.mode = mode | PRIM_BEGIN | PRIM_END; + if (rmesa->state.VB.LockCount) + prim.start = min - rmesa->state.VB.LockFirst; + else + prim.start = 0; + prim.count = count; + + rmesa->state.VB.Elts = ptr; + rmesa->state.VB.elt_size = elt_size; + + r300_run_vb_render_vtxfmt_a(ctx, NULL); + + if(rvb.buf) + radeon_mm_use(rmesa, rvb.buf->id); +} + +void radeonDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct tnl_prim prim; + int elt_size; + int i; + static void *ptr = NULL; + static struct r300_dma_region rvb; + + if (ctx->Array.ElementArrayBufferObj->Name) { + /* use indices in the buffer object */ + if (!ctx->Array.ElementArrayBufferObj->Data) { + _mesa_warning(ctx, "DrawRangeElements with empty vertex elements buffer!"); + return; + } + /* actual address is the sum of pointers */ + indices = (const GLvoid *) + ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Data, (const GLubyte *) indices); + } + + if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, type, indices )) + return; + + FLUSH_CURRENT( ctx, 0 ); +#ifdef OPTIMIZE_ELTS + start = 0; +#endif + r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); + + switch(type){ + case GL_UNSIGNED_BYTE: + elt_size = 2; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for(i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - start; + break; + + case GL_UNSIGNED_SHORT: + elt_size = 2; + +#ifdef OPTIMIZE_ELTS + if (start == 0 && ctx->Array.ElementArrayBufferObj->Name){ + ptr = indices; + break; + } +#endif + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for(i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - start; + break; + + case GL_UNSIGNED_INT: + elt_size = 4; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + for(i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - start; + break; + + default: + fprintf(stderr, "Unknown elt type!\n"); + return; + + } + + if(setup_arrays(rmesa, start)) + return; + + if (ctx->NewState) + _mesa_update_state( ctx ); + + r300UpdateShaderStates(rmesa); + + rmesa->state.VB.Count = (end - start) + 1; + rmesa->state.VB.Primitive = &prim; + rmesa->state.VB.PrimitiveCount = 1; + + prim.mode = mode | PRIM_BEGIN | PRIM_END; + prim.start = 0; + prim.count = count; + + rmesa->state.VB.Elts = ptr; + rmesa->state.VB.elt_size = elt_size; + rmesa->state.VB.elt_min = start; + rmesa->state.VB.elt_max = end; + + r300_run_vb_render_vtxfmt_a(ctx, NULL); + + if(rvb.buf) + radeon_mm_use(rmesa, rvb.buf->id); +} + +void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) +{ + GET_CURRENT_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct tnl_prim prim; + + if (!_mesa_validate_DrawArrays( ctx, mode, start, count )) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (rmesa->state.VB.LockCount == 0) + if (setup_arrays(rmesa, start)) + return; + + r300UpdateShaderStates(rmesa); + + if (rmesa->state.VB.LockCount) { + start -= rmesa->state.VB.LockFirst; + if (start < 0) { /* Generate error */ + WARN_ONCE("Out of range!\n"); + return; + } + } + + if (rmesa->state.VB.LockCount == 0) + rmesa->state.VB.Count = count; + rmesa->state.VB.Primitive = &prim; + rmesa->state.VB.PrimitiveCount = 1; + + prim.mode = mode | PRIM_BEGIN | PRIM_END; + if (ctx->Array.LockCount == 0) + prim.start = 0; + else + prim.start = start; + + prim.count = count; + + rmesa->state.VB.Elts = NULL; + rmesa->state.VB.elt_size = 0; + rmesa->state.VB.elt_min = 0; + rmesa->state.VB.elt_max = 0; + + r300_run_vb_render_vtxfmt_a(ctx, NULL); +} + +void radeon_init_vtxfmt_a(r300ContextPtr rmesa) +{ + GLcontext *ctx; + GLvertexformat *vfmt; + + ctx = rmesa->radeon.glCtx; + vfmt = ctx->TnlModule.Current; + + vfmt->DrawElements = radeonDrawElements; + vfmt->DrawArrays = radeonDrawArrays; + vfmt->DrawRangeElements = radeonDrawRangeElements; + +} +#endif + +#ifdef HW_VBOS + +void radeonLockArraysEXT(GLcontext *ctx, GLint first, GLsizei count) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + + /* Disabled as array changes arent properly handled yet. */ + first = 0; count = 0; + + if (first < 0 || count <= 0) { + rmesa->state.VB.LockFirst = 0; + rmesa->state.VB.LockCount = 0; + return ; + } + + rmesa->state.VB.LockFirst = first; + rmesa->state.VB.LockCount = count; + rmesa->state.VB.lock_uptodate = GL_FALSE; +} + +void radeonUnlockArraysEXT(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + rmesa->state.VB.LockFirst = 0; + rmesa->state.VB.LockCount = 0; +} + +struct gl_buffer_object * +r300NewBufferObject(GLcontext *ctx, GLuint name, GLenum target ) +{ + struct r300_buffer_object *obj; + + (void) ctx; + + obj = MALLOC_STRUCT(r300_buffer_object); + _mesa_initialize_buffer_object(&obj->mesa_obj, name, target); + return &obj->mesa_obj; +} + +void r300BufferData(GLcontext *ctx, GLenum target, GLsizeiptrARB size, + const GLvoid *data, GLenum usage, struct gl_buffer_object *obj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_buffer_object *r300_obj = (struct r300_buffer_object *)obj; + drm_radeon_mem_alloc_t alloc; + int offset, ret; + + /* Free previous buffer */ + if (obj->OnCard) { + radeon_mm_free(rmesa, r300_obj->id); + obj->OnCard = GL_FALSE; + } else { + if (obj->Data) + free(obj->Data); + } +#ifdef OPTIMIZE_ELTS + if (0) { +#else + if (target == GL_ELEMENT_ARRAY_BUFFER_ARB) { +#endif + obj->Data = malloc(size); + + if (data) + memcpy(obj->Data, data, size); + + obj->OnCard = GL_FALSE; + } else { + r300_obj->id = radeon_mm_alloc(rmesa, 4, size); + obj->Data = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_W); + + if (data) + memcpy(obj->Data, data, size); + + radeon_mm_unmap(rmesa, r300_obj->id); + obj->OnCard = GL_TRUE; + } + + obj->Size = size; + obj->Usage = usage; +} + +void r300BufferSubData(GLcontext *ctx, GLenum target, GLintptrARB offset, + GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object * bufObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_buffer_object *r300_obj = (struct r300_buffer_object *)bufObj; + (void) ctx; (void) target; + void *ptr; + + if (bufObj->Data && ((GLuint) (size + offset) <= bufObj->Size)) { + if (bufObj->OnCard){ + ptr = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_W); + + _mesa_memcpy( (GLubyte *) ptr + offset, data, size ); + + radeon_mm_unmap(rmesa, r300_obj->id); + } else { + _mesa_memcpy( (GLubyte *) bufObj->Data + offset, data, size ); + } + } +} + +void *r300MapBuffer(GLcontext *ctx, GLenum target, GLenum access, + struct gl_buffer_object *bufObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_buffer_object *r300_obj = (struct r300_buffer_object *)bufObj; + + (void) ctx; + (void) target; + (void) access; + //ASSERT(!bufObj->OnCard); + /* Just return a direct pointer to the data */ + if (bufObj->Pointer) { + /* already mapped! */ + return NULL; + } + + if (!bufObj->OnCard) { + bufObj->Pointer = bufObj->Data; + return bufObj->Pointer; + } + + switch (access) { + case GL_READ_ONLY: + bufObj->Pointer = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_R); + break; + + case GL_WRITE_ONLY: + bufObj->Pointer = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_W); + break; + + case GL_READ_WRITE: + bufObj->Pointer = radeon_mm_map(rmesa, r300_obj->id, RADEON_MM_RW); + break; + + default: + WARN_ONCE("Unknown access type\n"); + bufObj->Pointer = NULL; + break; + } + + return bufObj->Pointer; +} + +GLboolean r300UnmapBuffer(GLcontext *ctx, GLenum target, struct gl_buffer_object *bufObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_buffer_object *r300_obj = (struct r300_buffer_object *)bufObj; + + (void) ctx; + (void) target; + //ASSERT(!bufObj->OnCard); + /* XXX we might assert here that bufObj->Pointer is non-null */ + if (!bufObj->OnCard) { + bufObj->Pointer = NULL; + return GL_TRUE; + } + radeon_mm_unmap(rmesa, r300_obj->id); + + bufObj->Pointer = NULL; + return GL_TRUE; +} + +void r300DeleteBuffer(GLcontext *ctx, struct gl_buffer_object *obj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_buffer_object *r300_obj = (struct r300_buffer_object *)obj; + + if (obj->OnCard) { + radeon_mm_free(rmesa, r300_obj->id); + obj->Data = NULL; + } + _mesa_delete_buffer_object(ctx, obj); +} + +void r300_init_vbo_funcs(struct dd_function_table *functions) +{ + functions->NewBufferObject = r300NewBufferObject; + functions->BufferData = r300BufferData; + functions->BufferSubData = r300BufferSubData; + functions->MapBuffer = r300MapBuffer; + functions->UnmapBuffer = r300UnmapBuffer; + functions->DeleteBuffer = r300DeleteBuffer; + + functions->LockArraysEXT = radeonLockArraysEXT; + functions->UnlockArraysEXT = radeonUnlockArraysEXT; +} + +#endif |