diff options
Diffstat (limited to 'src/mesa/drivers')
65 files changed, 2412 insertions, 2464 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 4ca0e7bcc37..9b271f85e92 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -124,7 +124,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->UnmapTexture = NULL; driver->TextureMemCpy = _mesa_memcpy; driver->IsTextureResident = NULL; - driver->ActiveTexture = NULL; driver->UpdateTexturePalette = NULL; /* imaging */ diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index a4315191434..39b0ab13c6b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2149,6 +2149,7 @@ _mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, const GLenum wrapTSave = texObj->WrapT; const GLenum wrapRSave = texObj->WrapR; const GLuint fboSave = ctx->DrawBuffer->Name; + const GLuint original_active_unit = ctx->Texture.CurrentUnit; GLenum faceTarget; GLuint dstLevel; GLuint border = 0; @@ -2288,6 +2289,9 @@ _mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, /* texture is already locked, unlock now */ _mesa_unlock_texture(ctx, texObj); + if (original_active_unit != 0) + _mesa_BindTexture(target, texObj->Name); + for (dstLevel = baseLevel + 1; dstLevel <= maxLevel; dstLevel++) { const struct gl_texture_image *srcImage; const GLuint srcLevel = dstLevel - 1; diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index 95f97414a98..447f3d15b95 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -356,6 +356,63 @@ } while (0) # endif +#elif (SPANTMP_PIXEL_FMT == GL_BGR) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) + +/** + ** GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV + ** + ** This is really for MESA_FORMAT_XRGB8888. The spantmp code needs to be + ** kicked to the curb, and we need to just code-gen this. + **/ + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_8888(0xff, color[0], color[1], color[2]) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, ((r << 16) | \ + (g << 8) | \ + (b << 0) | \ + (0xff << 24))) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +# if defined( USE_X86_ASM ) +# define READ_RGBA(rgba, _x, _y) \ + do { \ + GLuint p = GET_VALUE(_x, _y); \ + __asm__ __volatile__( "bswap %0; rorl $8, %0" \ + : "=r" (p) : "0" (p) ); \ + ((GLuint *)rgba)[0] = p | 0xff000000; \ + } while (0) +# elif defined( MESA_BIG_ENDIAN ) + /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single + * rotlwi instruction. It also produces good code on SPARC. + */ +# define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLuint p = GET_VALUE(_x, _y); \ + *((uint32_t *) rgba) = (t << 8) | 0xff; \ + } while (0) +# else +# define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLuint p = GET_VALUE(_x, _y); \ + rgba[0] = (p >> 16) & 0xff; \ + rgba[1] = (p >> 8) & 0xff; \ + rgba[2] = (p >> 0) & 0xff; \ + rgba[3] = 0xff; \ + } while (0) +# endif + #else #error SPANTMP_PIXEL_FMT must be set to a valid value! #endif diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index c62281d341e..27c5aa1e085 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -56,10 +56,7 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return MAPSURF_32BIT | MT_32BIT_XRGB8888; - else - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_XRGB8888: return MAPSURF_32BIT | MT_32BIT_XRGB8888; case MESA_FORMAT_YCBCR_REV: diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index c05c7759ac5..1e3c8301d8d 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -645,7 +645,7 @@ i830_state_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ if (irb != NULL) { - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: value |= DV_PF_8888; @@ -661,7 +661,7 @@ i830_state_draw_region(struct intel_context *intel, break; default: _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat); + irb->Base.Format); } } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 7d4c7cfbabb..0485be2cc1f 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -143,6 +143,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; ctx->Const.MaxVarying = I915_TEX_UNITS; + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: diff --git a/src/mesa/drivers/dri/i915/i915_debug.c b/src/mesa/drivers/dri/i915/i915_debug.c index f7bb7ea44c9..fecfac30339 100644 --- a/src/mesa/drivers/dri/i915/i915_debug.c +++ b/src/mesa/drivers/dri/i915/i915_debug.c @@ -806,6 +806,7 @@ static GLboolean i915_debug_packet( struct debug_stream *stream ) default: return debug(stream, "", 0); } + break; default: assert(0); return 0; diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 1bacd51aec5..2b03331a004 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -57,10 +57,7 @@ translate_texture_format(gl_format mesa_format, GLuint internal_format, case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return MAPSURF_32BIT | MT_32BIT_XRGB8888; - else - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_XRGB8888: return MAPSURF_32BIT | MT_32BIT_XRGB8888; case MESA_FORMAT_YCBCR_REV: diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 3e7b5101cca..ba6be9796e1 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -587,7 +587,7 @@ i915_state_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); if (irb != NULL) { - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: value |= DV_PF_8888; @@ -603,7 +603,7 @@ i915_state_draw_region(struct intel_context *intel, break; default: _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat); + irb->Base.Format); } } diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index d4ccd28c9e8..ab301b9a3a0 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -34,6 +34,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" +#include "intel_fbo.h" #include "main/macros.h" #include "main/enums.h" @@ -89,6 +90,28 @@ struct brw_cc_unit_key { GLenum depth_func; }; +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. This + * is used when rendering to xRGB targets. + */ +static GLenum +fix_xRGB_alpha(GLenum function) +{ + switch (function) { + case GL_DST_ALPHA: + return GL_ONE; + + case GL_ONE_MINUS_DST_ALPHA: + case GL_SRC_ALPHA_SATURATE: + return GL_ZERO; + } + + return function; +} + static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { @@ -132,6 +155,17 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) key->blend_dst_rgb = ctx->Color.BlendDstRGB; key->blend_src_a = ctx->Color.BlendSrcA; key->blend_dst_a = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPAH with GL_ZERO. + */ + if (ctx->Visual.alphaBits == 0) { + key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); + key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); + key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); + key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); + } } key->alpha_enabled = ctx->Color.AlphaEnabled; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 8bdda60697b..78bea829493 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -111,7 +111,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - ctx->Const.MaxCombinedTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 47035cc6fc1..3f9b1fbfdc6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -94,20 +94,14 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - else - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_XRGB8888: return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; case MESA_FORMAT_RGBA8888_REV: _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - else - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -537,12 +531,16 @@ brw_update_renderbuffer_surface(struct brw_context *brw, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat) { + switch (irb->Base.Format) { + /* XRGB and ARGB are treated the same here because the chips in this + * family cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the alpha + * blending hardware to use GL_ONE (or GL_ZERO) for cases where + * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. + */ case MESA_FORMAT_ARGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; case MESA_FORMAT_XRGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -554,7 +552,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->texformat); + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); } key.tiling = region->tiling; if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { @@ -579,6 +577,13 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /* _NEW_COLOR */ memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); + + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + key.color_blend = (!ctx->Color._LogicOpEnabled && ctx->Color.BlendEnabled); diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index f14854602b6..cdf1408cd33 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -496,7 +496,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: clearVal = PACK_COLOR_8888(clear[3], clear[0], @@ -515,7 +515,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) break; default: _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->texformat); + irb->Base.Format); clearVal = 0; } } diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index f5fe543b5df..86dc42cc510 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -124,7 +124,6 @@ static const struct dri_extension card_extensions[] = { { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, - { "GL_NV_point_sprite", GL_NV_point_sprite_functions }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_vertex_program1_1", NULL }, { "GL_SGIS_generate_mipmap", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 5615040946f..608f75b8240 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -37,6 +37,7 @@ #include "drivers/common/meta.h" #include "intel_context.h" +#include "intel_batchbuffer.h" #include "intel_buffers.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" @@ -105,8 +106,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, { struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); - GLboolean softwareBuffer = GL_FALSE; int cpp; + GLuint pitch; ASSERT(rb->Name != 0); @@ -116,18 +117,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB5: rb->Format = MESA_FORMAT_RGB565; rb->DataType = GL_UNSIGNED_BYTE; - irb->texformat = MESA_FORMAT_RGB565; - cpp = 2; break; case GL_RGB: case GL_RGB8: case GL_RGB10: case GL_RGB12: case GL_RGB16: - rb->Format = MESA_FORMAT_ARGB8888; + rb->Format = MESA_FORMAT_XRGB8888; rb->DataType = GL_UNSIGNED_BYTE; - irb->texformat = MESA_FORMAT_ARGB8888; /* XXX: Need xrgb8888 */ - cpp = 4; break; case GL_RGBA: case GL_RGBA2: @@ -139,8 +136,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGBA16: rb->Format = MESA_FORMAT_ARGB8888; rb->DataType = GL_UNSIGNED_BYTE; - irb->texformat = MESA_FORMAT_ARGB8888; - cpp = 4; break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: @@ -150,29 +145,21 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, /* alloc a depth+stencil buffer */ rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - cpp = 4; - irb->texformat = MESA_FORMAT_S8_Z24; break; case GL_DEPTH_COMPONENT16: rb->Format = MESA_FORMAT_Z16; rb->DataType = GL_UNSIGNED_SHORT; - cpp = 2; - irb->texformat = MESA_FORMAT_Z16; break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - cpp = 4; - irb->texformat = MESA_FORMAT_S8_Z24; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - cpp = 4; - irb->texformat = MESA_FORMAT_S8_Z24; break; default: _mesa_problem(ctx, @@ -181,6 +168,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, } rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + cpp = _mesa_get_format_bytes(rb->Format); intelFlush(ctx); @@ -190,32 +178,25 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, } /* allocate new memory region/renderbuffer */ - if (softwareBuffer) { - return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat, - width, height); - } - else { - /* Choose a pitch to match hardware requirements: - */ - GLuint pitch = ((cpp * width + 63) & ~63) / cpp; - /* alloc hardware renderbuffer */ - DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, - height, pitch); + /* Choose a pitch to match hardware requirements: + */ + pitch = ((cpp * width + 63) & ~63) / cpp; - irb->region = intel_region_alloc(intel, I915_TILING_NONE, - cpp, width, height, pitch, - GL_TRUE); - if (!irb->region) - return GL_FALSE; /* out of memory? */ + /* alloc hardware renderbuffer */ + DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - ASSERT(irb->region->buffer); + irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp, + width, height, pitch, GL_TRUE); + if (!irb->region) + return GL_FALSE; /* out of memory? */ - rb->Width = width; - rb->Height = height; + ASSERT(irb->region->buffer); - return GL_TRUE; - } + rb->Width = width; + rb->Height = height; + + return GL_TRUE; } @@ -297,7 +278,6 @@ intel_create_renderbuffer(gl_format format) GET_CURRENT_CONTEXT(ctx); struct intel_renderbuffer *irb; - const GLuint name = 0; irb = CALLOC_STRUCT(intel_renderbuffer); if (!irb) { @@ -305,7 +285,7 @@ intel_create_renderbuffer(gl_format format) return NULL; } - _mesa_init_renderbuffer(&irb->Base, name); + _mesa_init_renderbuffer(&irb->Base, 0); irb->Base.ClassID = INTEL_RB_CLASS; switch (format) { @@ -314,10 +294,6 @@ intel_create_renderbuffer(gl_format format) irb->Base.DataType = GL_UNSIGNED_BYTE; break; case MESA_FORMAT_XRGB8888: - /* XXX this is a hack since XRGB surfaces don't seem to work - * properly yet. Reading the alpha channel returns 0 instead of 1. - */ - format = MESA_FORMAT_ARGB8888; irb->Base._BaseFormat = GL_RGB; irb->Base.DataType = GL_UNSIGNED_BYTE; break; @@ -346,7 +322,6 @@ intel_create_renderbuffer(gl_format format) irb->Base.Format = format; irb->Base.InternalFormat = irb->Base._BaseFormat; - irb->texformat = format; /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; @@ -423,7 +398,6 @@ static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - irb->texformat = texImage->TexFormat; gl_format texFormat; if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { @@ -591,6 +565,7 @@ static void intel_finish_render_texture(GLcontext * ctx, struct gl_renderbuffer_attachment *att) { + struct intel_context *intel = intel_context(ctx); struct gl_texture_object *tex_obj = att->Texture; struct gl_texture_image *image = tex_obj->Image[att->CubeMapFace][att->TextureLevel]; @@ -598,8 +573,14 @@ intel_finish_render_texture(GLcontext * ctx, /* Flag that this image may now be validated into the object's miptree. */ intel_image->used_as_render_target = GL_FALSE; -} + /* Since we've (probably) rendered to the texture and will (likely) use + * it in the texture domain later on in this batchbuffer, flush the + * batch. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer like GEM does in the kernel. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); +} /** * Do additional "completeness" testing of a framebuffer object. @@ -632,7 +613,7 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) continue; } - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: case MESA_FORMAT_RGB565: diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 50a8a959858..fa43077d6a7 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -62,8 +62,6 @@ struct intel_renderbuffer struct gl_renderbuffer Base; struct intel_region *region; - gl_format texformat; - GLuint vbl_pending; /**< vblank sequence number of pending flip */ uint8_t *span_cache; diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 9572b673269..668697cb5eb 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -335,6 +335,8 @@ out: unpack->BufferObj); } + intel_check_front_buffer_rendering(intel); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index f058b3c8e4d..622aaa22d67 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -222,6 +222,8 @@ do_blit_copypixels(GLcontext * ctx, out: UNLOCK_HARDWARE(intel); + intel_check_front_buffer_rendering(intel); + DBG("%s: success\n", __FUNCTION__); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 47075001801..20424e2e589 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -285,11 +285,11 @@ intelReadPixels(GLcontext * ctx, intelFlush(ctx); -#ifdef I915 if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; +#ifdef I915 if (do_texture_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 2c89a66a95f..d1681e9088a 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -334,7 +334,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #include "intel_spantmp.h" /* x8r8g8b8 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_FMT GL_BGR #define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV #define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset) #define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v) @@ -616,7 +616,7 @@ intel_set_span_functions(struct intel_context *intel, uint32_t tiling = irb->region->tiling; if (intel->intelScreen->kernel_exec_fencing) { - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_RGB565: intel_gttmap_InitPointers_RGB565(rb); break; @@ -630,13 +630,7 @@ intel_set_span_functions(struct intel_context *intel, intel_gttmap_InitPointers_xRGB8888(rb); break; case MESA_FORMAT_ARGB8888: - if (rb->_BaseFormat == GL_RGB) { - /* XXX remove this code someday when we enable XRGB surfaces */ - /* 8888 RGBx */ - intel_gttmap_InitPointers_xRGB8888(rb); - } else { - intel_gttmap_InitPointers_ARGB8888(rb); - } + intel_gttmap_InitPointers_ARGB8888(rb); break; case MESA_FORMAT_Z16: intel_gttmap_InitDepthPointers_z16(rb); @@ -659,7 +653,7 @@ intel_set_span_functions(struct intel_context *intel, default: _mesa_problem(NULL, "Unexpected MesaFormat %d in intelSetSpanFunctions", - irb->texformat); + irb->Base.Format); break; } return; @@ -668,7 +662,7 @@ intel_set_span_functions(struct intel_context *intel, /* If in GEM mode, we need to do the tile address swizzling ourselves, * instead of the fence registers handling it. */ - switch (irb->texformat) { + switch (irb->Base.Format) { case MESA_FORMAT_RGB565: switch (tiling) { case I915_TILING_NONE: @@ -726,35 +720,18 @@ intel_set_span_functions(struct intel_context *intel, } break; case MESA_FORMAT_ARGB8888: - if (rb->_BaseFormat == GL_RGB) { - /* XXX remove this code someday when we enable XRGB surfaces */ - /* 8888 RGBx */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_xRGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); - break; - } - } else { - /* 8888 RGBA */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); - break; - } + /* 8888 RGBA */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; } break; case MESA_FORMAT_Z16: diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index bfa3dba1f5c..87efb72cc51 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -50,8 +50,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { return MESA_FORMAT_RGB565; } - /* XXX use MESA_FORMAT_XRGB8888 someday */ - return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565; + return do32bpt ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_RGB565; case GL_RGBA8: case GL_RGB10_A2: @@ -70,8 +69,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_RGB10: case GL_RGB12: case GL_RGB16: - /* XXX use MESA_FORMAT_XRGB8888 someday */ - return MESA_FORMAT_ARGB8888; + return MESA_FORMAT_XRGB8888; case GL_RGB5: case GL_RGB4: diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.c b/src/mesa/drivers/dri/mach64/mach64_tex.c index a757362b11d..72917ee13bf 100644 --- a/src/mesa/drivers/dri/mach64/mach64_tex.c +++ b/src/mesa/drivers/dri/mach64/mach64_tex.c @@ -565,7 +565,6 @@ void mach64InitTextureFuncs( struct dd_function_table *functions ) functions->IsTextureResident = driIsTextureResident; functions->UpdateTexturePalette = NULL; - functions->ActiveTexture = NULL; driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index e3ae839235e..5f985d624d8 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -325,9 +325,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, _mesa_init_driver_functions(&functions); r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); - r200InitStateFuncs(&functions, screen->kernel_mm); + r200InitStateFuncs(&functions); r200InitTextureFuncs(&functions); - r200InitShaderFuncs(&functions); + r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index d28e96d9d98..6d99c039ded 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2476,7 +2476,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) } /* Initialize the driver's state functions. */ -void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ) +void r200InitStateFuncs( struct dd_function_table *functions ) { functions->UpdateState = r200InvalidateState; functions->LightingSpaceChange = r200LightingSpaceChange; @@ -2510,10 +2510,7 @@ void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ) functions->LogicOpcode = r200LogicOpCode; functions->PolygonMode = r200PolygonMode; functions->PolygonOffset = r200PolygonOffset; - if (dri2) - functions->PolygonStipple = r200PolygonStipple; - else - functions->PolygonStipple = radeonPolygonStipplePreKMS; + functions->PolygonStipple = r200PolygonStipple; functions->PointParameterfv = r200PointParameter; functions->PointSize = r200PointSize; functions->RenderMode = r200RenderMode; diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 9c62f0a6446..7b9b0c106aa 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" extern void r200InitState( r200ContextPtr rmesa ); -extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ); +extern void r200InitStateFuncs( struct dd_function_table *functions ); extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index e53fd72290c..6c5a0b79eed 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -887,10 +887,8 @@ void r200InitState( r200ContextPtr rmesa ) } } } - /* polygon stipple is done with irq for non-kms */ - if (rmesa->radeon.radeonScreen->kernel_mm) { - ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); - } + + ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) @@ -1122,12 +1120,11 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0); rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0); - if (rmesa->radeon.radeonScreen->kernel_mm) { - - rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); - rmesa->hw.stp.cmd[STP_DATA_0] = 0; - rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); + rmesa->hw.stp.cmd[STP_DATA_0] = 0; + rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + if (rmesa->radeon.radeonScreen->kernel_mm) { rmesa->hw.mtl[0].emit = mtl_emit; rmesa->hw.mtl[1].emit = mtl_emit; diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7782404a794..e2f9cf0ea86 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -797,24 +797,13 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ return; } - + _mesa_lock_texture(radeon->glCtx, texObj); if (t->bo) { radeon_bo_unref(t->bo); diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index cb0f715fa07..409d126ab2b 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -43,13 +43,14 @@ RADEON_COMMON_SOURCES = \ DRIVER_SOURCES = \ radeon_screen.c \ + r300_blit.c \ r300_context.c \ r300_draw.c \ - r300_ioctl.c \ r300_cmdbuf.c \ r300_state.c \ r300_render.c \ r300_tex.c \ + r300_texcopy.c \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog_common.c \ diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c new file mode 100644 index 00000000000..ca6dd3bcf8e --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -0,0 +1,520 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r300_context.h" + +#include "r300_blit.h" +#include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "r300_tex.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_opcodes.h" + +static void vp_ins_outs(struct r300_vertex_program_compiler *c) +{ + c->code->inputs[VERT_ATTRIB_POS] = 0; + c->code->inputs[VERT_ATTRIB_TEX0] = 1; + c->code->outputs[VERT_RESULT_HPOS] = 0; + c->code->outputs[VERT_RESULT_TEX0] = 1; +} + +static void fp_allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + allocate(mydata, FRAG_ATTRIB_TEX0, 0); +} + +static void create_vertex_program(struct r300_context *r300) +{ + struct r300_vertex_program_compiler compiler; + struct rc_instruction *inst; + + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_HPOS; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_POS; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_TEX0; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0); + compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0); + compiler.SetHwInputOutput = vp_ins_outs; + compiler.code = &r300->blit.vp_code; + + r3xx_compile_vertex_program(&compiler); +} + +static void create_fragment_program(struct r300_context *r300) +{ + struct r300_fragment_program_compiler compiler; + struct rc_instruction *inst; + + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + inst->U.I.TexSrcUnit = 0; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = FRAG_RESULT_COLOR; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); + compiler.OutputColor = FRAG_RESULT_COLOR; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); + compiler.code = &r300->blit.fp_code; + compiler.AllocateHwInputs = fp_allocate_hw_inputs; + + r3xx_compile_fragment_program(&compiler); +} + +void r300_blit_init(struct r300_context *r300) +{ + create_vertex_program(r300); + create_fragment_program(r300); +} + +static void r300_emit_tx_setup(struct r300_context *r300, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + BATCH_LOCALS(&r300->radeon); + + assert(width <= 2048); + assert(height <= 2048); + assert(r300TranslateTexFormat(mesa_format) >= 0); + assert(offset % 32 == 0); + + BEGIN_BATCH(17); + OUT_BATCH_REGVAL(R300_TX_FILTER0_0, + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) | + R300_TX_MIN_FILTER_MIP_NONE | + R300_TX_MIN_FILTER_LINEAR | + R300_TX_MAG_FILTER_LINEAR | + (0 << 28)); + OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0); + OUT_BATCH_REGVAL(R300_TX_SIZE_0, + ((width-1) << R300_TX_WIDTHMASK_SHIFT) | + ((height-1) << R300_TX_HEIGHTMASK_SHIFT) | + (0 << R300_TX_DEPTHMASK_SHIFT) | + (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) | + R300_TX_SIZE_TXPITCH_EN); + + OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format)); + OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, pitch/_mesa_get_format_bytes(mesa_format) - 1); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2); + OUT_BATCH(0); + OUT_BATCH(1); + + END_BATCH(); +} + +#define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \ + (FMT | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \ + R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN)) + +static uint32_t mesa_format_to_us_format(gl_format mesa_format) +{ + switch(mesa_format) + { + case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_RGBA8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0); + case MESA_FORMAT_RGB565: // x + case MESA_FORMAT_ARGB1555: // x + case MESA_FORMAT_RGBA8888_REV: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0); + case MESA_FORMAT_ARGB8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0); + case MESA_FORMAT_ARGB8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + case MESA_FORMAT_XRGB8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + + case MESA_FORMAT_RGB332: + return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0); + + case MESA_FORMAT_RGBA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0); + case MESA_FORMAT_RGBA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0); + case MESA_FORMAT_ALPHA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0); + case MESA_FORMAT_ALPHA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0); + + case MESA_FORMAT_SIGNED_RGBA8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf); + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf); + case MESA_FORMAT_SIGNED_RGBA_16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf); + + default: + assert(!"Invalid format for US output\n"); + return 0; + } +} +#undef EASY_US_FORMAT + +static void r500_emit_fp_setup(struct r300_context *r300, + struct r500_fragment_program_code *fp, + gl_format dst_format) +{ + r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0); + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(10); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end)); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_US_CONFIG, 0); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx); + END_BATCH(); +} + +static void r500_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_RS_INST_0, + (0 << R500_RS_INST_TEX_ID_SHIFT) | + (0 << R500_RS_INST_TEX_ADDR_SHIFT) | + R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_COL_CN_NO_WRITE); + OUT_BATCH_REGVAL(R500_RS_IP_0, + (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + END_BATCH(); +} + +static void r300_emit_fp_setup(struct r300_context *r300, + struct r300_fragment_program_code *code, + gl_format dst_format) +{ + unsigned i; + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11); + + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_addr); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_addr); + } + + OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length); + OUT_BATCH_TABLE(code->tex.inst, code->tex.length); + + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX); + OUT_BATCH(code->pixsize); + OUT_BATCH(code->code_offset); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH_TABLE(code->code_addr, 4); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + END_BATCH(); +} + +static void r300_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R300_RS_INST_0, + R300_RS_INST_TEX_ID(0) | + R300_RS_INST_TEX_ADDR(0) | + R300_RS_INST_TEX_CN_WRITE); + OUT_BATCH_REGVAL(R300_RS_IP_0, + R300_RS_TEX_PTR(0) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1)); + END_BATCH(); +} + +static void emit_pvs_setup(struct r300_context *r300, + uint32_t *vp_code, + unsigned vp_len) +{ + BATCH_LOCALS(&r300->radeon); + + r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START); + + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + ((vp_len - 1) << R300_PVS_XYZW_VALID_INST_SHIFT) | + ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH(0); + OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + END_BATCH(); +} + +static void emit_vap_setup(struct r300_context *r300, unsigned width, unsigned height) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(12); + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_BATCH(4); + + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) | + (((1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16)); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 0) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) ); + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS); + END_BATCH(); +} + +static GLboolean validate_buffers(struct r300_context *r300, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM, 0); + + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + first_elem(&r300->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +static void emit_draw_packet(struct r300_context *r300, float width, float height) +{ + float verts[] = { 0.0, 0.0, 0.0, 1.0, + 0.0, height, 0.0, 0.0, + width, height, 1.0, 0.0, + width, 0.0, 1.0, 1.0 }; + + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(19); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | + (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS); + OUT_BATCH_TABLE(verts, 16); + END_BATCH(); +} + +static void other_stuff(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(15); + OUT_BATCH_REGVAL(R300_GA_POLY_MODE, + R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI); + OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW); + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + OUT_BATCH_REGVAL(R300_ZB_CNTL, 0); + END_BATCH(); +} + +static void emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned height) +{ + BATCH_LOCALS(&r300->radeon); + + unsigned x1, y1, x2, y2; + x1 = 0; + y1 = 0; + x2 = width - 1; + y2 = height - 1; + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + + r300_emit_cb_setup(r300, bo, offset, mesa_format, + _mesa_get_format_bytes(mesa_format), + _mesa_format_row_stride(mesa_format, pitch)); + + BEGIN_BATCH_NO_AUTOSTATE(5); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0); + END_BATCH(); +} + +GLboolean r300_blit(struct r300_context *r300, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height) +{ + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n", + src_width, src_height, src_pitch, + _mesa_format_row_stride(src_mesaformat, src_width), + _mesa_get_format_name(src_mesaformat)); + fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n", + dst_width, dst_height, + _mesa_format_row_stride(dst_mesaformat, dst_width), + _mesa_get_format_name(dst_mesaformat)); + } + + if (!validate_buffers(r300, src_bo, dst_bo)) + return GL_FALSE; + + rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); + + other_stuff(r300); + + r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat); + r500_emit_rs_setup(r300); + } else { + r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat); + r300_emit_rs_setup(r300); + } + + emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2); + emit_vap_setup(r300, dst_width, dst_height); + + emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + + emit_draw_packet(r300, dst_width, dst_height); + + r300EmitCacheFlush(r300); + + radeonFlush(r300->radeon.glCtx); + + return GL_TRUE; +}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h new file mode 100644 index 00000000000..28ffd4ea421 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_blit.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef R300_BLIT_H +#define R300_BLIT_H + +void r300_blit_init(struct r300_context *r300); + +GLboolean r300_blit(struct r300_context *r300, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height); + +#endif // R300_BLIT_H
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index ad8db6e68e0..e1c33bbb2cf 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -45,7 +45,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_drm.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_reg.h" #include "r300_cmdbuf.h" #include "r300_emit.h" @@ -72,7 +71,7 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); int cnt; @@ -86,54 +85,73 @@ int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? (cnt * 4) + extra : 0; } +void r300_emit_vpu(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(5 + len); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); +} -void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - drm_r300_cmd_header_t cmd; - uint32_t addr, ndw; + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw; - cmd.u = atom->cmd[0]; - addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = atom->check(ctx, atom); + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + ndw = atom->check(ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(ndw); + r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr); +} - ndw -= 5; - OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); - OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp) +{ + BATCH_LOCALS(&r300->radeon); + + addr |= (type << 16); + addr |= (clamp << 17); + + BEGIN_BATCH_NO_AUTOSTATE(len + 3); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); } -void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_r500fp_atom(GLcontext *ctx, struct radeon_state_atom * atom) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, sz; - int type, clamp; - - ndw = atom->check(ctx, atom); - - cmd.u = atom->cmd[0]; - sz = cmd.r500fp.count; - addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; - type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); - clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); - - addr |= (type << 16); - addr |= (clamp << 17); - - BEGIN_BATCH_NO_AUTOSTATE(ndw); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); - OUT_BATCH(addr); - ndw-=3; - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - END_BATCH(); + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr, count; + int type, clamp; + + cmd.u = atom->cmd[0]; + addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; + type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + + if (type) { + count = r500fp_count(atom->cmd) * 4; + } else { + count = r500fp_count(atom->cmd) * 6; + } + + r500_emit_fp(r300, &atom->cmd[1], count, addr, type, clamp); } static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) @@ -256,110 +274,136 @@ static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) return dw; } -static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_scissor(struct r300_context *r300, + unsigned width, + unsigned height) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - struct radeon_renderbuffer *rrb; - uint32_t cbpitch; - uint32_t offset = r300->radeon.state.color.draw_offset; - uint32_t dw = 6; - int i; + int i; + BATCH_LOCALS(&r300->radeon); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); + OUT_BATCH(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((width - 1) << R300_CLIPRECT_X_SHIFT) | ((height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | + ((height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((R300_SCISSORS_OFFSET + width - 1) << R300_CLIPRECT_X_SHIFT) | + ((R300_SCISSORS_OFFSET + height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } +} - rrb = radeon_get_colorbuffer(&r300->radeon); - if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); - return; - } +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch) +{ + BATCH_LOCALS(&r300->radeon); + uint32_t cbpitch = pitch / cpp; + uint32_t dw = 6; - if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); - cbpitch = (rrb->pitch / rrb->cpp); - if (rrb->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else switch (rrb->base.Format) { + assert(offset % 32 == 0); + + switch (format) { case MESA_FORMAT_RGB565: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_RGB565; - break; + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; case MESA_FORMAT_RGB565_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_RGB565; - break; + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; case MESA_FORMAT_ARGB4444: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB4444; - break; + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; case MESA_FORMAT_ARGB4444_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB4444; - break; - case MESA_FORMAT_ARGB1555: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB1555; - break; - case MESA_FORMAT_ARGB1555_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB1555; - break; - default: - _mesa_problem(ctx, "unexpected format in emit_cb_offset()"); - } + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_ARGB1555_REV: + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + default: + if (cpp == 4) { + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + } else { + _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");; + } + break; + } - if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) - cbpitch |= R300_COLOR_TILE_ENABLE; + if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + + BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(cbpitch); + else + OUT_BATCH_RELOC(cbpitch, bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); +} + +static void emit_cb_offset_atom(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + uint32_t offset = r300->radeon.state.color.draw_offset; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); + + r300_emit_cb_setup(r300, rrb->bo, offset, rrb->base.Format, rrb->cpp, rrb->pitch); - if (r300->radeon.radeonScreen->kernel_mm) - dw += 2; - BEGIN_BATCH_NO_AUTOSTATE(dw); - OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); - if (!r300->radeon.radeonScreen->kernel_mm) - OUT_BATCH(cbpitch); - else - OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); - END_BATCH(); if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); - OUT_BATCH(0); - OUT_BATCH(((rrb->base.Width - 1) << R300_SCISSORS_X_SHIFT) | - ((rrb->base.Height - 1) << R300_SCISSORS_Y_SHIFT)); - END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(16); - for (i = 0; i < 4; i++) { - OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); - OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); - OUT_BATCH(((rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT)); - } - OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); - OUT_BATCH(0xAAAA); - OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); - OUT_BATCH(0xffffff); - END_BATCH(); - } else { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); - OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | - (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); - OUT_BATCH(((rrb->base.Width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | - ((rrb->base.Height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); - END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(16); - for (i = 0; i < 4; i++) { - OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); - OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); - OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | - ((R300_SCISSORS_OFFSET + rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT)); - } - OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); - OUT_BATCH(0xAAAA); - OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); - OUT_BATCH(0xffffff); - END_BATCH(); - } + emit_scissor(r300, rrb->base.Width, rrb->base.Height); } } @@ -455,7 +499,7 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? cnt + 1 : 0; } -int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -467,7 +511,7 @@ int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? (cnt * 6) + extra : 0; } -int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -644,13 +688,13 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.r500fp.emit = emit_r500fp; + r300->hw.r500fp.emit = emit_r500fp_atom; ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.r500fp_const.emit = emit_r500fp; + r300->hw.r500fp_const.emit = emit_r500fp_atom; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); @@ -694,7 +738,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); - r300->hw.cb.emit = &emit_cb_offset; + r300->hw.cb.emit = &emit_cb_offset_atom; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); @@ -758,20 +802,20 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpi.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpi.emit = emit_vpu; + r300->hw.vpi.emit = emit_vpu_state; if (is_r500) { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu; + r300->hw.vpp.emit = emit_vpu_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vps.emit = emit_vpu; + r300->hw.vps.emit = emit_vpu_state; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); @@ -779,20 +823,20 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdvpu(r300->radeon.radeonScreen, R500_PVS_UCP_START + i, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpucp[i].emit = emit_vpu; + r300->hw.vpucp[i].emit = emit_vpu_state; } } else { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu; + r300->hw.vpp.emit = emit_vpu_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vps.emit = emit_vpu; + r300->hw.vps.emit = emit_vpu_state; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); @@ -800,7 +844,7 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdvpu(r300->radeon.radeonScreen, R300_PVS_UCP_START + i, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpucp[i].emit = emit_vpu; + r300->hw.vpucp[i].emit = emit_vpu_state; } } } diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index 1b703e518a0..0e68da928ed 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -44,14 +44,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define FIREAOS_BUFSZ (3) #define SCISSORS_BUFSZ (3) -extern void r300InitCmdBuf(r300ContextPtr r300); +void r300InitCmdBuf(r300ContextPtr r300); void r300_emit_scissor(GLcontext *ctx); -void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); +void r300_emit_vpu(struct r300_context *ctx, + uint32_t *data, + unsigned len, + uint32_t addr); -void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom); -int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom); -int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom); +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp); -#endif /* __R300_CMDBUF_H__ */ +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch); + +#endif /* __R300_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 5f07b956349..3c6ec2a34a8 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -55,13 +55,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_vp_build.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "r300_context.h" #include "radeon_context.h" #include "radeon_span.h" +#include "r300_blit.h" #include "r300_cmdbuf.h" #include "r300_state.h" -#include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" #include "r300_swtcl.h" @@ -92,6 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/remap_helper.h" +void r300_init_texcopy_functions(struct dd_function_table *table); static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ @@ -451,6 +453,13 @@ static void r300InitGLExtensions(GLcontext *ctx) } } +static void r300InitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = _mesa_meta_Clear; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; +} + /* Create the device specific rendering context. */ GLboolean r300CreateContext(const __GLcontextModes * glVisual, @@ -484,6 +493,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitQueryObjFunctions(&functions); radeonInitBufferObjectFuncs(&functions); + if (r300->radeon.radeonScreen->kernel_mm) { + r300_init_texcopy_functions(&functions); + } + if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { @@ -530,6 +543,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitSwtcl(ctx); } + r300_blit_init(r300); radeon_fbo_init(&r300->radeon); radeonInitSpanFuncs( ctx ); r300InitCmdBuf(r300); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 518d5cdbf4f..54a92a2e447 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -533,6 +533,11 @@ struct r300_context { uint32_t fallback; + struct { + struct r300_vertex_program_code vp_code; + struct rX00_fragment_program_code fp_code; + } blit; + DECLARE_RENDERINPUTS(render_inputs_bitset); }; @@ -549,6 +554,8 @@ extern void r300InitShaderFunctions(r300ContextPtr r300); extern void r300InitDraw(GLcontext *ctx); +extern void r300_init_texcopy_functions(struct dd_function_table *table); + #define r300PackFloat32 radeonPackFloat32 #define r300PackFloat24 radeonPackFloat24 diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 07e62230874..3759ca2bea7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -49,7 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" #include "r300_emit.h" -#include "r300_ioctl.h" #include "r300_render.h" #include "r300_swtcl.h" diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c deleted file mode 100644 index 5cb04e2bb6d..00000000000 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ /dev/null @@ -1,782 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. -Copyright (C) 2004 Nicolai Haehnle. -All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/** - * \file - * - * \author Keith Whitwell <[email protected]> - * - * \author Nicolai Haehnle <[email protected]> - */ - -#include <sched.h> -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/macros.h" -#include "main/context.h" -#include "main/simple_list.h" -#include "swrast/swrast.h" - -#include "radeon_common.h" -#include "radeon_lock.h" -#include "r300_context.h" -#include "r300_ioctl.h" -#include "r300_cmdbuf.h" -#include "r300_state.h" -#include "r300_vertprog.h" -#include "radeon_reg.h" -#include "r300_emit.h" -#include "r300_context.h" - -#include "vblank.h" - -#define R200_3D_DRAW_IMMD_2 0xC0003500 - -#define CLEARBUFFER_COLOR 0x1 -#define CLEARBUFFER_DEPTH 0x2 -#define CLEARBUFFER_STENCIL 0x4 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -static void r300EmitClearState(GLcontext * ctx); - -static void r300ClearBuffer(r300ContextPtr r300, int flags, - struct radeon_renderbuffer *rrb, - struct radeon_renderbuffer *rrbd) -{ - BATCH_LOCALS(&r300->radeon); - GLcontext *ctx = r300->radeon.glCtx; - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - GLuint cbpitch = 0; - r300ContextPtr rmesa = r300; - - if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", - __FUNCTION__, rrb, dPriv->x, dPriv->y, - dPriv->w, dPriv->h); - - if (rrb) { - cbpitch = (rrb->pitch / rrb->cpp); - if (rrb->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else - cbpitch |= R300_COLOR_FORMAT_RGB565; - - if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_COLOR_TILE_ENABLE; - } - } - - /* TODO in bufmgr */ - cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - end_3d(&rmesa->radeon); - - if (flags & CLEARBUFFER_COLOR) { - assert(rrb != 0); - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); - END_BATCH(); - } -#if 1 - if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); - if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - zbpitch |= R300_DEPTHMACROTILE_ENABLE; - } - if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - zbpitch |= R300_DEPTHMICROTILE_TILED; - } - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); - if (!r300->radeon.radeonScreen->kernel_mm) - OUT_BATCH(zbpitch); - else - OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); - END_BATCH(); - } -#endif - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); - if (flags & CLEARBUFFER_COLOR) { - OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | - (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | - (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | - (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); - } else { - OUT_BATCH(0); - } - - - { - uint32_t t1, t2; - - t1 = 0x0; - t2 = 0x0; - - if (flags & CLEARBUFFER_DEPTH) { - t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; - t2 |= - (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); - } - - if (flags & CLEARBUFFER_STENCIL) { - t1 |= R300_STENCIL_ENABLE; - t2 |= - (R300_ZS_ALWAYS << - R300_S_FRONT_FUNC_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_SFAIL_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_ZPASS_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_ZFAIL_OP_SHIFT); - } - - OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); - OUT_BATCH(t1); - OUT_BATCH(t2); - OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << - R300_STENCILWRITEMASK_SHIFT) | - (ctx->Stencil.Clear & R300_STENCILREF_MASK)); - END_BATCH(); - } - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(9); - OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR)); - OUT_BATCH_FLOAT32(dPriv->w / 2.0); - OUT_BATCH_FLOAT32(dPriv->h / 2.0); - OUT_BATCH_FLOAT32(ctx->Depth.Clear); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); - END_BATCH(); - } else { - OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); - OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | - (1 << R300_PRIM_NUM_VERTICES_SHIFT)); - OUT_BATCH_FLOAT32(dPriv->w / 2.0); - OUT_BATCH_FLOAT32(dPriv->h / 2.0); - OUT_BATCH_FLOAT32(ctx->Depth.Clear); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); - } - - r300EmitCacheFlush(rmesa); - cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - - R300_STATECHANGE(r300, cb); - R300_STATECHANGE(r300, cmk); - R300_STATECHANGE(r300, zs); -} - -static void r300EmitClearState(GLcontext * ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - int i; - int has_tcl; - int is_r500 = 0; - GLuint vap_cntl; - - has_tcl = r300->options.hw_tcl_enabled; - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - is_r500 = 1; - - /* State atom dirty tracking is a little subtle here. - * - * On the one hand, we need to make sure base state is emitted - * here if we start with an empty batch buffer, otherwise clear - * works incorrectly with multiple processes. Therefore, the first - * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. - * - * On the other hand, implicit state emission clears the state atom - * dirty bits, so we have to call R300_STATECHANGE later than the - * first BEGIN_BATCH. - * - * The final trickiness is that, because we change state, we need - * to ensure that any stored swtcl primitives are flushed properly - * before we start changing state. See the R300_NEWPRIM in r300Clear - * for this. - */ - BEGIN_BATCH(31); - OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); - if (!has_tcl) - OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - else - OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - - OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); - OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, - ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | - (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | - (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) - << R300_SWIZZLE0_SHIFT) | - (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | - (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | - (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) - << R300_SWIZZLE1_SHIFT))); - - /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ - OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); - OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - - /* comes from fglrx startup of clear */ - OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); - OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - OUT_BATCH(0x8); - - OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); - - OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - OUT_BATCH(0); /* no textures */ - - OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); - - OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(dPriv->x); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(dPriv->y); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(0.0); - - OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); - - OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - END_BATCH(); - - R300_STATECHANGE(r300, vir[0]); - R300_STATECHANGE(r300, fogs); - R300_STATECHANGE(r300, vir[1]); - R300_STATECHANGE(r300, vic); - R300_STATECHANGE(r300, vte); - R300_STATECHANGE(r300, vof); - R300_STATECHANGE(r300, txe); - R300_STATECHANGE(r300, vpt); - R300_STATECHANGE(r300, at); - R300_STATECHANGE(r300, bld); - R300_STATECHANGE(r300, ps); - - if (has_tcl) { - R300_STATECHANGE(r300, vap_clip_cntl); - - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); - END_BATCH(); - } - - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, - ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | - ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - END_BATCH(); - - if (!is_r500) { - R300_STATECHANGE(r300, ri); - R300_STATECHANGE(r300, rc); - R300_STATECHANGE(r300, rr); - - BEGIN_BATCH(14); - OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) - OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - - OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); - OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_BATCH(0x0); - - OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); - END_BATCH(); - } else { - R300_STATECHANGE(r300, ri); - R300_STATECHANGE(r300, rc); - R300_STATECHANGE(r300, rr); - - BEGIN_BATCH(14); - OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); - } - - OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); - OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_BATCH(0x0); - - OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); - END_BATCH(); - } - - if (!is_r500) { - R300_STATECHANGE(r300, fp); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); - - BEGIN_BATCH(17); - OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(R300_RGBA_OUT); - - OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, - FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, - FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, - FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, - FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); - END_BATCH(); - } else { - struct radeon_state_atom r500fp; - uint32_t _cmd[10]; - - R300_STATECHANGE(r300, fp); - R300_STATECHANGE(r300, r500fp); - - BEGIN_BATCH(7); - OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); - OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_BATCH(0x0); - OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); - OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); - OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); - OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); - END_BATCH(); - - r500fp.check = check_r500fp; - r500fp.cmd = _cmd; - r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0); - r500fp.cmd[1] = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP; - r500fp.cmd[2] = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST; - r500fp.cmd[3] = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST; - r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B; - r500fp.cmd[5] = R500_ALPHA_OP_CMP | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_A; - r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; - - r500fp.cmd[7] = 0; - if (r300->radeon.radeonScreen->kernel_mm) { - emit_r500fp(ctx, &r500fp); - } else { - int dwords = r500fp.check(ctx,&r500fp); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(r500fp.cmd, dwords); - END_BATCH(); - } - - } - - BEGIN_BATCH(2); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); - - if (has_tcl) { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - vap_cntl |= R500_TCL_STATE_OPTIMIZATION; - } else { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT)); - } - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) - vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) - vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); - else - vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); - - R300_STATECHANGE(r300, vap_cntl); - - BEGIN_BATCH(2); - OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); - END_BATCH(); - - if (has_tcl) { - struct radeon_state_atom vpu; - uint32_t _cmd[10]; - R300_STATECHANGE(r300, pvs); - R300_STATECHANGE(r300, vap_flush); - R300_STATECHANGE(r300, vpi); - - BEGIN_BATCH(4); - OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); - OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | - (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (1 << R300_PVS_LAST_INST_SHIFT)); - OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); - OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); - END_BATCH(); - - vpu.check = check_vpu; - vpu.cmd = _cmd; - vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2); - - vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, - 0, 0xf, PVS_DST_REG_OUT); - vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[4] = 0x0; - - vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, - PVS_DST_REG_OUT); - vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - NEGATE_NONE); - vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[8] = 0x0; - - if (r300->radeon.radeonScreen->kernel_mm) { - int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords); - END_BATCH(); - emit_vpu(ctx, &vpu); - } else { - int dwords = vpu.check(ctx,&vpu); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(vpu.cmd, dwords); - END_BATCH(); - } - - } -} - -static int r300KernelClear(GLcontext *ctx, GLuint flags) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - struct radeon_framebuffer *rfb = dPriv->driverPrivate; - struct radeon_renderbuffer *rrb; - struct radeon_renderbuffer *rrbd; - int bits = 0, ret; - - /* Make sure it fits there. */ - radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - - if (flags & BUFFER_BIT_COLOR0) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - if (flags & BUFFER_BIT_FRONT_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - if (flags & BUFFER_BIT_BACK_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); - if (rrbd) { - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrbd->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - ret = radeon_cs_space_check(r300->radeon.cmdbuf.cs); - if (ret) - return -1; - - rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__); - if (flags || bits) - r300EmitClearState(ctx); - - rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); - if (rrbd && (flags & BUFFER_BIT_DEPTH)) - bits |= CLEARBUFFER_DEPTH; - - if (rrbd && (flags & BUFFER_BIT_STENCIL)) - bits |= CLEARBUFFER_STENCIL; - - if (flags & BUFFER_BIT_COLOR0) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); - r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL); - bits = 0; - } - - if (flags & BUFFER_BIT_FRONT_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); - bits = 0; - } - - if (flags & BUFFER_BIT_BACK_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); - bits = 0; - } - - if (bits) - r300ClearBuffer(r300, bits, NULL, rrbd); - - COMMIT_BATCH(); - return 0; -} - -/** - * Buffer clear - */ -static void r300Clear(GLcontext * ctx, GLbitfield mask) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); - GLbitfield swrast_mask = 0, tri_mask = 0; - int i, ret; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "r300Clear\n"); - - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - LOCK_HARDWARE(&r300->radeon); - UNLOCK_HARDWARE(&r300->radeon); - if (dPriv->numClipRects == 0) - return; - } - - /* Flush swtcl vertices if necessary, because we will change hardware - * state during clear. See also the state-related comment in - * r300EmitClearState. - */ - R300_NEWPRIM(r300); - - if (colorMask == ~0) - tri_mask |= (mask & BUFFER_BITS_COLOR); - else - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); - - - /* HW stencil */ - if (mask & BUFFER_BIT_STENCIL) { - tri_mask |= BUFFER_BIT_STENCIL; - } - - /* HW depth */ - if (mask & BUFFER_BIT_DEPTH) { - tri_mask |= BUFFER_BIT_DEPTH; - } - - /* If we're doing a tri pass for depth/stencil, include a likely color - * buffer with it. - */ - - for (i = 0; i < BUFFER_COUNT; i++) { - GLuint bufBit = 1 << i; - if ((tri_mask) & bufBit) { - if (!fb->Attachment[i].Renderbuffer->ClassID) { - tri_mask &= ~bufBit; - swrast_mask |= bufBit; - } - } - } - - /* SW fallback clearing */ - swrast_mask = mask & ~tri_mask; - - ret = 0; - if (tri_mask) { - if (r300->radeon.radeonScreen->kernel_mm) - radeonUserClear(ctx, tri_mask); - else { - /* if kernel clear fails due to size restraints fallback */ - ret = r300KernelClear(ctx, tri_mask); - if (ret < 0) - swrast_mask |= tri_mask; - } - } - - if (swrast_mask) { - if (RADEON_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", - __FUNCTION__, swrast_mask); - _swrast_Clear(ctx, swrast_mask); - } -} - -void r300InitIoctlFuncs(struct dd_function_table *functions) -{ - functions->Clear = r300Clear; - functions->Finish = radeonFinish; - functions->Flush = radeonFlush; -} diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h deleted file mode 100644 index 3abfa71a6e8..00000000000 --- a/src/mesa/drivers/dri/r300/r300_ioctl.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <[email protected]> - * Nicolai Haehnle <[email protected]> - */ - -#ifndef __R300_IOCTL_H__ -#define __R300_IOCTL_H__ - -#include "r300_context.h" -#include "radeon_drm.h" - -extern void r300InitIoctlFuncs(struct dd_function_table *functions); - -#endif /* __R300_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 4ae593cbe79..02c94250a8f 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -68,7 +68,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_tex.h" diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 1fd32d497b4..da0a9dfb4cf 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -55,7 +55,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_vp_build.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" @@ -1741,8 +1740,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) r300SetPolygonOffsetState(ctx, state); break; case GL_SCISSOR_TEST: - if (!rmesa->radeon.radeonScreen->kernel_mm) - radeon_firevertices(&rmesa->radeon); + radeon_firevertices(&rmesa->radeon); rmesa->radeon.state.scissor.enabled = state; radeonUpdateScissor( ctx ); break; diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 726b3ff98e1..ac3d5b1bec3 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -48,7 +48,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" -#include "r300_ioctl.h" #include "radeon_mipmap_tree.h" #include "r300_tex.h" diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index 8a653ea2d11..6ede0fe25c9 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -51,4 +51,6 @@ extern GLboolean r300ValidateBuffers(GLcontext * ctx); extern void r300InitTextureFuncs(struct dd_function_table *functions); +int32_t r300TranslateTexFormat(gl_format mesaFormat); + #endif /* __r300_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texcopy.c b/src/mesa/drivers/dri/r300/r300_texcopy.c new file mode 100644 index 00000000000..7702a1d67df --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_texcopy.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r300_context.h" + +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstate.h" +#include "drivers/common/meta.h" + +#include "radeon_mipmap_tree.h" +#include "r300_blit.h" +#include <main/debug.h> + +static GLboolean +do_copy_texsubimage(GLcontext *ctx, + GLenum target, GLint level, + struct radeon_tex_obj *tobj, + radeon_texture_image *timg, + GLint dstx, GLint dsty, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + + if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { + rrb = radeon_get_depthbuffer(&r300->radeon); + } else { + rrb = radeon_get_colorbuffer(&r300->radeon); + } + + if (!timg->mt) { + radeon_validate_texture_miptree(ctx, &tobj->base); + } + + assert(rrb && rrb->bo); + assert(timg->mt->bo); + assert(timg->base.Width >= dstx + width); + assert(timg->base.Height >= dsty + height); + + intptr_t src_offset = rrb->draw_offset + x * rrb->cpp + y * rrb->pitch; + intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level); + dst_offset += dstx * _mesa_get_format_bytes(timg->base.TexFormat) + + dsty * _mesa_format_row_stride(timg->base.TexFormat, timg->base.Width); + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "%s: copying to face %d, level %d\n", + __FUNCTION__, _mesa_tex_target_to_face(target), level); + fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset); + fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d, width %d\n", + x, y, width, height, (uint32_t) src_offset, rrb->pitch, rrb->pitch/rrb->cpp); + fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size); + + } + + /* blit from src buffer to texture */ + return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch, + rrb->base.Width, rrb->base.Height, timg->mt->bo ? timg->mt->bo : timg->bo, dst_offset, + timg->base.TexFormat, timg->base.Width, width, height); +} + +static void +r300CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + int srcx, srcy, dstx, dsty; + + if (border) + goto fail; + + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, GL_UNSIGNED_BYTE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + srcx = x; + srcy = y; + dstx = 0; + dsty = 0; + if (!_mesa_clip_copytexsubimage(ctx, + &dstx, &dsty, + &srcx, &srcy, + &width, &height)) { + return; + } + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + 0, 0, x, y, width, height)) { + goto fail; + } + + return; + +fail: + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, + width, height, border); +} + +static void +r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + xoffset, yoffset, x, y, width, height)) { + + //DEBUG_FALLBACKS + + _mesa_meta_CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, x, y, width, height); + } +} + + +void r300_init_texcopy_functions(struct dd_function_table *table) +{ + table->CopyTexImage2D = r300CopyTexImage2D; + table->CopyTexSubImage2D = r300CopyTexSubImage2D; +}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e6f2c0c1a7b..d4a728381e4 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -46,19 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" -#include "r300_ioctl.h" #include "radeon_mipmap_tree.h" #include "r300_tex.h" #include "r300_reg.h" -#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ - || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ - (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ - && tx_table[f].flag ) - -#define _ASSIGN(entry, format) \ - [ MESA_FORMAT_ ## entry ] = { format, 0, 1} - /* * Note that the _REV formats are the same as the non-REV formats. This is * because the REV and non-REV formats are identical as a byte string, but @@ -68,67 +59,119 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * identically. -- paulus */ -static const struct tx_table { - GLuint format, filter, flag; -} tx_table[] = { - /* *INDENT-OFF* */ +int32_t r300TranslateTexFormat(gl_format mesaFormat) +{ + switch (mesaFormat) + { #ifdef MESA_LITTLE_ENDIAN - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); #else - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); #endif - _ASSIGN(XRGB8888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), - _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), - _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), - _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), - _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), - _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), - _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), - _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), - _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), - _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), - _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), - _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), - _ASSIGN(RGB_FLOAT32, 0xffffffff), - _ASSIGN(RGB_FLOAT16, 0xffffffff), - _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), - _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), - _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), - _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), - _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), - _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), - _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), - _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), - _ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)), - _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), - /* EXT_texture_sRGB */ - _ASSIGN(SRGBA8, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA), - _ASSIGN(SLA8, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA), - _ASSIGN(SL8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA), - /* *INDENT-ON* */ + case MESA_FORMAT_XRGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB565: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_RGB565_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_ARGB4444: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB4444_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB1555: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_ARGB1555_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_AL88: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_AL88_REV: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_RGB332: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2); + case MESA_FORMAT_A8: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); + case MESA_FORMAT_L8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + case MESA_FORMAT_I8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_CI8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_YCBCR: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_YCBCR_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_RGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1); + case MESA_FORMAT_RGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1); + case MESA_FORMAT_RGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3); + case MESA_FORMAT_RGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5); + case MESA_FORMAT_RGBA_FLOAT32: + return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32); + case MESA_FORMAT_RGBA_FLOAT16: + return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); + case MESA_FORMAT_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32); + case MESA_FORMAT_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16); + case MESA_FORMAT_LUMINANCE_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32); + case MESA_FORMAT_LUMINANCE_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16); + case MESA_FORMAT_INTENSITY_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I32); + case MESA_FORMAT_INTENSITY_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I16); + case MESA_FORMAT_Z16: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + case MESA_FORMAT_Z24_S8: + return R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8); + case MESA_FORMAT_S8_Z24: + return R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8); + case MESA_FORMAT_Z32: + return R300_EASY_TX_FORMAT(X, X, X, X, X32); + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SLA8: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SL8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA; + default: + return -1; + } }; -#undef _ASSIGN - void r300SetDepthTexMode(struct gl_texture_object *tObj) { static const GLuint formats[3][3] = { @@ -205,19 +248,18 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) const struct gl_texture_image *firstImage; firstImage = t->base.Image[0][t->minLod]; - if (!t->image_override - && VALID_FORMAT(firstImage->TexFormat)) { + if (!t->image_override) { if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { r300SetDepthTexMode(&t->base); } else { - t->pp_txformat = tx_table[firstImage->TexFormat].format; + int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat); + if (txformat < 0) { + _mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s", + __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat)); + _mesa_exit(1); + } + t->pp_txformat = (uint32_t) txformat; } - - t->pp_txfilter |= tx_table[firstImage->TexFormat].filter; - } else if (!t->image_override) { - _mesa_problem(NULL, "unexpected texture format in %s", - __FUNCTION__); - return; } if (t->image_override && t->bo) @@ -357,18 +399,15 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, switch (depth) { case 32: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); - t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; case 24: default: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - t->pp_txfilter |= tx_table[4].filter; pitch_val /= 4; break; case 16: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); - t->pp_txfilter |= tx_table[5].filter; pitch_val /= 2; break; } @@ -409,18 +448,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ @@ -458,18 +486,15 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); else t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); - t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; case 3: default: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - t->pp_txfilter |= tx_table[4].filter; pitch_val /= 4; break; case 2: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); - t->pp_txfilter |= tx_table[5].filter; pitch_val /= 2; break; } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index d27a3245a39..5e1504872d6 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -52,6 +52,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "radeon_reg.h" +struct r600_cs_manager_legacy +{ + struct radeon_cs_manager base; + struct radeon_context *ctx; + /* hack for scratch stuff */ + uint32_t pending_age; + uint32_t pending_count; +}; + +struct r600_cs_reloc_legacy { + struct radeon_cs_reloc base; + uint32_t cindices; + uint32_t *indices; + uint32_t *reloc_indices; +}; static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm, diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index eba43d37b6b..dff00096999 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R600_IT_SET_CTL_CONST 0x00006F00 #define R600_IT_SURFACE_BASE_UPDATE 0x00007300 -struct r600_cs_manager_legacy -{ - struct radeon_cs_manager base; - struct radeon_context *ctx; - /* hack for scratch stuff */ - uint32_t pending_age; - uint32_t pending_count; -}; - -struct r600_cs_reloc_legacy { - struct radeon_cs_reloc base; - uint32_t cindices; - uint32_t *indices; - uint32_t *reloc_indices; -}; - struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); /** diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 25314eff563..b66fe78ac3b 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -317,20 +317,8 @@ static void r600InitGLExtensions(GLcontext *ctx) #ifdef R600_ENABLE_GLSL_TEST driInitExtensions(ctx, gl_20_extension, GL_TRUE); - //_mesa_enable_2_0_extensions(ctx); - //1.5 - ctx->Extensions.ARB_occlusion_query = GL_TRUE; - ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; - ctx->Extensions.EXT_shadow_funcs = GL_TRUE; - //2.0 - ctx->Extensions.ARB_draw_buffers = GL_TRUE; - ctx->Extensions.ARB_point_sprite = GL_TRUE; - ctx->Extensions.ARB_shader_objects = GL_TRUE; - ctx->Extensions.ARB_vertex_shader = GL_TRUE; - ctx->Extensions.ARB_fragment_shader = GL_TRUE; - ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; - ctx->Extensions.ATI_separate_stencil = GL_TRUE; - + _mesa_enable_2_0_extensions(ctx); + /* glsl compiler has problem if this is not GL_TRUE */ ctx->Shader.EmitCondCodes = GL_TRUE; #endif /* R600_ENABLE_GLSL_TEST */ diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 4ec315b78c7..2a4a6e6ee14 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -917,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 309c90fdd0c..e10b23b97f1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" @@ -41,6 +42,39 @@ #define USE_CF_FOR_CONTINUE_BREAK 1 #define USE_CF_FOR_POP_AFTER 1 +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -330,14 +364,14 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: case SQ_OP2_INST_KILLE: @@ -378,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -500,12 +534,20 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->unCFflags = 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + + pAsm->unVetTexBits = 0; + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) { - if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) { return GL_TRUE; } @@ -1292,6 +1334,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1364,43 +1415,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) + if(SPT_VP == pAsm->currentShaderType) { - case FRAG_ATTRIB_WPOS: - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_FOGC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else + { + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - case FRAG_ATTRIB_FACE: - fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); - break; - case FRAG_ATTRIB_PNTC: - fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); - break; - } - - if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || - (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) - { - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; + } } - break; + break; } } @@ -1445,8 +1518,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; if (normalized) { @@ -1465,7 +1547,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; tex_instruction_ptr->m_Word2.f.offset_z = 0x0; - tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; // dst @@ -1724,7 +1805,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -2001,7 +2082,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2090,7 +2171,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2159,6 +2240,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -2167,7 +2252,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) int current_source_index; GLuint contiguous_slots_needed; - GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -2224,262 +2309,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - - //src 0 - current_source_index = 0; - pcurrent_source = &(pAsm->S[0].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - current_source_index = 1; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; - - if( (is_single_scalar_operation == GL_TRUE) - || (GL_TRUE == bSplitInst) ) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - else + if(scalar_channel_index == (number_of_scalar_operations-1)) { - alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - if ( is_single_scalar_operation == GL_TRUE ) - { - // Override scalar_channel_index since only one scalar value will be written - if(pAsm->D.dst.writex) - { - scalar_channel_index = 0; - } - else if(pAsm->D.dst.writey) - { - scalar_channel_index = 1; - } - else if(pAsm->D.dst.writez) - { - scalar_channel_index = 2; - } - else if(pAsm->D.dst.writew) + switch(pAsm->D2.dst2.literal_slots) { - scalar_channel_index = 3; - } - } - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - current_source_index = 2; - pcurrent_source = &(pAsm->S[current_source_index].src); - - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - current_source_index, - pcurrent_source, - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - - //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; - //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - switch (scalar_channel_index) - { - case 0: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; - break; - case 1: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; - break; - case 2: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; - break; - case 3: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; - break; - default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; - break; - } - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - /* - * Judge the type of current instruction, is it vector or scalar - * instruction. - */ - if (is_single_scalar_operation) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; } - - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) -{ - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - PVSSRC * pcurrent_source; - int current_source_index; - GLuint contiguous_slots_needed; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - if (1 == pAsm->D.dst.math) - { - is_single_scalar_operation = GL_TRUE; - number_of_scalar_operations = 1; - } - else - { - is_single_scalar_operation = GL_FALSE; - number_of_scalar_operations = 4; - } - - contiguous_slots_needed = 0; - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - - initialize(pAsm); - - for (scalar_channel_index=0; - scalar_channel_index < number_of_scalar_operations; - scalar_channel_index++) - { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) + else { - return GL_FALSE; + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); } - Init_R700ALUInstruction(alu_instruction_ptr); //src 0 current_source_index = 0; @@ -2489,7 +2356,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2503,13 +2370,13 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2524,15 +2391,15 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; if(1 == pAsm->D.dst.predicated) { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; } else { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; } - + // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || (pAsm->D.dst.rtype == DST_REG_OUT) ) @@ -2540,7 +2407,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2597,8 +2464,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2626,8 +2493,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2654,7 +2521,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2665,272 +2532,19 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } - contiguous_slots_needed = 0; - } - - return GL_TRUE; -} - -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - R700ALUInstruction * alu_instruction_ptr; - R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; - R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; - - GLuint number_of_scalar_operations; - GLboolean is_single_scalar_operation; - GLuint scalar_channel_index; - - GLuint contiguous_slots_needed; - GLuint lastInstruction; - GLuint not_masked[4]; - - GLuint uNumSrc = r700GetNumOperands(pAsm); - - GLboolean bSplitInst = GL_FALSE; - - number_of_scalar_operations = 0; - contiguous_slots_needed = 0; - - if(1 == pAsm->D.dst.writew) - { - lastInstruction = 3; - number_of_scalar_operations++; - not_masked[3] = 1; - } - else - { - not_masked[3] = 0; - } - if(1 == pAsm->D.dst.writez) - { - lastInstruction = 2; - number_of_scalar_operations++; - not_masked[2] = 1; - } - else - { - not_masked[2] = 0; - } - if(1 == pAsm->D.dst.writey) - { - lastInstruction = 1; - number_of_scalar_operations++; - not_masked[1] = 1; - } - else - { - not_masked[1] = 0; - } - if(1 == pAsm->D.dst.writex) - { - lastInstruction = 0; - number_of_scalar_operations++; - not_masked[0] = 1; - } - else - { - not_masked[0] = 0; - } - - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) - { - contiguous_slots_needed = 4; - } - else - { - contiguous_slots_needed = number_of_scalar_operations; - } - - if(1 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 1; - } - else if(2 == pAsm->D2.dst2.literal) - { - contiguous_slots_needed += 2; - } - - initialize(pAsm); - - for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++) - { - if(0 == not_masked[scalar_channel_index]) - { - continue; - } - - if(scalar_channel_index == lastInstruction) - { - switch (pAsm->D2.dst2.literal) - { - case 0: - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - break; - case 1: - alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); - if (alu_instruction_ptr_hl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; - break; - case 2: - alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); - if (alu_instruction_ptr_fl == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]); - alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; - break; - default: - break; - }; - } - else - { - alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); - } - - //src 0 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 0, - &(pAsm->S[0].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - - if (uNumSrc > 1) - { - // Process source 1 - if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 1, - &(pAsm->S[1].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - - //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; - - if(scalar_channel_index == lastInstruction) - { - alu_instruction_ptr->m_Word0.f.last = 1; - } - - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - if(1 == pAsm->D.dst.predicated) - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0; - } - - // dst - if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || - (pAsm->D.dst.rtype == DST_REG_OUT) ) - { - alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; - } - else - { - radeon_error("Only temp destination registers supported for ALU dest regs.\n"); - return GL_FALSE; - } - - alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype - - alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - - alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; - - if (pAsm->D.dst.op3) - { - //op3 - alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; - - //There's 3rd src for op3 - if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, - 2, - &(pAsm->S[2].src), - scalar_channel_index) ) - { - return GL_FALSE; - } - } - else - { - //op2 - if (pAsm->bR6xx) - { - alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; - } - else - { - alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; - alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; - } - } - - if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { - return GL_FALSE; - } - - if (1 == number_of_scalar_operations) - { - if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - else - { - if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return GL_FALSE; - } - } - - contiguous_slots_needed -= 2; + contiguous_slots_needed -= 1; } return GL_TRUE; @@ -2987,69 +2601,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[2].bits = 0; pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; - - return GL_TRUE; -} - -GLboolean next_ins2(r700_AssemblerBase *pAsm) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction2(pAsm) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - if(pAsm->D.dst.rtype == DST_REG_OUT) - { - if(pAsm->D.dst.op3) - { - // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; - } - else - { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] - |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; - } - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; - pAsm->D2.bits = 0; - - return GL_TRUE; -} - -/* not work yet */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral) -{ - struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - //ALU - if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) ) - { - radeon_error("Error assembling ALU instruction\n"); - return GL_FALSE; - } - - //reset for next inst. - pAsm->D.bits = 0; - pAsm->D2.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; - pAsm->is_tex = GL_FALSE; - pAsm->need_tex_barrier = GL_FALSE; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } @@ -3282,9 +2835,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -3554,10 +3142,13 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) { - checkop2(pAsm); + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -3567,17 +3158,30 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + else { - return GL_FALSE; + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + } - - if ( GL_FALSE == next_ins2(pAsm) ) + + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4384,77 +3988,67 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); - - tmp = gethelpr(pAsm); - - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + checkop1(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_src(pAsm, 0, -1); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + next_ins(pAsm); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -4467,7 +4061,7 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) } pAsm->D.dst.opcode = opcode; - pAsm->D.dst.math = 1; + //pAsm->D.dst.math = 1; if( GL_FALSE == assemble_dst(pAsm) ) { @@ -4494,32 +4088,34 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) { - if( GL_FALSE == checkop2(pAsm) ) - { - return GL_FALSE; - } + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; pAsm->D.dst.predicated = 1; - pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 1, -1) ) - { - return GL_FALSE; - } + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -4626,22 +4222,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) - { - case OPCODE_TEX: - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); - return GL_FALSE; - break; - case OPCODE_TXP: - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); - return GL_FALSE; - break; - } - if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { GLuint tmp = gethelpr(pAsm); @@ -4719,24 +4299,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); - /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4749,13 +4311,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; @@ -4772,30 +4334,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); - - next_ins(pAsm); - - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); @@ -4820,14 +4364,32 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) { pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -5103,6 +4665,11 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + if(GL_FALSE == add_cf_instruction(pAsm) ) { return GL_FALSE; @@ -5247,6 +4814,11 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) GLboolean assemble_BRK(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5313,6 +4885,10 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm) GLboolean assemble_CONT(r700_AssemblerBase *pAsm) { #ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + unsigned int unFCSP; for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) { @@ -5471,7 +5047,7 @@ void add_return_inst(r700_AssemblerBase *pAsm) pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; } -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) { /* Put in sub */ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) @@ -5486,7 +5062,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex) pAsm->unSubArraySize += 10; } - pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex; + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; @@ -5577,9 +5153,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm) GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Shift, GLuint uiNumberInsts, - struct prog_instruction *pILInst) + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) { + GLint uiIL_Offset; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; if(GL_FALSE == add_cf_instruction(pAsm) ) @@ -5612,8 +5192,12 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->unCallerArraySize += 10; } - pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex; - pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; pAsm->unCallerArrayPointer++; @@ -5623,7 +5207,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLboolean bRet; for(j=0; j<pAsm->unSubArrayPointer; j++) { - if(nILindex == pAsm->subs[j].subIL_Offset) + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) { /* compiled before */ max = pAsm->subs[j].unStackDepthMax @@ -5641,7 +5225,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; unSubID = pAsm->unSubArrayPointer; - bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm); + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); if(GL_TRUE == bRet) { @@ -5651,6 +5235,8 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, { pAsm->CALLSTACK[pAsm->CALLSP].max = max; } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; } return bRet; @@ -5668,11 +5254,12 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 0; /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ #if 0 pAsm->S[0].src.rtype = SRC_REC_LITERAL; //pAsm->S[0].src.reg = 0; @@ -5697,7 +5284,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) pAsm->S[0].src.swizzlez = flagValue; pAsm->S[0].src.swizzlew = flagValue; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5722,9 +5309,10 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; - pAsm->D2.dst2.literal = 1; + pAsm->D2.dst2.literal_slots = 1; pAsm->D2.dst2.SaturateMode = SATURATE_OFF; pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ pAsm->S[0].src.rtype = DST_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->flag_reg_index; @@ -5758,7 +5346,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm) pAsm->S[1].src.swizzlez = SQ_SEL_1; pAsm->S[1].src.swizzlew = SQ_SEL_1; - if( GL_FALSE == next_ins2(pAsm) ) + if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -5814,6 +5402,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) } GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) @@ -5853,6 +5442,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } #endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } switch (pILInst[i].Opcode) { @@ -5881,7 +5476,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -5923,9 +5518,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_KIL: case OPCODE_KIL_NV: - /* done at OPCODE_SE/SGT...etc. */ - /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) - return GL_FALSE; */ + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + return GL_FALSE; break; case OPCODE_LG2: if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) @@ -5964,6 +5558,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -5979,7 +5593,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: @@ -5988,151 +5602,23 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SEQ: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGT: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; case OPCODE_SGE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) - { - return GL_FALSE; - } + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; } break; @@ -6144,61 +5630,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) - { - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; } @@ -6211,60 +5648,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, SrcRegSave[1] = pILInst[i].SrcReg[1]; pILInst[i].SrcReg[0] = SrcRegSave[1]; pILInst[i].SrcReg[1] = SrcRegSave[0]; - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) - { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) ) - { - return GL_FALSE; - } - } - else + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) - { - pILInst[i].SrcReg[0] = SrcRegSave[0]; - pILInst[i].SrcReg[1] = SrcRegSave[1]; - return GL_FALSE; - } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; } pILInst[i].SrcReg[0] = SrcRegSave[0]; pILInst[i].SrcReg[1] = SrcRegSave[1]; @@ -6272,51 +5660,9 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_SNE: - if(OPCODE_IF == pILInst[i+1].Opcode) - { - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_BRK == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if(OPCODE_CONT == pILInst[i+1].Opcode) - { -#ifdef USE_CF_FOR_CONTINUE_BREAK - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; -#else - pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE; -#endif - if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) ) - { - return GL_FALSE; - } - } - else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode)) + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) { - if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) ) - { - return GL_FALSE; - } - } - else - { - if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) - { - return GL_FALSE; - } + return GL_FALSE; } break; @@ -6344,7 +5690,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst, } } break; - + case OPCODE_DDX: + case OPCODE_DDY: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: @@ -6417,7 +5764,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst, break; case OPCODE_BGNSUB: - if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) ) + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) { return GL_FALSE; } @@ -6432,9 +5779,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst, case OPCODE_CAL: if( GL_FALSE == assemble_CAL(pR700AsmCode, - pILInst[i].BranchTarget, + pILInst[i].BranchTarget, + uiIL_Shift, uiNumberInsts, - pILInst) ) + pILInst, + NULL) ) { return GL_FALSE; } @@ -6471,7 +5820,7 @@ GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) return GL_TRUE; } -GLboolean RelocProgram(r700_AssemblerBase * pAsm) +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) { GLuint i; GLuint unCFoffset; @@ -6481,6 +5830,12 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) R700ShaderInstruction * pInst; R700ControlFlowGenericClause * pCFInst; + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; /* remove flags init if they are not used */ @@ -6526,6 +5881,11 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) unCFoffset = plstCFmain->uNumOfNode; + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + /* Reloc subs */ for(i=0; i<pAsm->unSubArrayPointer; i++) { @@ -6563,6 +5923,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) pInst = pInst->pNextInst; }; + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + /* Put sub into main */ plstCFmain->pTail->pNextInst = plstCFsub->pHead; plstCFmain->pTail = plstCFsub->pTail; @@ -6576,11 +6014,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm) { pAsm->callers[i].cf_ptr->m_Word0.f.addr = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } } return GL_TRUE; } +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -6938,6 +6581,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) FREE(pR700AsmCode->callers); } + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 130fc89dae1..dbd9860f7d0 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -114,20 +153,22 @@ typedef struct PVSDSTtag typedef struct PVSINSTtag { - BITS literal :2; + BITS literal_slots :2; BITS SaturateMode :2; + BITS index_mode :3; } PVSINST; typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS negz:1; @@ -294,6 +335,7 @@ typedef struct SUB_OFFSET GLint subIL_Offset; GLuint unCFoffset; GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; TypedShaderList lstCFInstructions_local; } SUB_OFFSET; @@ -302,6 +344,9 @@ typedef struct CALLER_POINTER GLint subIL_Offset; GLint subDescIndex; R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; } CALLER_POINTER; #define SQ_MAX_CALL_DEPTH 0x00000020 @@ -343,8 +388,10 @@ typedef struct r700_AssemblerBase PVSDWORD D; PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; @@ -415,6 +462,7 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; @@ -432,6 +480,13 @@ typedef struct r700_AssemblerBase GLuint unCFflags; + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + + GLuint unVetTexBits; + } r700_AssemblerBase; //Internal use @@ -453,7 +508,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -526,13 +581,6 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); -GLboolean next_ins2(r700_AssemblerBase *pAsm); -GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm); - -/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */ -GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); -GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral); - GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); @@ -546,7 +594,6 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); @@ -567,12 +614,12 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); @@ -588,13 +635,15 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm); GLboolean assemble_COND(r700_AssemblerBase *pAsm); GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); -GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex); +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift); GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); GLboolean assemble_RET(r700_AssemblerBase *pAsm); GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLint nILindex, + GLuint uiIL_Offest, GLuint uiNumberInsts, - struct prog_instruction *pILInst); + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, @@ -605,16 +654,23 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); -GLboolean RelocProgram(r700_AssemblerBase * pAsm); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 8126777bf48..c124e02184f 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + struct r700_vertex_program *vp = context->selected_vp; + struct radeon_bo *bo = NULL; unsigned int i; BATCH_LOCALS(&context->radeon); @@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; uint32_t offset; if (t) { @@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + } + else + { + R600_OUT_BATCH(i * 7); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); @@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) } } +#define SAMPLER_STRIDE 3 + static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; + + struct r700_vertex_program *vp = context->selected_vp; + BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; if (t) { BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1 + } + else + { + R600_OUT_BATCH(i * 3); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); @@ -1163,7 +1189,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) count += 3; if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + /* targets are enabled in r700SetRenderTarget but state + size is calculated before that. Until MRT's are done + hardcode target0 as enabled. */ + count += 3; + for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) { if (r700->render_target[ui].enabled) count += 3; } @@ -1306,9 +1336,9 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); - ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(sx, always, 9, r700SendSXState); ALLOC_STATE(vgt, always, 41, r700SendVGTState); ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index e9ef6c86953..ca0710b6813 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -34,6 +34,7 @@ #include "main/imports.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" +#include "shader/program.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -42,6 +43,54 @@ #include "r700_debug.h" +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog) +{ + static const gl_state_index winstate[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + struct prog_instruction *newInst, *inst; + GLint win_size; /* state reference */ + GLuint wpos_temp; /* temp register */ + int i, j; + + /* PARAM win_size = STATE_FB_SIZE */ + win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); + + wpos_temp = fprog->Base.NumTemporaries++; + + /* scan program where WPOS is used and replace with wpos_temp */ + inst = fprog->Base.Instructions; + for (i = 0; i < fprog->Base.NumInstructions; i++) { + for (j=0; j < 3; j++) { + if(inst->SrcReg[j].File == PROGRAM_INPUT && + inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) { + inst->SrcReg[j].File = PROGRAM_TEMPORARY; + inst->SrcReg[j].Index = wpos_temp; + } + } + inst++; + } + + _mesa_insert_instructions(&(fprog->Base), 0, 1); + + newInst = fprog->Base.Instructions; + /* invert wpos.y + * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ + newInst[0].Opcode = OPCODE_ADD; + newInst[0].DstReg.File = PROGRAM_TEMPORARY; + newInst[0].DstReg.Index = wpos_temp; + newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; + + newInst[0].SrcReg[0].File = PROGRAM_INPUT; + newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; + newInst[0].SrcReg[0].Negate = NEGATE_Y; + + newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[1].Index = win_size; + newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + +} + //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, struct gl_fragment_program *mesa_fp, @@ -93,7 +142,7 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } - + /* order has been taken care of */ #if 1 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) @@ -149,6 +198,17 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->number_used_registers += unMaxVarying + 1; } #endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } + + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++; + } /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -303,10 +363,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit; + int i; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); + + if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS) + { + insert_wpos_code(ctx, mesa_fp); + } + + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { @@ -315,7 +382,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, InitShaderProgram(&(fp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + if( GL_FALSE == AssembleInstr(0, + 0, mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) @@ -328,7 +403,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } - if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) { return GL_FALSE; } @@ -451,6 +526,35 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + + + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) + { + ui++; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask); + //SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + } + else + { + CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + } + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); @@ -470,6 +574,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) + r700->SPI_PS_INPUT_CNTL[ui].u32All = 0; + unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -535,6 +643,35 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } + + + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) { unBit = 1 << i; @@ -583,6 +720,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index e562bfa4789..39c59c9201d 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -48,6 +48,8 @@ struct r700_fragment_program }; /* Internal */ +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog); + void Map_Fragment_Program(r700_AssemblerBase *pAsm, struct gl_fragment_program *mesa_fp, GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h index 2829cca0a3c..cdb9a570f7c 100644 --- a/src/mesa/drivers/dri/r600/r700_shaderinst.h +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h @@ -42,6 +42,13 @@ #define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0 #define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0 +//richard dec.10 glsl +#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000 +#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012 +#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012 +#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012 +//------------------- + #define SHADERINST_TYPEMASK_CF 0x10 #define SHADERINST_TYPEMASK_ALU 0x20 #define SHADERINST_TYPEMASK_TEX 0x40 diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index d3d1da79592..90fac078ff0 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -337,7 +337,15 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, InitShaderProgram(&(vp->r700AsmCode)); + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + if(GL_FALSE == AssembleInstr(0, + 0, vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) @@ -350,7 +358,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) ) + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) { return GL_FALSE; } @@ -667,5 +675,24 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 3b4366aa61c..51fa6189377 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -257,36 +257,11 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) radeonContextPtr radeon = RADEON_CONTEXT(ctx); if (ctx->Scissor.Enabled) { /* We don't pipeline cliprect changes */ - if (!radeon->radeonScreen->kernel_mm) { - radeon_firevertices(radeon); - } + radeon_firevertices(radeon); radeonUpdateScissor(ctx); } } -void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - GLuint i; - drm_radeon_stipple_t stipple; - - /* Must flip pattern upside down. - */ - for ( i = 0 ; i < 32 ; i++ ) { - stipple.mask[31 - i] = ((GLuint *) mask)[i]; - } - - /* TODO: push this into cmd mechanism - */ - radeon_firevertices(radeon); - LOCK_HARDWARE( radeon ); - - drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE, - &stipple, sizeof(stipple) ); - UNLOCK_HARDWARE( radeon ); -} - - /* ================================================================ * SwapBuffers with client-side throttling */ @@ -842,7 +817,7 @@ void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) */ if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) { radeon_update_renderbuffers(radeon->dri.context, - radeon->dri.context->driDrawablePriv); + radeon->dri.context->driDrawablePriv, GL_FALSE); } } @@ -859,7 +834,7 @@ void radeonReadBuffer( GLcontext *ctx, GLenum mode ) if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) { radeon_update_renderbuffers(rmesa->dri.context, - rmesa->dri.context->driReadablePriv); + rmesa->dri.context->driReadablePriv, GL_FALSE); } } /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ @@ -910,9 +885,9 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he if (radeon->is_front_buffer_rendering) { ctx->Driver.Flush(ctx); } - radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); + radeon_update_renderbuffers(driContext, driContext->driDrawablePriv, GL_FALSE); if (driContext->driDrawablePriv != driContext->driReadablePriv) - radeon_update_renderbuffers(driContext, driContext->driReadablePriv); + radeon_update_renderbuffers(driContext, driContext->driReadablePriv, GL_FALSE); } old_viewport = ctx->Driver.Viewport; diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h index def0cc17a91..0608fe2418c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.h +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -10,7 +10,6 @@ void radeonRecalcScissorRects(radeonContextPtr radeon); void radeonSetCliprects(radeonContextPtr radeon); void radeonUpdateScissor( GLcontext *ctx ); void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h); -void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ); void radeonWaitForIdleLocked(radeonContextPtr radeon); extern uint32_t radeonGetAge(radeonContextPtr radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 71f70d724b9..5c68bf5df6c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -499,7 +499,8 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) } void -radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) +radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, + GLboolean front_only) { unsigned int attachments[10]; __DRIbuffer *buffers = NULL; @@ -525,7 +526,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct radeon_renderbuffer *stencil_rb; i = 0; - if ((radeon->is_front_buffer_rendering || + if ((front_only || radeon->is_front_buffer_rendering || radeon->is_front_buffer_reading || !draw->color_rb[1]) && draw->color_rb[0]) { @@ -533,23 +534,25 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]); } - if (draw->color_rb[1]) { - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); - } + if (!front_only) { + if (draw->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); + } - depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); - stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); - - if ((depth_rb != NULL) && (stencil_rb != NULL)) { - attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; - attachments[i++] = radeon_bits_per_pixel(depth_rb); - } else if (depth_rb != NULL) { - attachments[i++] = __DRI_BUFFER_DEPTH; - attachments[i++] = radeon_bits_per_pixel(depth_rb); - } else if (stencil_rb != NULL) { - attachments[i++] = __DRI_BUFFER_STENCIL; - attachments[i++] = radeon_bits_per_pixel(stencil_rb); + depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = radeon_bits_per_pixel(stencil_rb); + } } buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable, @@ -562,12 +565,14 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) i = 0; if (draw->color_rb[0]) attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (draw->color_rb[1]) - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) - attachments[i++] = __DRI_BUFFER_DEPTH; - if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) - attachments[i++] = __DRI_BUFFER_STENCIL; + if (!front_only) { + if (draw->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + } buffers = (*screen->dri2.loader->getBuffers)(drawable, &drawable->w, @@ -735,9 +740,9 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, readfb = driReadPriv->driverPrivate; if (driContextPriv->driScreenPriv->dri2.enabled) { - radeon_update_renderbuffers(driContextPriv, driDrawPriv); + radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE); if (driDrawPriv != driReadPriv) - radeon_update_renderbuffers(driContextPriv, driReadPriv); + radeon_update_renderbuffers(driContextPriv, driReadPriv, GL_FALSE); _mesa_reference_renderbuffer(&radeon->state.color.rb, &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base)); _mesa_reference_renderbuffer(&radeon->state.depth.rb, diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index ad953ddbb5a..0739496e032 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -406,9 +406,6 @@ struct radeon_state { struct radeon_depthbuffer_state depth; struct radeon_scissor_state scissor; struct radeon_stencilbuffer_state stencil; - - struct radeon_cs_space_check bos[RADEON_MAX_BOS]; - int validated_bo_count; }; /** @@ -589,7 +586,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, void radeonCleanupContext(radeonContextPtr radeon); GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); -void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, + GLboolean front_only); GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, __DRIdrawablePrivate * driDrawPriv, __DRIdrawablePrivate * driReadPriv); diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 4e2c52c835c..12ab33a009c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -331,8 +331,12 @@ struct r100_hw_state { struct radeon_state_atom stp; }; +struct radeon_stipple_state { + GLuint mask[32]; +}; struct r100_state { + struct radeon_stipple_state stipple; struct radeon_texture_state texture; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index b8c65f4ce62..d31e4e47ddb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -205,7 +205,6 @@ again_alloc: counter on unused buffers for later freeing them from begin of list */ dma_bo = last_elem(&rmesa->dma.free); - assert(dma_bo->bo->cref == 1); remove_from_list(dma_bo); insert_at_head(&rmesa->dma.reserved, dma_bo); } diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 7ec641ff18a..fc21069a92c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -369,6 +369,12 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx, } +/* TODO: According to EXT_fbo spec internal format of texture image + * once set during glTexImage call, should be preserved when + * attaching image to renderbuffer. When HW doesn't support + * rendering to format of attached image, set framebuffer + * completeness accordingly in radeon_validate_framebuffer (issue #79). + */ static GLboolean radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, struct gl_texture_image *texImage) diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index bdbb9460bf0..a7f347202a1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -437,9 +437,12 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel]; + /* TODO: bring back these assertions once the FBOs are fixed */ +#if 0 assert(image->mtlevel == level); assert(srclvl->size == dstlvl->size); assert(srclvl->rowstride == dstlvl->rowstride); +#endif radeon_bo_map(image->mt->bo, GL_FALSE); @@ -450,23 +453,18 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_miptree_unreference(&image->mt); } else { - /* need to confirm this value is correct */ - if (_mesa_is_format_compressed(image->base.TexFormat)) { - unsigned size = _mesa_format_image_size(image->base.TexFormat, - image->base.Width, - image->base.Height, - image->base.Depth); - memcpy(dest, image->base.Data, size); - } else { - uint32_t srcrowstride; - uint32_t height; + const uint32_t srcrowstride = _mesa_format_row_stride(image->base.TexFormat, image->base.Width); + uint32_t rows = image->base.Height * image->base.Depth; - height = image->base.Height * image->base.Depth; - srcrowstride = image->base.Width * _mesa_get_format_bytes(image->base.TexFormat); - copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride, - height, srcrowstride); + if (_mesa_is_format_compressed(image->base.TexFormat)) { + uint32_t blockWidth, blockHeight; + _mesa_get_format_block_size(image->base.TexFormat, &blockWidth, &blockHeight); + rows = (rows + blockHeight - 1) / blockHeight; } + copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride, + rows, srcrowstride); + _mesa_free_texmemory(image->base.Data); image->base.Data = 0; } diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 4d0d35ee0cd..f6c733ab209 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -550,6 +550,31 @@ static void radeonPolygonOffset( GLcontext *ctx, rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; } +static void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ) +{ + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + + /* Must flip pattern upside down. + */ + for ( i = 0 ; i < 32 ; i++ ) { + rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; + } + + /* TODO: push this into cmd mechanism + */ + radeon_firevertices(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(drm_radeon_stipple_t) ); + UNLOCK_HARDWARE( &rmesa->radeon ); +} + static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 3cbe3b47254..84ddcfd4fd3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -672,24 +672,13 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ return; } - + _mesa_lock_texture(radeon->glCtx, texObj); if (t->bo) { radeon_bo_unref(t->bo); diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 0390d376ba2..28690325d12 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -515,15 +515,18 @@ static int image_matches_texture_obj(struct gl_texture_object *texObj, struct gl_texture_image *texImage, unsigned level) { - const struct gl_texture_image *baseImage = texObj->Image[0][level]; + const struct gl_texture_image *baseImage = texObj->Image[0][texObj->BaseLevel]; + + if (!baseImage) + return 0; if (level < texObj->BaseLevel || level > texObj->MaxLevel) return 0; const unsigned levelDiff = level - texObj->BaseLevel; - const unsigned refWidth = baseImage->Width >> levelDiff; - const unsigned refHeight = baseImage->Height >> levelDiff; - const unsigned refDepth = baseImage->Depth >> levelDiff; + const unsigned refWidth = MAX2(baseImage->Width >> levelDiff, 1); + const unsigned refHeight = MAX2(baseImage->Height >> levelDiff, 1); + const unsigned refDepth = MAX2(baseImage->Depth >> levelDiff, 1); return (texImage->Width == refWidth && texImage->Height == refHeight && |