diff options
author | Roland Scheidegger <[email protected]> | 2005-02-10 22:36:06 +0000 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2005-02-10 22:36:06 +0000 |
commit | 4837ea30208d002bc36a836d2117f826d40c8bfa (patch) | |
tree | 4db5a234a5af7d7f02a42ed824b85e938066828d /src/mesa/drivers/dri | |
parent | 26d31591257d575362776972439f614948366dd1 (diff) |
Add texture micro and macro tiling to the radeon and r200 drivers. This can improve performance by up to 15% in texture-intensive applications. To make tiling work, texture uploads now use the texture's actual blit format and blit width instead of a fixed blit format and blit width.
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_context.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_context.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_reg.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_texmem.c | 71 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_texstate.c | 73 | ||||
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_context.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_context.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_texmem.c | 52 | ||||
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_texstate.c | 70 |
9 files changed, 209 insertions, 74 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 4eca4ad7e50..baaca087555 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -272,6 +272,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, else rmesa->using_hyperz = GL_TRUE; } + + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; /* Init default driver functions then plug in our R200-specific functions * (the texture functions are especially important) diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index cedf1b974f1..7e0a46ae51c 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -167,6 +167,8 @@ struct r200_tex_obj { GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ }; @@ -931,6 +933,7 @@ struct r200_context { driOptionCache optionCache; GLboolean using_hyperz; + GLboolean texmicrotile; }; #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h index c1132e54ab7..2468c6cebfa 100644 --- a/src/mesa/drivers/dri/r200/r200_reg.h +++ b/src/mesa/drivers/dri/r200/r200_reg.h @@ -968,6 +968,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0) #define R200_TXO_ENDIAN_WORD_SWAP (2 << 0) #define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +#define R200_TXO_MACRO_TILE (1 << 2) +#define R200_TXO_MICRO_TILE (1 << 3) #define R200_TXO_OFFSET_MASK 0xffffffe0 #define R200_TXO_OFFSET_SHIFT 5 #define R200_PP_CUBIC_OFFSET_F1_0 0x2d04 diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c index 3f8e5d6e7f8..7472afeedd7 100644 --- a/src/mesa/drivers/dri/r200/r200_texmem.c +++ b/src/mesa/drivers/dri/r200/r200_texmem.c @@ -43,12 +43,10 @@ SOFTWARE. #include "context.h" #include "colormac.h" #include "macros.h" -#include "radeon_reg.h" /* gets definition for usleep */ #include "r200_context.h" -#include "r200_state.h" #include "r200_ioctl.h" -#include "r200_swtcl.h" #include "r200_tex.h" +#include "radeon_reg.h" #include <unistd.h> /* for usleep() */ @@ -253,12 +251,13 @@ static void r200UploadRectSubImage( r200ContextPtr rmesa, /* Blit to framebuffer */ - r200EmitBlit( rmesa, - blit_format, - dstPitch, GET_START( &region ), - dstPitch, t->bufAddr, - 0, 0, - 0, done, + r200EmitBlit( rmesa, + blit_format, + dstPitch, GET_START( &region ), + dstPitch | (t->tile_bits >> 16), + t->bufAddr, + 0, 0, + 0, done, width, lines ); r200EmitWait( rmesa, RADEON_WAIT_2D ); @@ -339,7 +338,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, imageWidth = texImage->Width; imageHeight = texImage->Height; - offset = t->bufAddr; + offset = t->bufAddr + t->base.totalSize / 6 * face; if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { GLint imageX = 0; @@ -363,19 +362,47 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, * We used to use 1, 2 and 4-byte texels and used to use the texture * width to dictate the blit width - but that won't work for compressed * textures. (Brian) + * NOTE: can't do that with texture tiling.
(sroland) */ tex.offset = offset; - tex.pitch = BLIT_WIDTH_BYTES / 64; - tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); + if (texImage->TexFormat->TexelBytes) { - tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */ + /* use multi-byte upload scheme */ tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK; + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & R200_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256) && + ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) || + (texImage->Height >= 16))) { + /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, + OR if height is smaller than 8 automatically, but if micro tiling is active + the limit is height 16 instead ? */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } } else { /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed so the kernel module reads the right amount of data. 
*/ + tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); tex.height = (imageHeight + 3) / 4; tex.width = (imageWidth + 3) / 4; switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) { @@ -390,19 +417,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, fprintf(stderr, "unknown compressed tex format in uploadSubImage\n"); } } - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); - - /* Adjust the base offset to account for the Y-offset. This is done, - * instead of just letting the Y-offset automatically take care of it, - * because it is possible, for very large textures, for the Y-offset - * to exceede the [-8192,+8191] range. - */ - tex.offset += tmp.y * 1024; - tmp.y = 0; - LOCK_HARDWARE( rmesa ); do { ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, @@ -473,7 +488,11 @@ int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ) t->bufAddr = rmesa->r200Screen->texOffset[heap] + t->base.memBlock->ofs; t->pp_txoffset = t->bufAddr; - + + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... 
*/ + t->pp_txoffset |= t->tile_bits; + } /* Mark this texobj as dirty on all units: */ diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 1e56c78f9be..3fba25d0b50 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -125,8 +125,8 @@ static void r200SetTexImages( r200ContextPtr rmesa, { r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i; + GLint curOffset, blitWidth; + GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; @@ -146,6 +146,7 @@ static void r200SetTexImages( r200ContextPtr rmesa, return; } + texelBytes = baseImage->TexFormat->TexelBytes; /* Compute which mipmap levels we really want to send to the hardware. */ @@ -164,6 +165,28 @@ static void r200SetTexImages( r200ContextPtr rmesa, * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ + if (texelBytes) { + if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) && + /* texrect might be able to use micro tiling too in theory? 
*/ + (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + t->tile_bits |= R200_TXO_MICRO_TILE; + } + } + if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { + /* we can set macro tiling even for small textures, they will be untiled anyway */ + t->tile_bits |= R200_TXO_MACRO_TILE; + } + } for (i = 0; i < numLevels; i++) { const struct gl_texture_image *texImage; @@ -195,28 +218,41 @@ static void r200SetTexImages( r200ContextPtr rmesa, else size = texImage->CompressedSize; } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63) - & ~63) * texImage->Height; + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } else { - int w = texImage->Width * texImage->TexFormat->TexelBytes; - if (w < 32) - w = 32; - size = w * texImage->Height * texImage->Depth; + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } assert(size > 0); - /* Align to 32-byte offset. It is faster to do this unconditionally * (no branch penalty). 
*/ curOffset = (curOffset + 0x1f) & ~0x1f; - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } #if 0 /* for debugging only and only applicable to non-rectangle targets */ @@ -242,16 +278,13 @@ static void r200SetTexImages( r200ContextPtr rmesa, /* Setup remaining cube face blits, if needed */ if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */ - const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1) - & ~(BLIT_WIDTH_BYTES-1); - const GLuint lines = faceSize / BLIT_WIDTH_BYTES; + const GLuint faceSize = t->base.totalSize; GLuint face; - /* reuse face 0 x/y/width/height - just adjust y */ + /* reuse face 0 x/y/width/height - just update the offset when uploading */ for (face = 1; face < 6; face++) { for (i = 0; i < numLevels; i++) { t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y + face * lines; + t->image[face][i].y = t->image[0][i].y; t->image[face][i].width = t->image[0][i].width; t->image[face][i].height = t->image[0][i].height; } @@ -310,7 +343,7 @@ static void r200SetTexImages( r200ContextPtr rmesa, if (baseImage->IsCompressed) t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63); + 
t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->pp_txpitch -= 32; t->dirty_state = TEX_ALL; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 4229d5cb5e5..5d7e28cf89a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -255,6 +255,9 @@ radeonCreateContext( const __GLcontextModes *glVisual, rmesa->using_hyperz = GL_TRUE; } + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; + /* Init default driver functions then plug in our Radeon-specific functions * (the texture functions are especially important) */ @@ -445,6 +448,7 @@ radeonCreateContext( const __GLcontextModes *glVisual, } (*rmesa->get_ust)( & rmesa->swap_ust ); + if (rmesa->sarea->tiling_enabled != 0) fprintf(stderr, "color tiling enabled!\n"); #if DO_DEBUG RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ), diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 53860c12b84..8d0637ca326 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -162,6 +162,8 @@ struct radeon_tex_obj { GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ }; @@ -186,7 +188,7 @@ struct radeon_state_atom { GLboolean dirty; /* dirty-mark in emit_state_list */ GLboolean (*check)( GLcontext * ); /* is this state active? 
*/ }; - + /* Trying to keep these relatively short as the variables are becoming @@ -781,6 +783,7 @@ struct radeon_context { driOptionCache optionCache; GLboolean using_hyperz; + GLboolean texmicrotile; /* Performance counters */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c index d910a6c15a5..d492e190c12 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texmem.c +++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c @@ -46,6 +46,8 @@ SOFTWARE. #include "radeon_ioctl.h" #include "radeon_tex.h" +#include <unistd.h> /* for usleep() */ + /** * Destroy any device-dependent state associated with the texture. This may @@ -151,12 +153,12 @@ static void radeonUploadRectSubImage( radeonContextPtr rmesa, /* Blit to framebuffer */ - radeonEmitBlit( rmesa, - blit_format, - dstPitch, GET_START( &region ), - dstPitch, t->bufAddr, - 0, 0, - 0, done, + radeonEmitBlit( rmesa, + blit_format, + dstPitch, GET_START( &region ), + dstPitch, t->bufAddr, + 0, 0, + 0, done, width, lines ); radeonEmitWait( rmesa, RADEON_WAIT_2D ); @@ -248,19 +250,43 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, * We used to use 1, 2 and 4-byte texels and used to use the texture * width to dictate the blit width - but that won't work for compressed * textures. (Brian) + * NOTE: can't do that with texture tiling.
(sroland) */ tex.offset = offset; - tex.pitch = BLIT_WIDTH_BYTES / 64; - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); + if (texImage->TexFormat->TexelBytes) { - tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */ + /* use multi-byte upload scheme */ tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK; + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & RADEON_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) { + /* radeon switches off macro tiling for small textures/mipmaps it seems */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } } else { /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed so the kernel module reads the right amount of data. 
*/ + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); tex.height = (imageHeight + 3) / 4; tex.width = (imageWidth + 3) / 4; switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) { @@ -273,10 +299,6 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, break; } } - tex.image = &tmp; - - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); LOCK_HARDWARE( rmesa ); do { @@ -344,6 +366,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac + t->base.memBlock->ofs; t->pp_txoffset = t->bufAddr; + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->pp_txoffset |= t->tile_bits; + } /* Mark this texobj as dirty on all units: */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 5e818da9fd1..b96ad740d15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -127,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa, { radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i; + GLint curOffset, blitWidth; + GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; @@ -148,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa, return; } + texelBytes = baseImage->TexFormat->TexelBytes; /* Compute which mipmap levels we really want to send to the hardware. */ @@ -166,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa, * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. 
*/ + if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) { + if (rmesa->texmicrotile && (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + /* R100 has two microtile bits (only the txoffset reg, not the blitter) + weird: X2 + OPT: 32bit correct, 16bit completely hosed + X2: 32bit correct, 16bit correct + OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */ + t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/; + } + } + if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) { + /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not + in the case if height is smaller than 16 (not 100% sure), as does the r200, + so need to disable macro tiling in that case */ + if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) { + t->tile_bits |= RADEON_TXO_MACRO_TILE; + } + } + } for (i = 0; i < numLevels; i++) { const struct gl_texture_image *texImage; @@ -197,28 +226,41 @@ static void radeonSetTexImages( radeonContextPtr rmesa, else size = texImage->CompressedSize; } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63) - & ~63) * texImage->Height; + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } else { - int w = texImage->Width * texImage->TexFormat->TexelBytes; - if (w < 32) - w = 32; - size = w * texImage->Height * texImage->Depth; + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } assert(size > 0); - /* Align to 32-byte offset. It is faster to do this unconditionally * (no branch penalty). */ curOffset = (curOffset + 0x1f) & ~0x1f; - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } #if 0 /* for debugging only and only applicable to non-rectangle targets */ @@ -263,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa, if (baseImage->IsCompressed) t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63); + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->pp_txpitch -= 32; t->dirty_state = TEX_ALL; |