diff options
78 files changed, 2352 insertions, 381 deletions
diff --git a/common.py b/common.py index b44f20e8216..1d0c6a71fa5 100644 --- a/common.py +++ b/common.py @@ -15,6 +15,8 @@ import SCons.Script.SConscript # Defaults host_platform = _platform.system().lower() +if host_platform.startswith('cygwin'): + host_platform = 'cygwin' # Search sys.argv[] for a "platform=foo" argument since we don't have # an 'env' variable at this point. @@ -81,7 +83,7 @@ def AddOptions(opts): opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) opts.Add(EnumOption('platform', 'target platform', host_platform, - allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin_nt-5.1', 'cygwin_nt-6.1', 'sunos5', 'freebsd8'))) + allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin', 'sunos5', 'freebsd8'))) opts.Add('toolchain', 'compiler toolchain', default_toolchain) opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) diff --git a/docs/GL3.txt b/docs/GL3.txt index 9ff25a95297..0c53bc42d54 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -27,7 +27,7 @@ Non-normalized Integer texture/framebuffer formats ~50% done 1D/2D Texture arrays core Mesa, swrast done Packed depth/stencil formats DONE Per-buffer blend and masks (GL_EXT_draw_buffers2) DONE -GL_EXT_texture_compression_rgtc not started +GL_EXT_texture_compression_rgtc DONE (swrast, gallium r600) Red and red/green texture formats DONE (swrast, i965, gallium) Transform feedback (GL_EXT_transform_feedback) ~50% done glBindFragDataLocation, glGetFragDataLocation, diff --git a/docs/MESA_multithread_makecurrent.spec b/docs/MESA_multithread_makecurrent.spec new file mode 100644 index 00000000000..5065c2fc0a3 --- /dev/null +++ b/docs/MESA_multithread_makecurrent.spec @@ -0,0 +1,158 @@ +Name + + MESA_multithread_makecurrent + +Name Strings + + GLX_MESA_multithread_makecurrent + +Contact + + Eric Anholt ([email protected]) + +Status + + Not shipping. + +Version + + Last Modified Date: 21 February 2011 + +Number + + TBD + +Dependencies + + OpenGL 1.0 or later is required. + GLX 1.3 or later is required. + +Overview + + The GLX context setup encourages multithreaded applications to + create a context per thread which each operate on their own + objects in parallel, and leaves synchronization for write access + to shared objects up to the application. + + For some applications, maintaining per-thread contexts and + ensuring that the glFlush happens in one thread before another + thread starts working on that object is difficult. For them, + using the same context across multiple threads and protecting its + usage with a mutex is both higher performance and easier to + implement. This extension gives those applications that option by + relaxing the context binding requirements. + + This new behavior matches the requirements of AGL, while providing + a feature not specified in WGL. + +IP Status + + Open-source; freely implementable. + +Issues + + None. + +New Procedures and Functions + + None. + +New Tokens + + None. + +Changes to Chapter 2 of the GLX 1.3 Specification (Functions and Errors) + + Replace the following sentence from section 2.2 Rendering Contexts: + In addition, a rendering context can be current for only one + thread at a time. + with: + In addition, an indirect rendering context can be current for + only one thread at a time. A direct rendering context may be + current to multiple threads, with synchronization of access to + the context thruogh the GL managed by the application through + mutexes. + +Changes to Chapter 3 of the GLX 1.3 Specification (Functions and Errors) + + Replace the following sentence from section 3.3.7 Rendering Contexts: + If ctx is current to some other thread, then + glXMakeContextCurrent will generate a BadAccess error. + with: + If ctx is an indirect context current to some other thread, + then glXMakeContextCurrent will generate a BadAccess error. + + Replace the following sentence from section 3.5 Rendering Contexts: + If ctx is current to some other thread, then + glXMakeCurrent will generate a BadAccess error. + with: + If ctx is an indirect context current to some other thread, + then glXMakeCurrent will generate a BadAccess error. + +GLX Protocol + + None. The GLX extension only extends to direct rendering contexts. + +Errors + + None. + +New State + + None. + +Issues + + (1) What happens if the app binds a context/drawable in multiple + threads, then binds a different context/thread in one of them? + + As with binding a new context from the current thread, the old + context's refcount is reduced and the new context's refcount is + increased. + + (2) What happens if the app binds a context/drawable in multiple + threads, then binds None/None in one of them? + + The GLX context is unreferenced from that thread, and the other + threads retain their GLX context binding. + + (3) What happens if the app binds a context/drawable in 7 threads, + then destroys the context in one of them? + + As with GLX context destruction previously, the XID is destroyed + but the context remains usable by threads that have the context + current. + + (4) What happens if the app binds a new drawable/readable with + glXMakeCurrent() when it is already bound to another thread? + + The context becomes bound to the new drawable/readable, and + further rendering in either thread will use the new + drawable/readable. + + (5) What requirements should be placed on the user managing contexts + from multiple threads? + + The intention is to allow multithreaded access to the GL at the + minimal performance cost, so requiring that the GL do general + synchronization (beyond that already required by context sharing) + is not an option, and synchronizing of GL's access to the GL + context between multiple threads is left to the application to do + across GL calls. However, it would be unfortunate for a library + doing multithread_makecurrent to require that other libraries + share in synchronization for binding of their own contexts, so the + refcounting of the contexts is required to be threadsafe. + + (6) Does this apply to indirect contexts? + + This was ignored in the initial revision of the spec. Behavior + for indirect contexts is left as-is. + +Revision History + + 20 November 2009 Eric Anholt - initial specification + 22 November 2009 Eric Anholt - added issues from Ian Romanick. + 3 February 2011 Eric Anholt - updated with resolution to issues 1-3 + 3 February 2011 Eric Anholt - added issue 4, 5 + 21 February 2011 Eric Anholt - Include glXMakeCurrent() sentence + along with glXMakeContextCurrent() for removal. diff --git a/docs/relnotes-7.11.html b/docs/relnotes-7.11.html index 6c6622ed3f4..4b1730b17ec 100644 --- a/docs/relnotes-7.11.html +++ b/docs/relnotes-7.11.html @@ -38,6 +38,7 @@ tbd <ul> <li>GL_ARB_draw_instanced extension (gallium drivers, swrast) <li>GL_ARB_instanced_arrays extension (gallium drivers) +<li>GL_ARB_texture_compression_rgtc (gallium r600, swrast) <li>GL_ARB_draw_buffers_blend (gallium) <li>GL_EXT_texture_sRGB_decode (gallium drivers, swrast, i965) </ul> diff --git a/scons/gallium.py b/scons/gallium.py index 9118257ac05..112f6c89dca 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -195,6 +195,8 @@ def generate(env): # Determine whether we are cross compiling; in particular, whether we need # to compile code generators with a different compiler as the target code. host_platform = _platform.system().lower() + if host_platform.startswith('cygwin'): + host_platform = 'cygwin' host_machine = os.environ.get('PROCESSOR_ARCHITEW6432', os.environ.get('PROCESSOR_ARCHITECTURE', _platform.machine())) host_machine = { 'x86': 'x86', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index e685f4b73f0..1fec3adf5b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -832,14 +832,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef *colors_out) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef size0; - LLVMValueRef size1; - LLVMValueRef row_stride0_vec; - LLVMValueRef row_stride1_vec; - LLVMValueRef img_stride0_vec; - LLVMValueRef img_stride1_vec; - LLVMValueRef data_ptr0; - LLVMValueRef data_ptr1; + LLVMValueRef size0 = NULL; + LLVMValueRef size1 = NULL; + LLVMValueRef row_stride0_vec = NULL; + LLVMValueRef row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL; + LLVMValueRef img_stride1_vec = NULL; + LLVMValueRef data_ptr0 = NULL; + LLVMValueRef data_ptr1 = NULL; LLVMValueRef colors0[4], colors1[4]; unsigned chan; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index b8d193f3f89..9d5553f0ea0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -43,24 +43,24 @@ struct ureg_program; */ struct ureg_src { - unsigned File : 4; /* TGSI_FILE_ */ - unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Indirect : 1; /* BOOL */ - unsigned DimIndirect : 1; /* BOOL */ - unsigned Dimension : 1; /* BOOL */ - unsigned Absolute : 1; /* BOOL */ - unsigned Negate : 1; /* BOOL */ - int Index : 16; /* SINT */ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ unsigned IndirectFile : 4; /* TGSI_FILE_ */ - int IndirectIndex : 16; /* SINT */ unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ unsigned DimIndFile : 4; /* TGSI_FILE_ */ - int DimIndIndex : 16; /* SINT */ unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ + int Index : 16; /* SINT */ + int IndirectIndex : 16; /* SINT */ + int DimensionIndex : 16; /* SINT */ + int DimIndIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 3b6342ad8d1..4f1b0e71934 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -67,7 +67,7 @@ struct gen_mipmap_state struct pipe_vertex_element velem[2]; void *vs; - void *fs1d, *fs2d, *fs3d, *fsCube; + void *fs1d, *fs2d, *fs3d, *fsCube, *fs1da, *fs2da; struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -1321,6 +1321,13 @@ util_create_gen_mipmap(struct pipe_context *pipe, TGSI_INTERPOLATE_LINEAR); ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE, TGSI_INTERPOLATE_LINEAR); + if (pipe->screen->get_param(pipe->screen, PIPE_CAP_ARRAY_TEXTURES)) { + ctx->fs1da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D_ARRAY, + TGSI_INTERPOLATE_LINEAR); + ctx->fs2da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D_ARRAY, + TGSI_INTERPOLATE_LINEAR); + } + /* vertex data that doesn't change */ for (i = 0; i < 4; i++) { @@ -1390,8 +1397,25 @@ set_vertex_data(struct gen_mipmap_state *ctx, util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2, &ctx->vertices[0][1][0], 8); } - else { - /* 1D/2D/3D */ + else if (tex_target == PIPE_TEXTURE_1D_ARRAY) { + /* 1D texture array */ + ctx->vertices[0][1][0] = 0.0f; /*s*/ + ctx->vertices[0][1][1] = r; /*t*/ + ctx->vertices[0][1][2] = 0.0f; /*r*/ + + ctx->vertices[1][1][0] = 1.0f; + ctx->vertices[1][1][1] = r; + ctx->vertices[1][1][2] = 0.0f; + + ctx->vertices[2][1][0] = 1.0f; + ctx->vertices[2][1][1] = r; + ctx->vertices[2][1][2] = 0.0f; + + ctx->vertices[3][1][0] = 0.0f; + ctx->vertices[3][1][1] = r; + ctx->vertices[3][1][2] = 0.0f; + } else { + /* 1D/2D/3D/2D array */ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ ctx->vertices[0][1][2] = r; /*r*/ @@ -1427,6 +1451,10 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) { struct pipe_context *pipe = ctx->pipe; + if (ctx->fs2da) + pipe->delete_fs_state(pipe, ctx->fs2da); + if (ctx->fs1da) + pipe->delete_fs_state(pipe, ctx->fs1da); pipe->delete_fs_state(pipe, ctx->fsCube); pipe->delete_fs_state(pipe, ctx->fs3d); pipe->delete_fs_state(pipe, ctx->fs2d); @@ -1499,7 +1527,11 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, fs = ctx->fsCube; break; case PIPE_TEXTURE_1D_ARRAY: + fs = ctx->fs1da; + break; case PIPE_TEXTURE_2D_ARRAY: + fs = ctx->fs2da; + break; default: assert(0); fs = ctx->fs2d; @@ -1555,6 +1587,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, if (pt->target == PIPE_TEXTURE_3D) nr_layers = u_minify(pt->depth0, dstLevel); + else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) + nr_layers = pt->array_size; else nr_layers = 1; @@ -1564,11 +1598,12 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* in theory with geom shaders and driver with full layer support could do that in one go. */ layer = i; - offset = 1.0f / (float)(nr_layers * 2); /* XXX hmm really? */ rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2); - } - else + } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) { + layer = i; + rcoord = (float)layer; + } else layer = face; memset(&surf_templ, 0, sizeof(surf_templ)); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index dec8dd717e8..3cf8ee0831d 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -531,7 +531,10 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, unsigned first, size; boolean flushed; - if (vb->stride) { + if (mgr->ve->ve[i].instance_divisor) { + first = 0; + size = vb->buffer->width0; + } else if (vb->stride) { first = vb->stride * min_index; size = vb->stride * count; } else { diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index d92b2ccb31e..b4a91dabb37 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -75,6 +75,14 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, batch->ptr += size; } +static INLINE boolean +i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers) +{ + return batch->iws->validate_buffers(batch, buffers, num_of_buffers); +} + static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 97c25665156..f885417f8ed 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915, I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)); + } + switch (cpp) { case 1: case 2: @@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } void @@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; + struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer}; I915_DBG(DBG_BLIT, @@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, buffers, 2)); + } + switch (cpp) { case 1: case 2: @@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 707b2e9f956..cbf919754e5 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -73,10 +73,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_index_buffer(draw, mapped_indices); if (cbuf_dirty) { - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); } /* @@ -165,6 +168,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + i915->flush_dirty = 0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 2cf53424f06..1da637d068e 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -150,6 +150,15 @@ struct i915_state /** Describes the current hardware vertex layout */ struct vertex_info vertex_info; + /* static state (dst/depth buffer state) */ + struct i915_winsys_buffer *cbuf_bo; + unsigned cbuf_flags; + struct i915_winsys_buffer *depth_bo; + unsigned depth_flags; + unsigned dst_buf_vars; + uint32_t draw_offset; + uint32_t draw_size; + unsigned id; /* track lost context events */ }; @@ -237,6 +246,10 @@ struct i915_context { unsigned hardware_dirty; unsigned immediate_dirty; unsigned dynamic_dirty; + unsigned flush_dirty; + + struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; + int num_validation_buffers; struct util_slab_mempool transfer_pool; }; @@ -277,6 +290,18 @@ struct i915_context { #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) #define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1)) + +/* hw flush handling */ +#define I915_FLUSH_CACHE 1 +#define I915_PIPELINE_FLUSH 2 + +static INLINE +void i915_set_flush_dirty(struct i915_context *i915, unsigned flush) +{ + i915->hardware_dirty |= I915_HW_FLUSH; + i915->flush_dirty |= flush; +} /*********************************************************************** diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 911c051d1f2..22a2c7b2cb4 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -96,4 +96,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + /* kernel emits flushes in between batchbuffers */ + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 509d487b498..0323ad940f9 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -36,73 +36,105 @@ #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_memory.h" -static unsigned translate_format( enum pipe_format format ) +struct i915_tracked_hw_state { + const char *name; + void (*validate)(struct i915_context *); + void (*emit)(struct i915_context *); + unsigned dirty, batch_space; +}; + + +static void +emit_flush(struct i915_context *i915) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_B5G6R5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } + /* Cache handling is very cheap atm. State handling can request to flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. */ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); +} + +static void +validate_immediate(struct i915_context *i915) +{ + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) + i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; +} + +static void +validate_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo) + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.cbuf_bo; + + if (i915->current.depth_bo) + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.depth_bo; } -static unsigned translate_depth_format( enum pipe_format zformat ) +static void +validate_map(struct i915_context *i915) { - switch (zformat) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + struct i915_texture *tex; + + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; + } } } +const static struct i915_tracked_hw_state hw_atoms[] = { + { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, + { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE }, + { "static", validate_static, NULL, I915_HW_STATIC }, + { "map", validate_map, NULL, I915_HW_MAP } +}; -/** - * Examine framebuffer state to determine width, height. - */ static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) +i915_validate_state(struct i915_context *i915, unsigned *batch_space) { - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; + int i; + + i915->num_validation_buffers = 0; + *batch_space = 0; + + for (i = 0; i < Elements(hw_atoms); i++) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) { + hw_atoms[i].validate(i915); + *batch_space += hw_atoms[i].batch_space; + } + + if (i915->num_validation_buffers == 0) return TRUE; - } - else { - *width = *height = 0; + + if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, + i915->num_validation_buffers)) return FALSE; - } + + return TRUE; } -static inline uint32_t -buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +static void +emit_state(struct i915_context *i915) { - uint32_t tiling_bits = 0; - - switch (tiling) { - case I915_TILE_Y: - tiling_bits |= BUF_3D_TILE_WALK_Y; - case I915_TILE_X: - tiling_bits |= BUF_3D_TILED_SURFACE; - case I915_TILE_NONE: - break; - } + int i; - return tiling_bits; + for (i = 0; i < Elements(hw_atoms); i++) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) + hw_atoms[i].emit(i915); } /* Push the state into the sarea and/or texture memory. @@ -110,6 +142,7 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) void i915_emit_hardware_state(struct i915_context *i915 ) { + unsigned batch_space; /* XXX: there must be an easier way */ const unsigned dwords = ( 14 + 7 + @@ -135,14 +168,21 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); - if(!BEGIN_BATCH(dwords, relocs)) { + if (!i915_validate_state(i915, &batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); + } + + if(!BEGIN_BATCH(batch_space + dwords, relocs)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space + dwords, relocs)); } save_ptr = (uintptr_t)i915->batch->ptr; save_relocs = i915->batch->relocs; + emit_state(i915); /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIANT) { @@ -223,7 +263,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) { int i; for (i = 0; i < I915_MAX_DYNAMIC; i++) { - if (i915->dynamic_dirty & (1 << i)); + if (i915->dynamic_dirty & (1 << i)) OUT_BATCH(i915->current.dynamic[i]); } } @@ -233,64 +273,27 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 8 dwords, 2 relocs */ if (i915->hardware_dirty & I915_HW_STATIC) { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; - - if (cbuf_surface) { - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - assert(tex); - + if (i915->current.cbuf_bo) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0); } /* What happens if no zbuf?? */ - if (depth_surface) { - struct i915_texture *tex = i915_texture(depth_surface->texture); - unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, - depth_surface->u.tex.first_layer); - assert(tex); - assert(offset == 0); - + if (i915->current.depth_bo) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - assert(tex); - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0); } { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); + OUT_BATCH(i915->current.dst_buf_vars); } } #endif @@ -362,7 +365,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) uint i; OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + OUT_BATCH((1 << nr) - 1); for (i = 0; i < nr; i++) { const uint *c; @@ -411,31 +414,13 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 6 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_STATIC) { - uint w, h; - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - unsigned x, y; - int layer; - uint32_t draw_offset; - boolean ret; - - ret = framebuffer_size(&i915->framebuffer, &w, &h); - assert(ret); - - layer = cbuf_surface->u.tex.first_layer; - - x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; - y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; - - draw_offset = x | (y << 16); - /* XXX flush only required when the draw_offset changes! */ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(draw_offset); - OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); - OUT_BATCH(draw_offset); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); } #endif diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index dc9a4c1e2fd..97044499990 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -27,17 +27,151 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_resource.h" /*********************************************************************** * Update framebuffer state */ +static unsigned translate_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B5G6R5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format(enum pipe_format zformat) +{ + switch (zformat) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + static void update_framebuffer(struct i915_context *i915) { - /* HW emit currently references framebuffer state directly: + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + unsigned cformat, zformat; + unsigned x, y, w, h; + int layer; + uint32_t draw_offset; + boolean ret; + + if (cbuf_surface) { + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + assert(tex); + + i915->current.cbuf_bo = tex->buffer; + i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + cformat = cbuf_surface->format; + + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + } else { + i915->current.cbuf_bo = NULL; + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ + x = y = 0; + } + cformat = translate_format(cformat); + + /* What happens if no zbuf?? */ + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); + assert(tex); + assert(offset == 0); + + i915->current.depth_bo = tex->buffer; + i915->current.depth_flags = BUF_3D_ID_DEPTH | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + zformat = translate_depth_format(depth_surface->format); + } else { + i915->current.depth_bo = NULL; + zformat = 0; + } + + i915->current.dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat; + + /* drawing rect calculations */ + draw_offset = x | (y << 16); + ret = framebuffer_size(&i915->framebuffer, &w, &h); + assert(ret); + if (i915->current.draw_offset != draw_offset) { + i915->current.draw_offset = draw_offset; + /* XXX: only emit flush on change and not always in emit */ + } + i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16); + i915->hardware_dirty |= I915_HW_STATIC; + + /* flush the cache in case we sample from the old renderbuffers */ + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } struct i915_tracked_state i915_hw_framebuffer = { diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index e915a886c9b..4ac2f5b9777 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -95,6 +95,18 @@ struct i915_winsys { (*batchbuffer_create)(struct i915_winsys *iws); /** + * Validate buffers for usage in this batchbuffer. + * Does space-checking and asorted other book-keeping. + * + * @batch + * @buffers array to buffers to validate + * @num_of_buffers size of the passed array + */ + boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers); + + /** * Emit a relocation to a buffer. * Target position in batchbuffer is the same as ptr. * diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 3a55e76bc35..a21a3c74484 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -47,6 +47,9 @@ lp_fence_create(unsigned rank) static int fence_id; struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + if (!fence) + return NULL; + pipe_reference_init(&fence->reference, 1); pipe_mutex_init(fence->mutex); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 21e8012d46a..2c32aa93cdf 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -278,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return util_format_s3tc_enabled; } + /* u_format doesn't support RGTC yet */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + return FALSE; + } + /* * Everything else should be supported by u_format. */ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 1968d0feb35..990acea9f44 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -255,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -265,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -276,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9f85bd4ce5f..d422ffe03f8 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -203,7 +203,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); @@ -353,6 +353,7 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 30073759476..e9c7d7bf63f 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -220,7 +220,7 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[20]; + uint32_t cb[22]; }; struct r300_vap_invariant_state { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 354144cac79..b97c45ac198 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -214,11 +214,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, /* RGTC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + result |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: + result &= ~(0xfff << 9); /* mask off swizzle */ + result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT; return R500_TX_FORMAT_ATI1N | result; - case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_RGTC2_SNORM: + result |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: + result &= ~(0xfff << 9); /* mask off swizzle */ + result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT | + R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT; return R400_TX_FORMAT_ATI2N | result; default: return ~0; /* Unsupported/unknown. */ diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 4f86e3b4c38..8cb417f9731 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -98,31 +98,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +void eg_cf_vtx(struct r600_vertex_element *ve) { - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; + struct r600_pipe_state *rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 64c52bca795..1b76f0098dd 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -118,10 +118,10 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1393df88757..8006e9b9a58 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -86,6 +86,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -135,6 +136,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -1441,7 +1443,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; } @@ -2778,12 +2781,13 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); - fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format); - fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all); - fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all); - fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all); + fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); + fprintf(stderr, "NUM:%d ", vtx->num_format_all); + fprintf(stderr, "COMP:%d ", vtx->format_comp_all); + fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); id++; - fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET:%d\n", vtx->offset); //TODO id++; fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); @@ -2794,29 +2798,9 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +static void r600_cf_vtx(struct r600_vertex_element *ve) { struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; @@ -2962,37 +2946,19 @@ out_unknown: int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) { - unsigned ndw, i; - u32 *bytecode; - unsigned fetch_resource_start = 0, format, num_format, format_comp; + static int dump_shaders = -1; + + struct r600_bc bc; + struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - - /* 2 dwords for cf aligned to 4 + 4 dwords per input */ - ndw = 8 + ve->count * 4; - ve->fs_size = ndw * 4; - - /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); - if (ve->fetch_shader == NULL) { - return -ENOMEM; - } - - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); - if (bytecode == NULL) { - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); - return -ENOMEM; - } - - if (rctx->family >= CHIP_CEDAR) { - eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - } else { - r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - fetch_resource_start = 160; - } + unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned format, num_format, format_comp; + u32 *bytecode; + int i, r; /* vertex elements offset need special handling, if offset is bigger - * than what we can put in fetch instruction then we need to alterate + + * than what we can put in fetch instruction then we need to alterate * the vertex resource offset. In such case in order to simplify code * we will bound one resource per elements. It's a worst case scenario. */ @@ -3003,40 +2969,155 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } } + memset(&bc, 0, sizeof(bc)); + r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); + if (r) + return r; + + for (i = 0; i < ve->count; i++) { + if (elements[i].instance_divisor > 1) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor); + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + } + } + for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { + r600_bc_clear(&bc); R600_ERR("unknown format %d\n", ve->elements[i].src_format); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); return -EINVAL; } /* see above for vbuffer_need_offset explanation */ vbuffer_index = elements[i].vertex_buffer_index; - if (ve->vbuffer_need_offset) { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); - } else { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); - } - bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | - S_SQ_VTX_WORD0_SRC_SEL_X(0) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | - S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | - S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | - S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | - S_SQ_VTX_WORD1_DATA_FORMAT(format) | - S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | - S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | - S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); - bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); - bytecode[8 + i * 4 + 3] = 0; + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.srf_mode_all = 1; + vtx.offset = elements[i].src_offset; + + if ((r = r600_bc_add_vtx(&bc, &vtx))) { + r600_bc_clear(&bc); + return r; + } + } + + r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + r600_bc_clear(&bc); + return -ENOMEM; + } + + ve->fs_size = bc.ndw*4; + if ((r = r600_bc_build(&bc))) { + r600_bc_clear(&bc); + return r; } + + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bc_dump(&bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bc_clear(&bc); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + memcpy(bytecode, bc.bytecode, ve->fs_size); + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + r600_bc_clear(&bc); + + if (rctx->family >= CHIP_CEDAR) + eg_cf_vtx(ve); + else + r600_cf_vtx(ve); + return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 453c29790c1..dbd1e204b49 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -103,6 +103,7 @@ struct r600_bc_vtx { unsigned num_format_all; unsigned format_comp_all; unsigned srf_mode_all; + unsigned offset; }; struct r600_bc_output { @@ -187,7 +188,7 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); +void eg_cf_vtx(struct r600_vertex_element *ve); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 9865ea17ae5..04408a5cc8e 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -225,7 +225,7 @@ struct texture_orig_info { unsigned height0; }; -static void r600_s3tc_to_blittable(struct pipe_resource *tex, +static void r600_compressed_to_blittable(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -253,7 +253,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex, } -static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, +static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -282,13 +282,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, restore_orig[0] = restore_orig[1] = FALSE; - if (util_format_is_s3tc(src->format)) { - r600_s3tc_to_blittable(src, src_level, &orig_info[0]); + if (util_format_is_compressed(src->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; } - if (util_format_is_s3tc(dst->format)) { - r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); + if (util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); restore_orig[1] = TRUE; /* translate the dst box as well */ dstx = util_format_get_nblocksx(orig_info[1].format, dstx); @@ -299,10 +299,10 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src, src_level, src_box); if (restore_orig[0]) - r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]); + r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); if (restore_orig[1]) - r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]); + r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 0c5d7133c7a..2363cd1ebc5 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx, } static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct radeon *ws = (struct radeon*)pipe->winsys; struct r600_resource_buffer *rbuffer = r600_buffer(resource); @@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.b.b.b.depth0 = 1; rbuffer->r.b.b.b.array_size = 1; rbuffer->r.b.b.b.flags = 0; - rbuffer->r.b.user_ptr = ptr; + rbuffer->r.b.user_ptr = ptr; rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; return &rbuffer->r.b.b.b; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 34094001b75..3fd6668f718 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -77,8 +77,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, u_upload_flush(rctx->vbuf_mgr->uploader); } -static void r600_update_num_contexts(struct r600_screen *rscreen, - int diff) +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) { pipe_mutex_lock(rscreen->mutex_num_contexts); if (diff > 0) { @@ -286,13 +285,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_INSTANCED_DRAWING: return 1; /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - case PIPE_CAP_INSTANCED_DRAWING: return 0; case PIPE_CAP_ARRAY_TEXTURES: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 240c8f1ffd0..0b4dc75e584 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; - /* Would like some magic "get_bool_option_once" routine. - */ - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + /* Would like some magic "get_bool_option_once" routine. + */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); @@ -420,6 +420,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; unsigned i; + int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -451,6 +452,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; + + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = 0; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } + default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); return -EINVAL; @@ -521,6 +542,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { int index; if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && @@ -535,6 +557,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx, index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; @@ -2858,7 +2887,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3016,7 +3045,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 72707fbd8b8..3c072fe7ca9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - } + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } @@ -520,7 +520,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw.info.count; - rdraw.vgt_num_instances = 1; + rdraw.vgt_num_instances = draw.info.instance_count; rdraw.vgt_index_type = vgt_dma_index_type; rdraw.vgt_draw_initiator = vgt_draw_initiator; rdraw.indices = NULL; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 4c9d5609c06..048d0b61e3b 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -292,7 +292,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, bind = PIPE_BIND_RENDER_TARGET; /* hackaround for S3TC */ - if (util_format_is_s3tc(res->format)) + if (util_format_is_compressed(res->format)) return TRUE; if (!screen->is_format_supported(screen, @@ -433,7 +433,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - util_format_is_s3tc(templ->format)) + util_format_is_compressed(templ->format)) array_mode = V_038000_ARRAY_1D_TILED_THIN1; return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, @@ -887,12 +887,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + word4 |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: result = FMT_BC4; goto out_word4; - case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_RGTC2_SNORM: + word4 |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: result = FMT_BC5; goto out_word4; default: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index a06817c5735..603e1de7982 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -249,6 +249,11 @@ softpipe_is_format_supported( struct pipe_screen *screen, return util_format_s3tc_enabled; } + /* u_format doesn't implement RGTC yet */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + return FALSE; + } + /* * Everything else should be supported by u_format. */ diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index afeab5eef42..7cc5af89639 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -5,6 +5,7 @@ #include "i915_drm.h" #include "i915/i915_debug.h" #include <xf86drm.h> +#include <stdio.h> #define BATCH_RESERVED 16 @@ -71,6 +72,26 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws) return &batch->base; } +static boolean +i915_drm_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffer, + int num_of_buffers) +{ + struct i915_drm_batchbuffer *drm_batch = i915_drm_batchbuffer(batch); + drm_intel_bo *bos[num_of_buffers + 1]; + int i, ret; + + bos[0] = drm_batch->bo; + for (i = 0; i < num_of_buffers; i++) + bos[i+1] = intel_bo(buffer[i]); + + ret = drm_intel_bufmgr_check_aperture_space(bos, num_of_buffers); + if (ret != 0) + return FALSE; + + return TRUE; +} + static int i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, @@ -169,6 +190,14 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, assert(ret == 0); } + if (i915_drm_winsys(ibatch->iws)->dump_raw_file) { + FILE *file = fopen(i915_drm_winsys(ibatch->iws)->dump_raw_file, "a"); + if (file) { + fwrite(batch->base.map, used, 1, file); + fclose(file); + } + } + #ifdef INTEL_RUN_SYNC drm_intel_bo_wait_rendering(batch->bo); #endif @@ -202,6 +231,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch) void i915_drm_winsys_init_batchbuffer_functions(struct i915_drm_winsys *idws) { idws->base.batchbuffer_create = i915_drm_batchbuffer_create; + idws->base.validate_buffers = i915_drm_batchbuffer_validate_buffers; idws->base.batchbuffer_reloc = i915_drm_batchbuffer_reloc; idws->base.batchbuffer_flush = i915_drm_batchbuffer_flush; idws->base.batchbuffer_destroy = i915_drm_batchbuffer_destroy; diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c index 2288b48b2bd..2c3b508d056 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c @@ -72,6 +72,7 @@ i915_drm_winsys_create(int drmFD) drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager); idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE); + idws->dump_raw_file = debug_get_option("I915_DUMP_RAW_FILE", NULL); idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE); return &idws->base; diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h index 0d74d0270c7..dae53c3e801 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h @@ -18,6 +18,7 @@ struct i915_drm_winsys struct i915_winsys base; boolean dump_cmd; + char *dump_raw_file; boolean send_cmd; int fd; /**< Drm file discriptor */ diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index 8085591c8eb..3d0c1fa6224 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -58,6 +58,14 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws) return &batch->base; } +static boolean +i915_sw_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffer, + int num_of_buffers) +{ + return TRUE; +} + static int i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, @@ -107,16 +115,16 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, #ifdef INTEL_ALWAYS_FLUSH /* MI_FLUSH | FLUSH_MAP_CACHE */ - i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0)); + i915_winsys_batchbuffer_dword_unchecked(ibatch, (0x4<<23)|(1<<0)); used += 4; #endif if ((used & 4) == 0) { /* MI_NOOP */ - i915_winsys_batchbuffer_dword(ibatch, 0); + i915_winsys_batchbuffer_dword_unchecked(ibatch, 0); } /* MI_BATCH_BUFFER_END */ - i915_winsys_batchbuffer_dword(ibatch, (0xA<<23)); + i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23)); used = batch->base.ptr - batch->base.map; assert((used & 4) == 0); @@ -146,6 +154,7 @@ i915_sw_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch) void i915_sw_winsys_init_batchbuffer_functions(struct i915_sw_winsys *isws) { isws->base.batchbuffer_create = i915_sw_batchbuffer_create; + isws->base.validate_buffers = i915_sw_batchbuffer_validate_buffers; isws->base.batchbuffer_reloc = i915_sw_batchbuffer_reloc; isws->base.batchbuffer_flush = i915_sw_batchbuffer_flush; isws->base.batchbuffer_destroy = i915_sw_batchbuffer_destroy; diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.c b/src/gallium/winsys/i915/sw/i915_sw_winsys.c index 058ddc44aaf..fc48da6fb92 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_winsys.c +++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.c @@ -50,7 +50,7 @@ i915_sw_winsys_create() isws->base.pci_id = deviceID; isws->max_batch_size = 16 * 4096; - isws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE); + isws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE); return &isws->base; } diff --git a/src/gallium/winsys/sw/xlib/SConscript b/src/gallium/winsys/sw/xlib/SConscript index f6c47411831..df01a9ec2bf 100644 --- a/src/gallium/winsys/sw/xlib/SConscript +++ b/src/gallium/winsys/sw/xlib/SConscript @@ -4,7 +4,7 @@ Import('*') -if env['platform'] in ('cygwin_nt-5.1', 'cygwin_nt-6.1', 'linux'): +if env['platform'] in ('cygwin', 'linux'): env = env.Clone() diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 876f0dfc2a5..df031d2d548 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -208,6 +208,6 @@ builtin_compiler: $(GLSL2_OBJECTS) $(OBJECTS) builtin_stubs.o builtin_function.cpp: builtins/profiles/* builtins/ir/* builtins/tools/generate_builtins.py builtins/tools/texture_builtins.py builtin_compiler @echo Regenerating builtin_function.cpp... - $(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp + $(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp || rm -f builtin_function.cpp -include depend diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index bef099cca3b..fd1f0b49f42 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3445,11 +3445,9 @@ ast_struct_specifier::hir(exec_list *instructions, if (!state->symbols->add_type(name, t)) { _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); } else { - - const glsl_type **s = (const glsl_type **) - realloc(state->user_structures, - sizeof(state->user_structures[0]) * - (state->num_user_structures + 1)); + const glsl_type **s = reralloc(state, state->user_structures, + const glsl_type *, + state->num_user_structures + 1); if (s != NULL) { s[state->num_user_structures] = t; state->user_structures = s; diff --git a/src/glsl/builtin_types.h b/src/glsl/builtin_types.h index 8ccbf6e312f..58b9a81273a 100644 --- a/src/glsl/builtin_types.h +++ b/src/glsl/builtin_types.h @@ -27,6 +27,10 @@ const glsl_type glsl_type::_error_type = const glsl_type glsl_type::_void_type = glsl_type(GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0, "void"); +const glsl_type glsl_type::_sampler3D_type = + glsl_type(GL_SAMPLER_3D, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT, + "sampler3D"); + const glsl_type *const glsl_type::error_type = & glsl_type::_error_type; const glsl_type *const glsl_type::void_type = & glsl_type::_void_type; @@ -181,8 +185,6 @@ const glsl_type glsl_type::builtin_110_types[] = { "sampler1DShadow"), glsl_type(GL_SAMPLER_2D_SHADOW, GLSL_SAMPLER_DIM_2D, 1, 0, GLSL_TYPE_FLOAT, "sampler2DShadow"), - glsl_type(GL_SAMPLER_3D, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT, - "sampler3D"), }; /*@}*/ diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag index 43653a906f8..0e3c7ac4199 100644 --- a/src/glsl/builtins/profiles/130.frag +++ b/src/glsl/builtins/profiles/130.frag @@ -491,8 +491,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod); ivec2 textureSize(isampler1DArray sampler, int lod); ivec2 textureSize(usampler1DArray sampler, int lod); ivec3 textureSize( sampler2DArray sampler, int lod); -ivec2 textureSize(isampler2DArray sampler, int lod); -ivec2 textureSize(usampler2DArray sampler, int lod); +ivec3 textureSize(isampler2DArray sampler, int lod); +ivec3 textureSize(usampler2DArray sampler, int lod); ivec2 textureSize(sampler1DArrayShadow sampler, int lod); ivec3 textureSize(sampler2DArrayShadow sampler, int lod); diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert index 742dec6e6d5..f85b27f8f8c 100644 --- a/src/glsl/builtins/profiles/130.vert +++ b/src/glsl/builtins/profiles/130.vert @@ -493,8 +493,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod); ivec2 textureSize(isampler1DArray sampler, int lod); ivec2 textureSize(usampler1DArray sampler, int lod); ivec3 textureSize( sampler2DArray sampler, int lod); -ivec2 textureSize(isampler2DArray sampler, int lod); -ivec2 textureSize(usampler2DArray sampler, int lod); +ivec3 textureSize(isampler2DArray sampler, int lod); +ivec3 textureSize(usampler2DArray sampler, int lod); ivec2 textureSize(sampler1DArrayShadow sampler, int lod); ivec3 textureSize(sampler2DArrayShadow sampler, int lod); diff --git a/src/glsl/builtins/profiles/OES_texture_3D.frag b/src/glsl/builtins/profiles/OES_texture_3D.frag new file mode 100644 index 00000000000..b6ebd6a311f --- /dev/null +++ b/src/glsl/builtins/profiles/OES_texture_3D.frag @@ -0,0 +1,7 @@ +#version 100 +#extension GL_OES_texture_3D : enable + +vec4 texture3D (sampler3D sampler, vec3 coord); +vec4 texture3DProj (sampler3D sampler, vec4 coord); +vec4 texture3D (sampler3D sampler, vec3 coord, float bias); +vec4 texture3DProj (sampler3D sampler, vec4 coord, float bias); diff --git a/src/glsl/builtins/profiles/OES_texture_3D.vert b/src/glsl/builtins/profiles/OES_texture_3D.vert new file mode 100644 index 00000000000..81d12f51e9f --- /dev/null +++ b/src/glsl/builtins/profiles/OES_texture_3D.vert @@ -0,0 +1,7 @@ +#version 100 +#extension GL_OES_texture_3D : enable + +vec4 texture3D (sampler3D sampler, vec3 coord); +vec4 texture3DProj (sampler3D sampler, vec4 coord); +vec4 texture3DLod (sampler3D sampler, vec3 coord, float lod); +vec4 texture3DProjLod (sampler3D sampler, vec4 coord, float lod); diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index d7a37aef46d..e8c60936fb6 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -256,6 +256,11 @@ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, state->AMD_conservative_depth_enable = (ext_mode != extension_disable); state->AMD_conservative_depth_warn = (ext_mode == extension_warn); unsupported = !state->extensions->AMD_conservative_depth; + } else if (strcmp(name, "GL_OES_texture_3D") == 0 && state->es_shader) { + state->OES_texture_3D_enable = (ext_mode != extension_disable); + state->OES_texture_3D_warn = (ext_mode == extension_warn); + + unsupported = !state->extensions->EXT_texture3D; } else { unsupported = true; } diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 10cb673c694..b5c016fb399 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -172,6 +172,8 @@ struct _mesa_glsl_parse_state { unsigned ARB_shader_stencil_export_warn:1; unsigned AMD_conservative_depth_enable:1; unsigned AMD_conservative_depth_warn:1; + unsigned OES_texture_3D_enable:1; + unsigned OES_texture_3D_warn:1; /*@}*/ /** Extensions supported by the OpenGL implementation. */ diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 76b4f3e4cb0..78d10bd9380 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -131,6 +131,7 @@ glsl_type::generate_110_types(glsl_symbol_table *symtab) add_types_to_symbol_table(symtab, builtin_110_types, Elements(builtin_110_types), false); + add_types_to_symbol_table(symtab, &_sampler3D_type, 1, false); add_types_to_symbol_table(symtab, builtin_110_deprecated_structure_types, Elements(builtin_110_deprecated_structure_types), false); @@ -179,6 +180,13 @@ glsl_type::generate_EXT_texture_array_types(glsl_symbol_table *symtab, void +glsl_type::generate_OES_texture_3D_types(glsl_symbol_table *symtab, bool warn) +{ + add_types_to_symbol_table(symtab, &_sampler3D_type, 1, warn); +} + + +void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) { switch (state->language_version) { @@ -204,6 +212,10 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) glsl_type::generate_ARB_texture_rectangle_types(state->symbols, state->ARB_texture_rectangle_warn); } + if (state->OES_texture_3D_enable && state->language_version == 100) { + glsl_type::generate_OES_texture_3D_types(state->symbols, + state->OES_texture_3D_warn); + } if (state->EXT_texture_array_enable && state->language_version < 130) { // These are already included in 130; don't create twice. diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 61bf5e0cfd2..3c2672c01a0 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -427,6 +427,7 @@ private: /*@{*/ static const glsl_type _error_type; static const glsl_type _void_type; + static const glsl_type _sampler3D_type; static const glsl_type builtin_core_types[]; static const glsl_type builtin_structure_types[]; static const glsl_type builtin_110_deprecated_structure_types[]; @@ -453,6 +454,7 @@ private: static void generate_130_types(glsl_symbol_table *); static void generate_ARB_texture_rectangle_types(glsl_symbol_table *, bool); static void generate_EXT_texture_array_types(glsl_symbol_table *, bool); + static void generate_OES_texture_3D_types(glsl_symbol_table *, bool); /*@}*/ /** diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index a275ba5b9fe..2c28bc27150 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -535,8 +535,13 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, CARD64 ret = 0; #ifdef __DRI2_FLUSH - if (psc->f) - (*psc->f->flush)(priv->driDrawable); + if (psc->f) { + struct glx_context *gc = __glXGetCurrentContext(); + + if (gc) { + (*psc->f->flush)(priv->driDrawable); + } + } #endif /* Old servers don't send invalidate events */ diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h index fdcef8075a8..2b6966f2e08 100644 --- a/src/glx/glxclient.h +++ b/src/glx/glxclient.h @@ -419,9 +419,9 @@ struct glx_context /*@} */ /** - * Thread ID we're currently current in. Zero if none. + * Number of threads we're currently current in. */ - unsigned long thread_id; + unsigned long thread_refcount; char gl_extension_bits[__GL_EXT_BYTES]; }; diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 80eaf72b7d5..22bebab26bc 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -727,11 +727,16 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable) xGLXSwapBuffersReq *req; #endif + gc = __glXGetCurrentContext(); + #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable); if (pdraw != NULL) { - glFlush(); + if (gc && drawable == gc->currentDrawable) { + glFlush(); + } + (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0); return; } @@ -746,7 +751,6 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable) ** The calling thread may or may not have a current context. If it ** does, send the context tag so the server can do a flush. */ - gc = __glXGetCurrentContext(); if ((gc != NULL) && (dpy == gc->currentDpy) && ((drawable == gc->currentDrawable) || (drawable == gc->currentReadable))) { diff --git a/src/glx/glxcurrent.c b/src/glx/glxcurrent.c index 36317383544..9a6499037b1 100644 --- a/src/glx/glxcurrent.c +++ b/src/glx/glxcurrent.c @@ -216,6 +216,16 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw, struct glx_context *oldGC = __glXGetCurrentContext(); int ret = Success; + /* XXX: If this is left out, then libGL ends up not having this + * symbol, and drivers using it fail to load. Compare the + * implementation of this symbol to _glapi_noop_enable_warnings(), + * though, which gets into the library despite no callers, the same + * prototypes, and the same compile flags to the files containing + * them. Moving the definition to glapi_nop.c gets it into the + * library, though. + */ + (void)_glthread_GetID(); + /* Make sure that the new context has a nonzero ID. In the request, * a zero context ID is used only to mean that we bind to no current * context. @@ -236,41 +246,42 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw, _glapi_check_multithread(); - if (gc != NULL && gc->thread_id != 0 && gc->thread_id != _glthread_GetID()) { - __glXGenerateError(dpy, gc, gc->xid, - BadAccess, X_GLXMakeContextCurrent); - return False; - } - + __glXLock(); if (oldGC == gc && - gc->currentDrawable == draw && gc->currentReadable == read) + gc->currentDrawable == draw && gc->currentReadable == read) { + __glXUnlock(); return True; + } if (oldGC != &dummyContext) { - oldGC->vtable->unbind(oldGC, gc); - oldGC->currentDpy = 0; - oldGC->currentDrawable = None; - oldGC->currentReadable = None; - oldGC->thread_id = 0; + if (--oldGC->thread_refcount == 0) { + oldGC->vtable->unbind(oldGC, gc); + oldGC->currentDpy = 0; + oldGC->currentDrawable = None; + oldGC->currentReadable = None; + + if (oldGC->xid == None && oldGC != gc) { + /* We are switching away from a context that was + * previously destroyed, so we need to free the memory + * for the old handle. */ + oldGC->vtable->destroy(oldGC); + } + } } if (gc) { - gc->currentDpy = dpy; - gc->currentDrawable = draw; - gc->currentReadable = read; - gc->thread_id = _glthread_GetID(); + if (gc->thread_refcount++ == 0) { + gc->currentDpy = dpy; + gc->currentDrawable = draw; + gc->currentReadable = read; + } __glXSetCurrentContext(gc); ret = gc->vtable->bind(gc, oldGC, draw, read); } else { __glXSetCurrentContextNull(); } - if (oldGC != &dummyContext && oldGC->xid == None && oldGC != gc) { - /* We are switching away from a context that was - * previously destroyed, so we need to free the memory - * for the old handle. */ - oldGC->vtable->destroy(oldGC); - } + __glXUnlock(); if (ret) { __glXGenerateError(dpy, gc, None, ret, X_GLXMakeContextCurrent); diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index 3a0e64c46d1..ffd466479b4 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -90,6 +90,7 @@ static const struct extension_info known_glx_extensions[] = { { GLX(MESA_agp_offset), VER(0,0), N, N, N, Y }, /* Deprecated */ { GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N }, #endif + { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N }, { GLX(MESA_pixmap_colormap), VER(0,0), N, N, N, N }, /* Deprecated */ { GLX(MESA_release_buffers), VER(0,0), N, N, N, N }, /* Deprecated */ #ifdef GLX_USE_APPLEGL diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h index 78776618338..333b3f9adbd 100644 --- a/src/glx/glxextensions.h +++ b/src/glx/glxextensions.h @@ -43,6 +43,7 @@ enum MESA_agp_offset_bit, MESA_copy_sub_buffer_bit, MESA_depth_float_bit, + MESA_multithread_makecurrent_bit, MESA_pixmap_colormap_bit, MESA_release_buffers_bit, MESA_swap_control_bit, diff --git a/src/mesa/SConscript b/src/mesa/SConscript index ea04fb1a0ee..90fec124af9 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -106,6 +106,7 @@ main_sources = [ 'main/stencil.c', 'main/syncobj.c', 'main/texcompress.c', + 'main/texcompress_rgtc.c', 'main/texcompress_s3tc.c', 'main/texcompress_fxt1.c', 'main/texenv.c', diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index a413c02b573..5496b4fdd3b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -679,6 +679,8 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 #define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 #define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 30e3bd54469..9bdcda780ef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -213,6 +213,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 2; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: return 1; case FS_OPCODE_FB_WRITE: @@ -1200,6 +1201,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; + } else if (ir->op == ir_txd) { + assert(!"TXD isn't supported on gen4 yet."); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. @@ -1253,6 +1256,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) inst = emit(fs_inst(FS_OPCODE_TXL, dst)); break; case ir_txd: + inst = emit(fs_inst(FS_OPCODE_TXD, dst)); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -2308,6 +2313,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen5+ yet."); + break; } } else { switch (inst->opcode) { @@ -2325,13 +2340,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) case FS_OPCODE_TXB: if (inst->shadow_compare) { assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { assert(inst->mlen == 9); msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + assert(inst->mlen == 6); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; + } else { + assert(inst->mlen == 9); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen4 yet."); + break; } } assert(msg_type != -1); @@ -3607,6 +3635,7 @@ fs_visitor::generate_code() break; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: generate_tex(inst, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 8352760acf7..dc030ae5b50 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -71,6 +71,7 @@ enum fs_opcodes { FS_OPCODE_LINTERP, FS_OPCODE_TEX, FS_OPCODE_TXB, + FS_OPCODE_TXD, FS_OPCODE_TXL, FS_OPCODE_DISCARD_NOT, FS_OPCODE_DISCARD_AND, @@ -309,6 +310,7 @@ public: { return (opcode == FS_OPCODE_TEX || opcode == FS_OPCODE_TXB || + opcode == FS_OPCODE_TXD || opcode == FS_OPCODE_TXL); } diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 38c98f30efb..d6c1f1c893d 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -68,7 +68,7 @@ upload_clip_state(struct brw_context *brw) depth_clamp | provoking); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | - U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 356d5f72d89..746da462ee2 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -104,7 +104,8 @@ static const __DRItexBufferExtension intelTexBufferExtension = { static void intelDRI2Flush(__DRIdrawable *drawable) { - struct intel_context *intel = drawable->driContextPriv->driverPrivate; + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); if (intel->gen < 4) INTEL_FIREVERTICES(intel); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 7504b8a85db..b8bb2555acd 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -241,8 +241,7 @@ static const struct extension extension_table[] = { { "GL_OES_stencil4", o(dummy_false), DISABLE }, { "GL_OES_stencil8", o(EXT_framebuffer_object), ES1 | ES2 }, { "GL_OES_stencil_wrap", o(EXT_stencil_wrap), ES1 }, - /* GL_OES_texture_3D is disabled due to missing GLSL support. */ - { "GL_OES_texture_3D", o(EXT_texture3D), DISABLE }, + { "GL_OES_texture_3D", o(EXT_texture3D), ES2 }, { "GL_OES_texture_cube_map", o(ARB_texture_cube_map), ES1 }, { "GL_OES_texture_env_crossbar", o(ARB_texture_env_crossbar), ES1 }, { "GL_OES_texture_mirrored_repeat", o(ARB_texture_mirrored_repeat), ES1 }, @@ -428,6 +427,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE; ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE; ctx->Extensions.ARB_texture_rg = GL_TRUE; + ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE; ctx->Extensions.ARB_vertex_array_object = GL_TRUE; #if FEATURE_ARB_vertex_program ctx->Extensions.ARB_vertex_program = GL_TRUE; diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 1e395363475..947db84a69e 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -890,7 +890,43 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] = 16, 16, 16, 16, 0, 0, 0, 0, 0, 1, 1, 8 - } + }, + { + MESA_FORMAT_RED_RGTC1, + "MESA_FORMAT_RED_RGTC1", + GL_RED, + GL_UNSIGNED_NORMALIZED, + 4, 0, 0, 0, + 0, 0, 0, 0, 0, + 4, 4, 8 /* 8 bytes per 4x4 block */ + }, + { + MESA_FORMAT_SIGNED_RED_RGTC1, + "MESA_FORMAT_SIGNED_RED_RGTC1", + GL_RED, + GL_SIGNED_NORMALIZED, + 4, 0, 0, 0, + 0, 0, 0, 0, 0, + 4, 4, 8 /* 8 bytes per 4x4 block */ + }, + { + MESA_FORMAT_RG_RGTC2, + "MESA_FORMAT_RG_RGTC2", + GL_RG, + GL_UNSIGNED_NORMALIZED, + 4, 4, 0, 0, + 0, 0, 0, 0, 0, + 4, 4, 16 /* 16 bytes per 4x4 block */ + }, + { + MESA_FORMAT_SIGNED_RG_RGTC2, + "MESA_FORMAT_SIGNED_RG_RGTC2", + GL_RG, + GL_SIGNED_NORMALIZED, + 4, 4, 0, 0, + 0, 0, 0, 0, 0, + 4, 4, 16 /* 16 bytes per 4x4 block */ + }, }; @@ -1530,6 +1566,10 @@ _mesa_format_to_type_and_comps(gl_format format, case MESA_FORMAT_SRGBA_DXT5: #endif #endif + case MESA_FORMAT_RED_RGTC1: + case MESA_FORMAT_SIGNED_RED_RGTC1: + case MESA_FORMAT_RG_RGTC2: + case MESA_FORMAT_SIGNED_RG_RGTC2: /* XXX generate error instead? */ *datatype = GL_UNSIGNED_BYTE; *comps = 0; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 9a5cef37788..e21967e2b0c 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -179,6 +179,12 @@ typedef enum MESA_FORMAT_RGBA_16, /* ... */ /*@}*/ + /*@{*/ + MESA_FORMAT_RED_RGTC1, + MESA_FORMAT_SIGNED_RED_RGTC1, + MESA_FORMAT_RG_RGTC2, + MESA_FORMAT_SIGNED_RG_RGTC2, + /*@}*/ MESA_FORMAT_COUNT } gl_format; diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 7a0b522a2d8..82d02ed0ecf 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -64,6 +64,7 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } + /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; @@ -163,6 +164,15 @@ _mesa_glenum_to_compressed_format(GLenum format) case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: return MESA_FORMAT_SRGBA_DXT5; + case GL_COMPRESSED_RED_RGTC1: + return MESA_FORMAT_RED_RGTC1; + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return MESA_FORMAT_SIGNED_RED_RGTC1; + case GL_COMPRESSED_RG_RGTC2: + return MESA_FORMAT_RG_RGTC2; + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return MESA_FORMAT_SIGNED_RG_RGTC2; + default: return MESA_FORMAT_NONE; } @@ -209,6 +219,16 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, GLuint mesaFormat) return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; #endif #endif + + case MESA_FORMAT_RED_RGTC1: + return GL_COMPRESSED_RED_RGTC1; + case MESA_FORMAT_SIGNED_RED_RGTC1: + return GL_COMPRESSED_SIGNED_RED_RGTC1; + case MESA_FORMAT_RG_RGTC2: + return GL_COMPRESSED_RG_RGTC2; + case MESA_FORMAT_SIGNED_RG_RGTC2: + return GL_COMPRESSED_SIGNED_RG_RGTC2; + default: _mesa_problem(ctx, "Unexpected mesa texture format in" " _mesa_compressed_format_to_glenum()"); diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c new file mode 100644 index 00000000000..1a01755f14d --- /dev/null +++ b/src/mesa/main/texcompress_rgtc.c @@ -0,0 +1,1122 @@ +/* + * Copyright (C) 2011 Red Hat Inc. + * + * block compression parts are: + * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: + * Dave Airlie + */ + +/** + * \file texcompress_rgtc.c + * GL_EXT_texture_compression_rgtc support. + */ + + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "image.h" +#include "macros.h" +#include "mfeatures.h" +#include "mipmap.h" +#include "texcompress.h" +#include "texcompress_rgtc.h" +#include "texstore.h" + +#define RGTC_DEBUG 0 + +static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4], + GLint numxpixels, GLint numypixels); +static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4], + GLint numxpixels, GLint numypixels); + +static void extractsrc_u( GLubyte srcpixels[4][4], const GLchan *srcaddr, + GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps) +{ + GLubyte i, j; + const GLchan *curaddr; + for (j = 0; j < numypixels; j++) { + curaddr = srcaddr + j * srcRowStride * comps; + for (i = 0; i < numxpixels; i++) { + srcpixels[j][i] = *curaddr / (CHAN_MAX / 255); + curaddr += comps; + } + } +} + +static void extractsrc_s( GLbyte srcpixels[4][4], const GLfloat *srcaddr, + GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps) +{ + GLubyte i, j; + const GLfloat *curaddr; + for (j = 0; j < numypixels; j++) { + curaddr = srcaddr + j * srcRowStride * comps; + for (i = 0; i < numxpixels; i++) { + srcpixels[j][i] = FLOAT_TO_BYTE_TEX(*curaddr); + curaddr += comps; + } + } +} + + +GLboolean +_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS) +{ + GLubyte *dst; + const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */ + const GLchan *tempImage = NULL; + int i, j; + int numxpixels, numypixels; + const GLchan *srcaddr; + GLubyte srcpixels[4][4]; + GLubyte *blkaddr; + GLint dstRowDiff; + ASSERT(dstFormat == MESA_FORMAT_RED_RGTC1); + ASSERT(dstXoffset % 4 == 0); + ASSERT(dstYoffset % 4 == 0); + ASSERT(dstZoffset % 4 == 0); + (void) dstZoffset; + (void) dstImageOffsets; + + + tempImage = _mesa_make_temp_chan_image(ctx, dims, + baseInternalFormat, + _mesa_get_format_base_format(dstFormat), + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, + srcPacking); + if (!tempImage) + return GL_FALSE; /* out of memory */ + + dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0, + dstFormat, + texWidth, (GLubyte *) dstAddr); + + blkaddr = dst; + dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0; + for (j = 0; j < srcHeight; j+=4) { + if (srcHeight > j + 3) numypixels = 4; + else numypixels = srcHeight - j; + srcaddr = tempImage + j * srcWidth; + for (i = 0; i < srcWidth; i += 4) { + if (srcWidth > i + 3) numxpixels = 4; + else numxpixels = srcWidth - i; + extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1); + encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels); + srcaddr += numxpixels; + blkaddr += 8; + } + blkaddr += dstRowDiff; + } + if (tempImage) + free((void *) tempImage); + + return GL_TRUE; +} + +GLboolean +_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS) +{ + GLbyte *dst; + const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */ + const GLfloat *tempImage = NULL; + int i, j; + int numxpixels, numypixels; + const GLfloat *srcaddr; + GLbyte srcpixels[4][4]; + GLbyte *blkaddr; + GLint dstRowDiff; + ASSERT(dstFormat == MESA_FORMAT_SIGNED_RED_RGTC1); + ASSERT(dstXoffset % 4 == 0); + ASSERT(dstYoffset % 4 == 0); + ASSERT(dstZoffset % 4 == 0); + (void) dstZoffset; + (void) dstImageOffsets; + + tempImage = _mesa_make_temp_float_image(ctx, dims, + baseInternalFormat, + _mesa_get_format_base_format(dstFormat), + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, + srcPacking, 0x0); + if (!tempImage) + return GL_FALSE; /* out of memory */ + + dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0, + dstFormat, + texWidth, (GLubyte *) dstAddr); + + blkaddr = dst; + dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0; + for (j = 0; j < srcHeight; j+=4) { + if (srcHeight > j + 3) numypixels = 4; + else numypixels = srcHeight - j; + srcaddr = tempImage + j * srcWidth; + for (i = 0; i < srcWidth; i += 4) { + if (srcWidth > i + 3) numxpixels = 4; + else numxpixels = srcWidth - i; + extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1); + encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels); + srcaddr += numxpixels; + blkaddr += 8; + } + blkaddr += dstRowDiff; + } + if (tempImage) + free((void *) tempImage); + + return GL_TRUE; +} + +GLboolean +_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS) +{ + GLubyte *dst; + const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */ + const GLchan *tempImage = NULL; + int i, j; + int numxpixels, numypixels; + const GLchan *srcaddr; + GLubyte srcpixels[4][4]; + GLubyte *blkaddr; + GLint dstRowDiff; + + ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2); + ASSERT(dstXoffset % 4 == 0); + ASSERT(dstYoffset % 4 == 0); + ASSERT(dstZoffset % 4 == 0); + (void) dstZoffset; + (void) dstImageOffsets; + + tempImage = _mesa_make_temp_chan_image(ctx, dims, + baseInternalFormat, + _mesa_get_format_base_format(dstFormat), + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, + srcPacking); + if (!tempImage) + return GL_FALSE; /* out of memory */ + + dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0, + dstFormat, + texWidth, (GLubyte *) dstAddr); + + blkaddr = dst; + dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0; + for (j = 0; j < srcHeight; j+=4) { + if (srcHeight > j + 3) numypixels = 4; + else numypixels = srcHeight - j; + srcaddr = tempImage + j * srcWidth * 2; + for (i = 0; i < srcWidth; i += 4) { + if (srcWidth > i + 3) numxpixels = 4; + else numxpixels = srcWidth - i; + extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2); + encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels); + + blkaddr += 8; + extractsrc_u(srcpixels, (GLchan *)srcaddr + 1, srcWidth, numxpixels, numypixels, 2); + encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels); + + blkaddr += 8; + + srcaddr += numxpixels * 2; + } + blkaddr += dstRowDiff; + } + if (tempImage) + free((void *) tempImage); + + return GL_TRUE; +} + +GLboolean +_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS) +{ + GLbyte *dst; + const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */ + const GLfloat *tempImage = NULL; + int i, j; + int numxpixels, numypixels; + const GLfloat *srcaddr; + GLbyte srcpixels[4][4]; + GLbyte *blkaddr; + GLint dstRowDiff; + + ASSERT(dstFormat == MESA_FORMAT_SIGNED_RG_RGTC2); + ASSERT(dstXoffset % 4 == 0); + ASSERT(dstYoffset % 4 == 0); + ASSERT(dstZoffset % 4 == 0); + (void) dstZoffset; + (void) dstImageOffsets; + + tempImage = _mesa_make_temp_float_image(ctx, dims, + baseInternalFormat, + _mesa_get_format_base_format(dstFormat), + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, + srcPacking, 0x0); + if (!tempImage) + return GL_FALSE; /* out of memory */ + + dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0, + dstFormat, + texWidth, (GLubyte *) dstAddr); + + blkaddr = dst; + dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0; + for (j = 0; j < srcHeight; j += 4) { + if (srcHeight > j + 3) numypixels = 4; + else numypixels = srcHeight - j; + srcaddr = tempImage + j * srcWidth * 2; + for (i = 0; i < srcWidth; i += 4) { + if (srcWidth > i + 3) numxpixels = 4; + else numxpixels = srcWidth - i; + + extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2); + encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels); + blkaddr += 8; + + extractsrc_s(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2); + encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels); + blkaddr += 8; + + srcaddr += numxpixels * 2; + + } + blkaddr += dstRowDiff; + } + if (tempImage) + free((void *) tempImage); + + return GL_TRUE; +} + +static void _fetch_texel_rgtc_u(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLchan *value, int comps) +{ + GLchan decode; + const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps); + const GLubyte alpha0 = blksrc[0]; + const GLubyte alpha1 = blksrc[1]; + const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3; + const GLubyte acodelow = blksrc[2 + bit_pos / 8]; + const GLubyte acodehigh = blksrc[3 + bit_pos / 8]; + const GLubyte code = (acodelow >> (bit_pos & 0x7) | + (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7; + + if (code == 0) + decode = UBYTE_TO_CHAN( alpha0 ); + else if (code == 1) + decode = UBYTE_TO_CHAN( alpha1 ); + else if (alpha0 > alpha1) + decode = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) ); + else if (code < 6) + decode = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) ); + else if (code == 6) + decode = 0; + else + decode = CHAN_MAX; + + *value = decode; +} + + +static void _fetch_texel_rgtc_s(GLint srcRowStride, const GLbyte *pixdata, + GLint i, GLint j, GLbyte *value, int comps) +{ + GLbyte decode; + const GLbyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps); + const GLbyte alpha0 = blksrc[0]; + const GLbyte alpha1 = blksrc[1]; + const GLbyte bit_pos = ((j&3) * 4 + (i&3)) * 3; + const GLbyte acodelow = blksrc[2 + bit_pos / 8]; + const GLbyte acodehigh = blksrc[3 + bit_pos / 8]; + const GLbyte code = (acodelow >> (bit_pos & 0x7) | + (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7; + + if (code == 0) + decode = alpha0; + else if (code == 1) + decode = alpha1; + else if (alpha0 > alpha1) + decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7); + else if (code < 6) + decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5); + else if (code == 6) + decode = -128; + else + decode = 127; + + *value = decode; +} + +void +_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + GLchan red; + _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data), + i, j, &red, 1); + texel[RCOMP] = CHAN_TO_FLOAT(red); + texel[GCOMP] = 0.0; + texel[BCOMP] = 0.0; + texel[ACOMP] = 1.0; +} + +void +_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + GLbyte red; + _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data), + i, j, &red, 1); + texel[RCOMP] = BYTE_TO_FLOAT_TEX(red); + texel[GCOMP] = 0.0; + texel[BCOMP] = 0.0; + texel[ACOMP] = 1.0; +} + +void +_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + GLchan red, green; + _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data), + i, j, &red, 2); + _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data) + 8, + i, j, &green, 2); + texel[RCOMP] = CHAN_TO_FLOAT(red); + texel[GCOMP] = CHAN_TO_FLOAT(green); + texel[BCOMP] = 0.0; + texel[ACOMP] = 1.0; +} + +void +_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + GLbyte red, green; + _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data), + i, j, &red, 2); + _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data) + 8, + i, j, &green, 2); + texel[RCOMP] = BYTE_TO_FLOAT_TEX(red); + texel[GCOMP] = BYTE_TO_FLOAT_TEX(green); + texel[BCOMP] = 0.0; + texel[ACOMP] = 1.0; +} + +static void write_rgtc_encoded_channel(GLubyte *blkaddr, + GLubyte alphabase1, + GLubyte alphabase2, + GLubyte alphaenc[16]) +{ + *blkaddr++ = alphabase1; + *blkaddr++ = alphabase2; + *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6); + *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7); + *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5); + *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6); + *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7); + *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5); +} + +static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4], + GLint numxpixels, GLint numypixels) +{ + GLubyte alphabase[2], alphause[2]; + GLshort alphatest[2] = { 0 }; + GLuint alphablockerror1, alphablockerror2, alphablockerror3; + GLubyte i, j, aindex, acutValues[7]; + GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16]; + GLboolean alphaabsmin = GL_FALSE; + GLboolean alphaabsmax = GL_FALSE; + GLshort alphadist; + + /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */ + alphabase[0] = 0xff; alphabase[1] = 0x0; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i] == 0) + alphaabsmin = GL_TRUE; + else if (srccolors[j][i] == 255) + alphaabsmax = GL_TRUE; + else { + if (srccolors[j][i] > alphabase[1]) + alphabase[1] = srccolors[j][i]; + if (srccolors[j][i] < alphabase[0]) + alphabase[0] = srccolors[j][i]; + } + } + } + + + if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */ + /* shortcut here since it is a very common case (and also avoids later problems) */ + /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */ + /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */ + + *blkaddr++ = srccolors[0][0]; + blkaddr++; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; +#if RGTC_DEBUG + fprintf(stderr, "enc0 used\n"); +#endif + return; + } + + /* find best encoding for alpha0 > alpha1 */ + /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */ + alphablockerror1 = 0x0; + alphablockerror2 = 0xffffffff; + alphablockerror3 = 0xffffffff; + if (alphaabsmin) alphause[0] = 0; + else alphause[0] = alphabase[0]; + if (alphaabsmax) alphause[1] = 255; + else alphause[1] = alphabase[1]; + /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */ + for (aindex = 0; aindex < 7; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14; + } + + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] > acutValues[0]) { + alphaenc1[4*j + i] = 0; + alphadist = srccolors[j][i] - alphause[1]; + } + else if (srccolors[j][i] > acutValues[1]) { + alphaenc1[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7; + } + else if (srccolors[j][i] > acutValues[2]) { + alphaenc1[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7; + } + else if (srccolors[j][i] > acutValues[3]) { + alphaenc1[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7; + } + else if (srccolors[j][i] > acutValues[4]) { + alphaenc1[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7; + } + else if (srccolors[j][i] > acutValues[5]) { + alphaenc1[4*j + i] = 6; + alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7; + } + else if (srccolors[j][i] > acutValues[6]) { + alphaenc1[4*j + i] = 7; + alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7; + } + else { + alphaenc1[4*j + i] = 1; + alphadist = srccolors[j][i] - alphause[0]; + } + alphablockerror1 += alphadist * alphadist; + } + } + +#if RGTC_DEBUG + for (i = 0; i < 16; i++) { + fprintf(stderr, "%d ", alphaenc1[i]); + } + fprintf(stderr, "cutVals "); + for (i = 0; i < 8; i++) { + fprintf(stderr, "%d ", acutValues[i]); + } + fprintf(stderr, "srcVals "); + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + fprintf(stderr, "%d ", srccolors[j][i]); + } + } + fprintf(stderr, "\n"); +#endif + + /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax + are false but try it anyway */ + if (alphablockerror1 >= 32) { + + /* don't bother if encoding is already very good, this condition should also imply + we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */ + alphablockerror2 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] == 0) { + alphaenc2[4*j + i] = 6; + alphadist = 0; + } + else if (srccolors[j][i] == 255) { + alphaenc2[4*j + i] = 7; + alphadist = 0; + } + else if (srccolors[j][i] <= acutValues[0]) { + alphaenc2[4*j + i] = 0; + alphadist = srccolors[j][i] - alphabase[0]; + } + else if (srccolors[j][i] <= acutValues[1]) { + alphaenc2[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5; + } + else if (srccolors[j][i] <= acutValues[2]) { + alphaenc2[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5; + } + else if (srccolors[j][i] <= acutValues[3]) { + alphaenc2[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5; + } + else if (srccolors[j][i] <= acutValues[4]) { + alphaenc2[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5; + } + else { + alphaenc2[4*j + i] = 1; + alphadist = srccolors[j][i] - alphabase[1]; + } + alphablockerror2 += alphadist * alphadist; + } + } + + + /* skip this if the error is already very small + this encoding is MUCH better on average than #2 though, but expensive! */ + if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) { + GLshort blockerrlin1 = 0; + GLshort blockerrlin2 = 0; + GLubyte nralphainrangelow = 0; + GLubyte nralphainrangehigh = 0; + alphatest[0] = 0xff; + alphatest[1] = 0x0; + /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28))) + alphatest[1] = srccolors[j][i]; + if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28)) + alphatest[0] = srccolors[j][i]; + } + } + /* shouldn't happen too often, don't really care about those degenerated cases */ + if (alphatest[1] <= alphatest[0]) { + alphatest[0] = 1; + alphatest[1] = 254; + } + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + + /* find the "average" difference between the alpha values and the next encoded value. + This is then used to calculate new base values. + Should there be some weighting, i.e. those values closer to alphatest[x] have more weight, + since they will see more improvement, and also because the values in the middle are somewhat + likely to get no improvement at all (because the base values might move in different directions)? + OTOH it would mean the values in the middle are even less likely to get an improvement + */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i] <= alphatest[0] / 2) { + } + else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) { + } + else if (srccolors[j][i] <= acutValues[0]) { + blockerrlin1 += (srccolors[j][i] - alphatest[0]); + nralphainrangelow += 1; + } + else if (srccolors[j][i] <= acutValues[1]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[2]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[3]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[4]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else { + blockerrlin2 += (srccolors[j][i] - alphatest[1]); + nralphainrangehigh += 1; + } + } + } + /* shouldn't happen often, needed to avoid div by zero */ + if (nralphainrangelow == 0) nralphainrangelow = 1; + if (nralphainrangehigh == 0) nralphainrangehigh = 1; + alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow); +#if RGTC_DEBUG + fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow); + fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh); +#endif + /* again shouldn't really happen often... */ + if (alphatest[0] < 0) { + alphatest[0] = 0; + } + alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh); + if (alphatest[1] > 255) { + alphatest[1] = 255; + } + + alphablockerror3 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] <= alphatest[0] / 2) { + alphaenc3[4*j + i] = 6; + alphadist = srccolors[j][i]; + } + else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) { + alphaenc3[4*j + i] = 7; + alphadist = 255 - srccolors[j][i]; + } + else if (srccolors[j][i] <= acutValues[0]) { + alphaenc3[4*j + i] = 0; + alphadist = srccolors[j][i] - alphatest[0]; + } + else if (srccolors[j][i] <= acutValues[1]) { + alphaenc3[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5; + } + else if (srccolors[j][i] <= acutValues[2]) { + alphaenc3[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5; + } + else if (srccolors[j][i] <= acutValues[3]) { + alphaenc3[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5; + } + else if (srccolors[j][i] <= acutValues[4]) { + alphaenc3[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5; + } + else { + alphaenc3[4*j + i] = 1; + alphadist = srccolors[j][i] - alphatest[1]; + } + alphablockerror3 += alphadist * alphadist; + } + } + } + } + /* write the alpha values and encoding back. */ + if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) { +#if RGTC_DEBUG + if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1); +#endif + write_rgtc_encoded_channel( blkaddr, alphause[1], alphause[0], alphaenc1 ); + } + else if (alphablockerror2 <= alphablockerror3) { +#if RGTC_DEBUG + if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2); +#endif + write_rgtc_encoded_channel( blkaddr, alphabase[0], alphabase[1], alphaenc2 ); + } + else { +#if RGTC_DEBUG + fprintf(stderr, "enc3 used, error %d\n", alphablockerror3); +#endif + write_rgtc_encoded_channel( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 ); + } +} + + +static void write_rgtc_encoded_channel_s(GLbyte *blkaddr, + GLbyte alphabase1, + GLbyte alphabase2, + GLbyte alphaenc[16]) +{ + *blkaddr++ = alphabase1; + *blkaddr++ = alphabase2; + *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6); + *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7); + *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5); + *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6); + *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7); + *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5); +} + +static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4], + GLint numxpixels, GLint numypixels) +{ + GLbyte alphabase[2], alphause[2]; + GLshort alphatest[2] = { 0 }; + GLuint alphablockerror1, alphablockerror2, alphablockerror3; + GLbyte i, j, aindex, acutValues[7]; + GLbyte alphaenc1[16], alphaenc2[16], alphaenc3[16]; + GLboolean alphaabsmin = GL_FALSE; + GLboolean alphaabsmax = GL_FALSE; + GLshort alphadist; + + /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */ + alphabase[0] = 0xff; alphabase[1] = 0x0; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i] == 0) + alphaabsmin = GL_TRUE; + else if (srccolors[j][i] == 255) + alphaabsmax = GL_TRUE; + else { + if (srccolors[j][i] > alphabase[1]) + alphabase[1] = srccolors[j][i]; + if (srccolors[j][i] < alphabase[0]) + alphabase[0] = srccolors[j][i]; + } + } + } + + + if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */ + /* shortcut here since it is a very common case (and also avoids later problems) */ + /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */ + /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */ + + *blkaddr++ = srccolors[0][0]; + blkaddr++; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; +#if RGTC_DEBUG + fprintf(stderr, "enc0 used\n"); +#endif + return; + } + + /* find best encoding for alpha0 > alpha1 */ + /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */ + alphablockerror1 = 0x0; + alphablockerror2 = 0xffffffff; + alphablockerror3 = 0xffffffff; + if (alphaabsmin) alphause[0] = 0; + else alphause[0] = alphabase[0]; + if (alphaabsmax) alphause[1] = 255; + else alphause[1] = alphabase[1]; + /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */ + for (aindex = 0; aindex < 7; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14; + } + + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] > acutValues[0]) { + alphaenc1[4*j + i] = 0; + alphadist = srccolors[j][i] - alphause[1]; + } + else if (srccolors[j][i] > acutValues[1]) { + alphaenc1[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7; + } + else if (srccolors[j][i] > acutValues[2]) { + alphaenc1[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7; + } + else if (srccolors[j][i] > acutValues[3]) { + alphaenc1[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7; + } + else if (srccolors[j][i] > acutValues[4]) { + alphaenc1[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7; + } + else if (srccolors[j][i] > acutValues[5]) { + alphaenc1[4*j + i] = 6; + alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7; + } + else if (srccolors[j][i] > acutValues[6]) { + alphaenc1[4*j + i] = 7; + alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7; + } + else { + alphaenc1[4*j + i] = 1; + alphadist = srccolors[j][i] - alphause[0]; + } + alphablockerror1 += alphadist * alphadist; + } + } +#if RGTC_DEBUG + for (i = 0; i < 16; i++) { + fprintf(stderr, "%d ", alphaenc1[i]); + } + fprintf(stderr, "cutVals "); + for (i = 0; i < 8; i++) { + fprintf(stderr, "%d ", acutValues[i]); + } + fprintf(stderr, "srcVals "); + for (j = 0; j < numypixels; j++) + for (i = 0; i < numxpixels; i++) { + fprintf(stderr, "%d ", srccolors[j][i]); + } + + fprintf(stderr, "\n"); +#endif + + /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax + are false but try it anyway */ + if (alphablockerror1 >= 32) { + + /* don't bother if encoding is already very good, this condition should also imply + we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */ + alphablockerror2 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] == 0) { + alphaenc2[4*j + i] = 6; + alphadist = 0; + } + else if (srccolors[j][i] == 255) { + alphaenc2[4*j + i] = 7; + alphadist = 0; + } + else if (srccolors[j][i] <= acutValues[0]) { + alphaenc2[4*j + i] = 0; + alphadist = srccolors[j][i] - alphabase[0]; + } + else if (srccolors[j][i] <= acutValues[1]) { + alphaenc2[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5; + } + else if (srccolors[j][i] <= acutValues[2]) { + alphaenc2[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5; + } + else if (srccolors[j][i] <= acutValues[3]) { + alphaenc2[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5; + } + else if (srccolors[j][i] <= acutValues[4]) { + alphaenc2[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5; + } + else { + alphaenc2[4*j + i] = 1; + alphadist = srccolors[j][i] - alphabase[1]; + } + alphablockerror2 += alphadist * alphadist; + } + } + + + /* skip this if the error is already very small + this encoding is MUCH better on average than #2 though, but expensive! */ + if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) { + GLshort blockerrlin1 = 0; + GLshort blockerrlin2 = 0; + GLubyte nralphainrangelow = 0; + GLubyte nralphainrangehigh = 0; + alphatest[0] = 0xff; + alphatest[1] = 0x0; + /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28))) + alphatest[1] = srccolors[j][i]; + if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28)) + alphatest[0] = srccolors[j][i]; + } + } + /* shouldn't happen too often, don't really care about those degenerated cases */ + if (alphatest[1] <= alphatest[0]) { + alphatest[0] = 1; + alphatest[1] = 254; + } + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + + /* find the "average" difference between the alpha values and the next encoded value. + This is then used to calculate new base values. + Should there be some weighting, i.e. those values closer to alphatest[x] have more weight, + since they will see more improvement, and also because the values in the middle are somewhat + likely to get no improvement at all (because the base values might move in different directions)? + OTOH it would mean the values in the middle are even less likely to get an improvement + */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i] <= alphatest[0] / 2) { + } + else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) { + } + else if (srccolors[j][i] <= acutValues[0]) { + blockerrlin1 += (srccolors[j][i] - alphatest[0]); + nralphainrangelow += 1; + } + else if (srccolors[j][i] <= acutValues[1]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[2]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[3]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i] <= acutValues[4]) { + blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else { + blockerrlin2 += (srccolors[j][i] - alphatest[1]); + nralphainrangehigh += 1; + } + } + } + /* shouldn't happen often, needed to avoid div by zero */ + if (nralphainrangelow == 0) nralphainrangelow = 1; + if (nralphainrangehigh == 0) nralphainrangehigh = 1; + alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow); +#if RGTC_DEBUG + fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow); + fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh); +#endif + /* again shouldn't really happen often... */ + if (alphatest[0] < 0) { + alphatest[0] = 0; + } + alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh); + if (alphatest[1] > 255) { + alphatest[1] = 255; + } + + alphablockerror3 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i] <= alphatest[0] / 2) { + alphaenc3[4*j + i] = 6; + alphadist = srccolors[j][i]; + } + else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) { + alphaenc3[4*j + i] = 7; + alphadist = 255 - srccolors[j][i]; + } + else if (srccolors[j][i] <= acutValues[0]) { + alphaenc3[4*j + i] = 0; + alphadist = srccolors[j][i] - alphatest[0]; + } + else if (srccolors[j][i] <= acutValues[1]) { + alphaenc3[4*j + i] = 2; + alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5; + } + else if (srccolors[j][i] <= acutValues[2]) { + alphaenc3[4*j + i] = 3; + alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5; + } + else if (srccolors[j][i] <= acutValues[3]) { + alphaenc3[4*j + i] = 4; + alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5; + } + else if (srccolors[j][i] <= acutValues[4]) { + alphaenc3[4*j + i] = 5; + alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5; + } + else { + alphaenc3[4*j + i] = 1; + alphadist = srccolors[j][i] - alphatest[1]; + } + alphablockerror3 += alphadist * alphadist; + } + } + } + } + /* write the alpha values and encoding back. */ + if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) { +#if RGTC_DEBUG + if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1); +#endif + write_rgtc_encoded_channel_s( blkaddr, alphause[1], alphause[0], alphaenc1 ); + } + else if (alphablockerror2 <= alphablockerror3) { +#if RGTC_DEBUG + if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2); +#endif + write_rgtc_encoded_channel_s( blkaddr, alphabase[0], alphabase[1], alphaenc2 ); + } + else { +#if RGTC_DEBUG + fprintf(stderr, "enc3 used, error %d\n", alphablockerror3); +#endif + write_rgtc_encoded_channel_s( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 ); + } +} diff --git a/src/mesa/main/texcompress_rgtc.h b/src/mesa/main/texcompress_rgtc.h new file mode 100644 index 00000000000..424edc4581c --- /dev/null +++ b/src/mesa/main/texcompress_rgtc.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef TEXCOMPRESS_RGTC_H +#define TEXCOMPRESS_RGTC_H + +#include "glheader.h" +#include "mfeatures.h" +#include "texstore.h" + +struct gl_texture_image; + +extern GLboolean +_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS); + +extern GLboolean +_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS); + +extern GLboolean +_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS); + +extern GLboolean +_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS); + +extern void +_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel); + +extern void +_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel); + +extern void +_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel); + +extern void +_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel); +#endif diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 8aa1e4970d5..550597e1cdf 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -38,6 +38,7 @@ #include "texcompress.h" #include "texcompress_fxt1.h" #include "texcompress_s3tc.h" +#include "texcompress_rgtc.h" #include "texfetch.h" #include "teximage.h" @@ -756,7 +757,35 @@ texfetch_funcs[MESA_FORMAT_COUNT] = fetch_texel_2d_rgba_16, fetch_texel_3d_rgba_16, store_texel_rgba_16 - } + }, + { + MESA_FORMAT_RED_RGTC1, + NULL, + _mesa_fetch_texel_2d_f_red_rgtc1, + NULL, + NULL + }, + { + MESA_FORMAT_SIGNED_RED_RGTC1, + NULL, + _mesa_fetch_texel_2d_f_signed_red_rgtc1, + NULL, + NULL + }, + { + MESA_FORMAT_RG_RGTC2, + NULL, + _mesa_fetch_texel_2d_f_rg_rgtc2, + NULL, + NULL + }, + { + MESA_FORMAT_SIGNED_RG_RGTC2, + NULL, + _mesa_fetch_texel_2d_f_signed_rg_rgtc2, + NULL, + NULL + }, }; diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 2542cea856b..72025cf828e 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -602,6 +602,25 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, } } + if (ctx->Extensions.ARB_texture_compression_rgtc) { + switch (internalFormat) { + case GL_COMPRESSED_RED_RGTC1: + RETURN_IF_SUPPORTED(MESA_FORMAT_RED_RGTC1); + break; + case GL_COMPRESSED_SIGNED_RED_RGTC1: + RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RED_RGTC1); + break; + case GL_COMPRESSED_RG_RGTC2: + RETURN_IF_SUPPORTED(MESA_FORMAT_RG_RGTC2); + break; + case GL_COMPRESSED_SIGNED_RG_RGTC2: + RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RG_RGTC2); + break; + default: + ; /* fallthrough */ + } + } + _mesa_problem(ctx, "unexpected format in _mesa_choose_tex_format()"); return MESA_FORMAT_NONE; } diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 7dd4a1fa650..8a3e5f77979 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -65,6 +65,7 @@ #include "pack.h" #include "texcompress.h" #include "texcompress_fxt1.h" +#include "texcompress_rgtc.h" #include "texcompress_s3tc.h" #include "teximage.h" #include "texstore.h" @@ -310,15 +311,15 @@ compute_component_mapping(GLenum inFormat, GLenum outFormat, * \param srcPacking source image pixel packing * \return resulting image with format = textureBaseFormat and type = GLfloat. */ -static GLfloat * -make_temp_float_image(struct gl_context *ctx, GLuint dims, - GLenum logicalBaseFormat, - GLenum textureBaseFormat, - GLint srcWidth, GLint srcHeight, GLint srcDepth, - GLenum srcFormat, GLenum srcType, - const GLvoid *srcAddr, - const struct gl_pixelstore_attrib *srcPacking, - GLbitfield transferOps) +GLfloat * +_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims, + GLenum logicalBaseFormat, + GLenum textureBaseFormat, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLenum srcFormat, GLenum srcType, + const GLvoid *srcAddr, + const struct gl_pixelstore_attrib *srcPacking, + GLbitfield transferOps) { GLfloat *tempImage; const GLint components = _mesa_components_in_format(logicalBaseFormat); @@ -2065,7 +2066,7 @@ _mesa_texstore_argb2101010(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2317,7 +2318,7 @@ _mesa_texstore_unorm1616(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2394,7 +2395,7 @@ _mesa_texstore_unorm16(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2452,7 +2453,7 @@ _mesa_texstore_rgba_16(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2519,7 +2520,7 @@ _mesa_texstore_signed_rgba_16(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2901,7 +2902,7 @@ _mesa_texstore_signed_r8(TEXSTORE_PARAMS) /* XXX look at adding optimized paths */ { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2946,7 +2947,7 @@ _mesa_texstore_signed_rg88(TEXSTORE_PARAMS) /* XXX look at adding optimized paths */ { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -2991,7 +2992,7 @@ _mesa_texstore_signed_rgbx8888(TEXSTORE_PARAMS) { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3104,7 +3105,7 @@ _mesa_texstore_signed_rgba8888(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3413,7 +3414,7 @@ _mesa_texstore_rgba_float32(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3483,7 +3484,7 @@ _mesa_texstore_rgba_float16(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3549,7 +3550,7 @@ _mesa_texstore_rgba_int8(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3614,7 +3615,7 @@ _mesa_texstore_rgba_int16(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -3679,7 +3680,7 @@ _mesa_texstore_rgba_int32(TEXSTORE_PARAMS) } else { /* general path */ - const GLfloat *tempImage = make_temp_float_image(ctx, dims, + const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims, baseInternalFormat, baseFormat, srcWidth, srcHeight, srcDepth, @@ -4128,7 +4129,12 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_SIGNED_RG_16, _mesa_texstore_signed_rgba_16 }, { MESA_FORMAT_SIGNED_RGB_16, _mesa_texstore_signed_rgba_16 }, { MESA_FORMAT_SIGNED_RGBA_16, _mesa_texstore_signed_rgba_16 }, - { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 } + { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 }, + + { MESA_FORMAT_RED_RGTC1, _mesa_texstore_red_rgtc1 }, + { MESA_FORMAT_SIGNED_RED_RGTC1, _mesa_texstore_signed_red_rgtc1 }, + { MESA_FORMAT_RG_RGTC2, _mesa_texstore_rg_rgtc2 }, + { MESA_FORMAT_SIGNED_RG_RGTC2, _mesa_texstore_signed_rg_rgtc2 } }; diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h index 177ede423f5..2f3c4e821fc 100644 --- a/src/mesa/main/texstore.h +++ b/src/mesa/main/texstore.h @@ -81,6 +81,15 @@ _mesa_make_temp_chan_image(struct gl_context *ctx, GLuint dims, const GLvoid *srcAddr, const struct gl_pixelstore_attrib *srcPacking); +GLfloat * +_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims, + GLenum logicalBaseFormat, + GLenum textureBaseFormat, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLenum srcFormat, GLenum srcType, + const GLvoid *srcAddr, + const struct gl_pixelstore_attrib *srcPacking, + GLbitfield transferOps); extern void _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 9a78a23aa7e..bdf4126cf58 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -78,6 +78,7 @@ MAIN_SOURCES = \ main/stencil.c \ main/syncobj.c \ main/texcompress.c \ + main/texcompress_rgtc.c \ main/texcompress_s3tc.c \ main/texcompress_fxt1.c \ main/texenv.c \ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 6530a06ade4..c99eafbadf3 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -579,6 +579,7 @@ st_validate_varrays(struct gl_context *ctx, if (is_interleaved_arrays(vp, vpv, arrays)) { setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements, max_index); + num_vbuffers = 1; num_velements = vpv->num_inputs; if (num_velements == 0) @@ -645,6 +646,7 @@ st_draw_vbo(struct gl_context *ctx, for (i = 0; i < nr_prims; i++) { min_index = MIN2(min_index, prims[i].start); max_index = MAX2(max_index, prims[i].start + prims[i].count - 1); + max_index = MAX2(max_index, prims[i].num_instances); } } diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 2f45f470334..d2098987d1d 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -416,6 +416,22 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.S3_s3tc = GL_TRUE; } + if (screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_SNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_SNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0) + ) { + ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE; + } + /* ycbcr support */ if (screen->is_format_supported(screen, PIPE_FORMAT_UYVY, PIPE_TEXTURE_2D, 0, diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 577ee6189bd..c58ec9267dc 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -241,6 +241,14 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat) case MESA_FORMAT_RGBA_UINT32: return PIPE_FORMAT_R32G32B32A32_USCALED; + case MESA_FORMAT_RED_RGTC1: + return PIPE_FORMAT_RGTC1_UNORM; + case MESA_FORMAT_SIGNED_RED_RGTC1: + return PIPE_FORMAT_RGTC1_SNORM; + case MESA_FORMAT_RG_RGTC2: + return PIPE_FORMAT_RGTC2_UNORM; + case MESA_FORMAT_SIGNED_RG_RGTC2: + return PIPE_FORMAT_RGTC2_SNORM; default: assert(0); return PIPE_FORMAT_NONE; @@ -380,6 +388,15 @@ st_pipe_format_to_mesa_format(enum pipe_format format) case PIPE_FORMAT_R32G32B32A32_USCALED: return MESA_FORMAT_RGBA_UINT32; + case PIPE_FORMAT_RGTC1_UNORM: + return MESA_FORMAT_RED_RGTC1; + case PIPE_FORMAT_RGTC1_SNORM: + return MESA_FORMAT_SIGNED_RED_RGTC1; + case PIPE_FORMAT_RGTC2_UNORM: + return MESA_FORMAT_RG_RGTC2; + case PIPE_FORMAT_RGTC2_SNORM: + return MESA_FORMAT_SIGNED_RG_RGTC2; + default: assert(0); return MESA_FORMAT_NONE; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 5c68fd78c30..c07739f9d53 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -224,9 +224,9 @@ src_register( struct st_translate *t, case PROGRAM_TEMPORARY: assert(index >= 0); + assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_temporary( t->ureg ); - assert(index < Elements(t->temps)); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: |