summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-02-28 23:59:53 +0100
committerChristian König <[email protected]>2011-02-28 23:59:53 +0100
commitb97e41c7b18c363a303693fb841fe606b1106fe6 (patch)
tree5329179ac070a8e3ecc3d3e51c020e004f5b32af
parent77217af40d67612d1f1089ca188393d27a8a038f (diff)
parent5f44fab5a6ba99c287da8d01fa584763bff2565b (diff)
Merge remote branch 'origin/master' into pipe-video
-rw-r--r--common.py4
-rw-r--r--docs/GL3.txt2
-rw-r--r--docs/MESA_multithread_makecurrent.spec158
-rw-r--r--docs/relnotes-7.11.html1
-rwxr-xr-xscons/gallium.py2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h28
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c47
-rw-r--r--src/gallium/auxiliary/util/u_vbuf_mgr.c5
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h8
-rw-r--r--src/gallium/drivers/i915/i915_blit.c15
-rw-r--r--src/gallium/drivers/i915/i915_context.c10
-rw-r--r--src/gallium/drivers/i915/i915_context.h25
-rw-r--r--src/gallium/drivers/i915/i915_flush.c2
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c223
-rw-r--r--src/gallium/drivers/i915/i915_state_static.c136
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c5
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c6
-rw-r--r--src/gallium/drivers/r300/r300_context.c3
-rw-r--r--src/gallium/drivers/r300/r300_context.h2
-rw-r--r--src/gallium/drivers/r300/r300_texture.c11
-rw-r--r--src/gallium/drivers/r600/eg_asm.c26
-rw-r--r--src/gallium/drivers/r600/r600.h6
-rw-r--r--src/gallium/drivers/r600/r600_asm.c233
-rw-r--r--src/gallium/drivers/r600/r600_asm.h3
-rw-r--r--src/gallium/drivers/r600/r600_blit.c16
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c16
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c5
-rw-r--r--src/gallium/drivers/r600/r600_shader.c41
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c14
-rw-r--r--src/gallium/drivers/r600/r600_texture.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c5
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c30
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_winsys.c1
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_winsys.h1
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c15
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_winsys.c2
-rw-r--r--src/gallium/winsys/sw/xlib/SConscript2
-rw-r--r--src/glsl/Makefile2
-rw-r--r--src/glsl/ast_to_hir.cpp8
-rw-r--r--src/glsl/builtin_types.h6
-rw-r--r--src/glsl/builtins/profiles/130.frag4
-rw-r--r--src/glsl/builtins/profiles/130.vert4
-rw-r--r--src/glsl/builtins/profiles/OES_texture_3D.frag7
-rw-r--r--src/glsl/builtins/profiles/OES_texture_3D.vert7
-rw-r--r--src/glsl/glsl_parser_extras.cpp5
-rw-r--r--src/glsl/glsl_parser_extras.h2
-rw-r--r--src/glsl/glsl_types.cpp12
-rw-r--r--src/glsl/glsl_types.h2
-rw-r--r--src/glx/dri2_glx.c9
-rw-r--r--src/glx/glxclient.h4
-rw-r--r--src/glx/glxcmds.c8
-rw-r--r--src/glx/glxcurrent.c55
-rw-r--r--src/glx/glxextensions.c1
-rw-r--r--src/glx/glxextensions.h1
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp31
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c3
-rw-r--r--src/mesa/main/extensions.c4
-rw-r--r--src/mesa/main/formats.c42
-rw-r--r--src/mesa/main/formats.h6
-rw-r--r--src/mesa/main/texcompress.c20
-rw-r--r--src/mesa/main/texcompress_rgtc.c1122
-rw-r--r--src/mesa/main/texcompress_rgtc.h60
-rw-r--r--src/mesa/main/texfetch.c31
-rw-r--r--src/mesa/main/texformat.c19
-rw-r--r--src/mesa/main/texstore.c54
-rw-r--r--src/mesa/main/texstore.h9
-rw-r--r--src/mesa/sources.mak1
-rw-r--r--src/mesa/state_tracker/st_draw.c2
-rw-r--r--src/mesa/state_tracker/st_extensions.c16
-rw-r--r--src/mesa/state_tracker/st_format.c17
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c2
78 files changed, 2352 insertions, 381 deletions
diff --git a/common.py b/common.py
index b44f20e8216..1d0c6a71fa5 100644
--- a/common.py
+++ b/common.py
@@ -15,6 +15,8 @@ import SCons.Script.SConscript
# Defaults
host_platform = _platform.system().lower()
+if host_platform.startswith('cygwin'):
+ host_platform = 'cygwin'
# Search sys.argv[] for a "platform=foo" argument since we don't have
# an 'env' variable at this point.
@@ -81,7 +83,7 @@ def AddOptions(opts):
opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
opts.Add(EnumOption('platform', 'target platform', host_platform,
- allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin_nt-5.1', 'cygwin_nt-6.1', 'sunos5', 'freebsd8')))
+ allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin', 'embedded', 'cygwin', 'sunos5', 'freebsd8')))
opts.Add('toolchain', 'compiler toolchain', default_toolchain)
opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 9ff25a95297..0c53bc42d54 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -27,7 +27,7 @@ Non-normalized Integer texture/framebuffer formats ~50% done
1D/2D Texture arrays core Mesa, swrast done
Packed depth/stencil formats DONE
Per-buffer blend and masks (GL_EXT_draw_buffers2) DONE
-GL_EXT_texture_compression_rgtc not started
+GL_EXT_texture_compression_rgtc DONE (swrast, gallium r600)
Red and red/green texture formats DONE (swrast, i965, gallium)
Transform feedback (GL_EXT_transform_feedback) ~50% done
glBindFragDataLocation, glGetFragDataLocation,
diff --git a/docs/MESA_multithread_makecurrent.spec b/docs/MESA_multithread_makecurrent.spec
new file mode 100644
index 00000000000..5065c2fc0a3
--- /dev/null
+++ b/docs/MESA_multithread_makecurrent.spec
@@ -0,0 +1,158 @@
+Name
+
+ MESA_multithread_makecurrent
+
+Name Strings
+
+ GLX_MESA_multithread_makecurrent
+
+Contact
+
+ Eric Anholt ([email protected])
+
+Status
+
+ Not shipping.
+
+Version
+
+ Last Modified Date: 21 February 2011
+
+Number
+
+ TBD
+
+Dependencies
+
+ OpenGL 1.0 or later is required.
+ GLX 1.3 or later is required.
+
+Overview
+
+ The GLX context setup encourages multithreaded applications to
+ create a context per thread which each operate on their own
+ objects in parallel, and leaves synchronization for write access
+ to shared objects up to the application.
+
+ For some applications, maintaining per-thread contexts and
+ ensuring that the glFlush happens in one thread before another
+ thread starts working on that object is difficult. For them,
+ using the same context across multiple threads and protecting its
+ usage with a mutex is both higher performance and easier to
+ implement. This extension gives those applications that option by
+ relaxing the context binding requirements.
+
+ This new behavior matches the requirements of AGL, while providing
+ a feature not specified in WGL.
+
+IP Status
+
+ Open-source; freely implementable.
+
+Issues
+
+ None.
+
+New Procedures and Functions
+
+ None.
+
+New Tokens
+
+ None.
+
+Changes to Chapter 2 of the GLX 1.3 Specification (Functions and Errors)
+
+ Replace the following sentence from section 2.2 Rendering Contexts:
+ In addition, a rendering context can be current for only one
+ thread at a time.
+ with:
+ In addition, an indirect rendering context can be current for
+ only one thread at a time. A direct rendering context may be
+ current to multiple threads, with synchronization of access to
+ the context thruogh the GL managed by the application through
+ mutexes.
+
+Changes to Chapter 3 of the GLX 1.3 Specification (Functions and Errors)
+
+ Replace the following sentence from section 3.3.7 Rendering Contexts:
+ If ctx is current to some other thread, then
+ glXMakeContextCurrent will generate a BadAccess error.
+ with:
+ If ctx is an indirect context current to some other thread,
+ then glXMakeContextCurrent will generate a BadAccess error.
+
+ Replace the following sentence from section 3.5 Rendering Contexts:
+ If ctx is current to some other thread, then
+ glXMakeCurrent will generate a BadAccess error.
+ with:
+ If ctx is an indirect context current to some other thread,
+ then glXMakeCurrent will generate a BadAccess error.
+
+GLX Protocol
+
+ None. The GLX extension only extends to direct rendering contexts.
+
+Errors
+
+ None.
+
+New State
+
+ None.
+
+Issues
+
+ (1) What happens if the app binds a context/drawable in multiple
+ threads, then binds a different context/thread in one of them?
+
+ As with binding a new context from the current thread, the old
+ context's refcount is reduced and the new context's refcount is
+ increased.
+
+ (2) What happens if the app binds a context/drawable in multiple
+ threads, then binds None/None in one of them?
+
+ The GLX context is unreferenced from that thread, and the other
+ threads retain their GLX context binding.
+
+ (3) What happens if the app binds a context/drawable in 7 threads,
+ then destroys the context in one of them?
+
+ As with GLX context destruction previously, the XID is destroyed
+ but the context remains usable by threads that have the context
+ current.
+
+ (4) What happens if the app binds a new drawable/readable with
+ glXMakeCurrent() when it is already bound to another thread?
+
+ The context becomes bound to the new drawable/readable, and
+ further rendering in either thread will use the new
+ drawable/readable.
+
+ (5) What requirements should be placed on the user managing contexts
+ from multiple threads?
+
+ The intention is to allow multithreaded access to the GL at the
+ minimal performance cost, so requiring that the GL do general
+ synchronization (beyond that already required by context sharing)
+ is not an option, and synchronizing of GL's access to the GL
+ context between multiple threads is left to the application to do
+ across GL calls. However, it would be unfortunate for a library
+ doing multithread_makecurrent to require that other libraries
+ share in synchronization for binding of their own contexts, so the
+ refcounting of the contexts is required to be threadsafe.
+
+ (6) Does this apply to indirect contexts?
+
+ This was ignored in the initial revision of the spec. Behavior
+ for indirect contexts is left as-is.
+
+Revision History
+
+ 20 November 2009 Eric Anholt - initial specification
+ 22 November 2009 Eric Anholt - added issues from Ian Romanick.
+ 3 February 2011 Eric Anholt - updated with resolution to issues 1-3
+ 3 February 2011 Eric Anholt - added issue 4, 5
+ 21 February 2011 Eric Anholt - Include glXMakeCurrent() sentence
+ along with glXMakeContextCurrent() for removal.
diff --git a/docs/relnotes-7.11.html b/docs/relnotes-7.11.html
index 6c6622ed3f4..4b1730b17ec 100644
--- a/docs/relnotes-7.11.html
+++ b/docs/relnotes-7.11.html
@@ -38,6 +38,7 @@ tbd
<ul>
<li>GL_ARB_draw_instanced extension (gallium drivers, swrast)
<li>GL_ARB_instanced_arrays extension (gallium drivers)
+<li>GL_ARB_texture_compression_rgtc (gallium r600, swrast)
<li>GL_ARB_draw_buffers_blend (gallium)
<li>GL_EXT_texture_sRGB_decode (gallium drivers, swrast, i965)
</ul>
diff --git a/scons/gallium.py b/scons/gallium.py
index 9118257ac05..112f6c89dca 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -195,6 +195,8 @@ def generate(env):
# Determine whether we are cross compiling; in particular, whether we need
# to compile code generators with a different compiler as the target code.
host_platform = _platform.system().lower()
+ if host_platform.startswith('cygwin'):
+ host_platform = 'cygwin'
host_machine = os.environ.get('PROCESSOR_ARCHITEW6432', os.environ.get('PROCESSOR_ARCHITECTURE', _platform.machine()))
host_machine = {
'x86': 'x86',
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index e685f4b73f0..1fec3adf5b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -832,14 +832,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef *colors_out)
{
LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef size0;
- LLVMValueRef size1;
- LLVMValueRef row_stride0_vec;
- LLVMValueRef row_stride1_vec;
- LLVMValueRef img_stride0_vec;
- LLVMValueRef img_stride1_vec;
- LLVMValueRef data_ptr0;
- LLVMValueRef data_ptr1;
+ LLVMValueRef size0 = NULL;
+ LLVMValueRef size1 = NULL;
+ LLVMValueRef row_stride0_vec = NULL;
+ LLVMValueRef row_stride1_vec = NULL;
+ LLVMValueRef img_stride0_vec = NULL;
+ LLVMValueRef img_stride1_vec = NULL;
+ LLVMValueRef data_ptr0 = NULL;
+ LLVMValueRef data_ptr1 = NULL;
LLVMValueRef colors0[4], colors1[4];
unsigned chan;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index b8d193f3f89..9d5553f0ea0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -43,24 +43,24 @@ struct ureg_program;
*/
struct ureg_src
{
- unsigned File : 4; /* TGSI_FILE_ */
- unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
- unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
- unsigned Indirect : 1; /* BOOL */
- unsigned DimIndirect : 1; /* BOOL */
- unsigned Dimension : 1; /* BOOL */
- unsigned Absolute : 1; /* BOOL */
- unsigned Negate : 1; /* BOOL */
- int Index : 16; /* SINT */
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned DimIndirect : 1; /* BOOL */
+ unsigned Dimension : 1; /* BOOL */
+ unsigned Absolute : 1; /* BOOL */
+ unsigned Negate : 1; /* BOOL */
unsigned IndirectFile : 4; /* TGSI_FILE_ */
- int IndirectIndex : 16; /* SINT */
unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */
- int DimensionIndex : 16; /* SINT */
unsigned DimIndFile : 4; /* TGSI_FILE_ */
- int DimIndIndex : 16; /* SINT */
unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */
+ int Index : 16; /* SINT */
+ int IndirectIndex : 16; /* SINT */
+ int DimensionIndex : 16; /* SINT */
+ int DimIndIndex : 16; /* SINT */
};
/* Very similar to a tgsi_dst_register, removing unsupported fields
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 3b6342ad8d1..4f1b0e71934 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -67,7 +67,7 @@ struct gen_mipmap_state
struct pipe_vertex_element velem[2];
void *vs;
- void *fs1d, *fs2d, *fs3d, *fsCube;
+ void *fs1d, *fs2d, *fs3d, *fsCube, *fs1da, *fs2da;
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -1321,6 +1321,13 @@ util_create_gen_mipmap(struct pipe_context *pipe,
TGSI_INTERPOLATE_LINEAR);
ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE,
TGSI_INTERPOLATE_LINEAR);
+ if (pipe->screen->get_param(pipe->screen, PIPE_CAP_ARRAY_TEXTURES)) {
+ ctx->fs1da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D_ARRAY,
+ TGSI_INTERPOLATE_LINEAR);
+ ctx->fs2da = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D_ARRAY,
+ TGSI_INTERPOLATE_LINEAR);
+ }
+
/* vertex data that doesn't change */
for (i = 0; i < 4; i++) {
@@ -1390,8 +1397,25 @@ set_vertex_data(struct gen_mipmap_state *ctx,
util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2,
&ctx->vertices[0][1][0], 8);
}
- else {
- /* 1D/2D/3D */
+ else if (tex_target == PIPE_TEXTURE_1D_ARRAY) {
+ /* 1D texture array */
+ ctx->vertices[0][1][0] = 0.0f; /*s*/
+ ctx->vertices[0][1][1] = r; /*t*/
+ ctx->vertices[0][1][2] = 0.0f; /*r*/
+
+ ctx->vertices[1][1][0] = 1.0f;
+ ctx->vertices[1][1][1] = r;
+ ctx->vertices[1][1][2] = 0.0f;
+
+ ctx->vertices[2][1][0] = 1.0f;
+ ctx->vertices[2][1][1] = r;
+ ctx->vertices[2][1][2] = 0.0f;
+
+ ctx->vertices[3][1][0] = 0.0f;
+ ctx->vertices[3][1][1] = r;
+ ctx->vertices[3][1][2] = 0.0f;
+ } else {
+ /* 1D/2D/3D/2D array */
ctx->vertices[0][1][0] = 0.0f; /*s*/
ctx->vertices[0][1][1] = 0.0f; /*t*/
ctx->vertices[0][1][2] = r; /*r*/
@@ -1427,6 +1451,10 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
{
struct pipe_context *pipe = ctx->pipe;
+ if (ctx->fs2da)
+ pipe->delete_fs_state(pipe, ctx->fs2da);
+ if (ctx->fs1da)
+ pipe->delete_fs_state(pipe, ctx->fs1da);
pipe->delete_fs_state(pipe, ctx->fsCube);
pipe->delete_fs_state(pipe, ctx->fs3d);
pipe->delete_fs_state(pipe, ctx->fs2d);
@@ -1499,7 +1527,11 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
fs = ctx->fsCube;
break;
case PIPE_TEXTURE_1D_ARRAY:
+ fs = ctx->fs1da;
+ break;
case PIPE_TEXTURE_2D_ARRAY:
+ fs = ctx->fs2da;
+ break;
default:
assert(0);
fs = ctx->fs2d;
@@ -1555,6 +1587,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
if (pt->target == PIPE_TEXTURE_3D)
nr_layers = u_minify(pt->depth0, dstLevel);
+ else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY)
+ nr_layers = pt->array_size;
else
nr_layers = 1;
@@ -1564,11 +1598,12 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
/* in theory with geom shaders and driver with full layer support
could do that in one go. */
layer = i;
- offset = 1.0f / (float)(nr_layers * 2);
/* XXX hmm really? */
rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2);
- }
- else
+ } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) {
+ layer = i;
+ rcoord = (float)layer;
+ } else
layer = face;
memset(&surf_templ, 0, sizeof(surf_templ));
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index dec8dd717e8..3cf8ee0831d 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -531,7 +531,10 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
unsigned first, size;
boolean flushed;
- if (vb->stride) {
+ if (mgr->ve->ve[i].instance_divisor) {
+ first = 0;
+ size = vb->buffer->width0;
+ } else if (vb->stride) {
first = vb->stride * min_index;
size = vb->stride * count;
} else {
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index d92b2ccb31e..b4a91dabb37 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -75,6 +75,14 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
batch->ptr += size;
}
+static INLINE boolean
+i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffers,
+ int num_of_buffers)
+{
+ return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
+}
+
static INLINE int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index 97c25665156..f885417f8ed 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915,
I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+ if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1));
+ }
+
switch (cpp) {
case 1:
case 2:
@@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915,
OUT_BATCH(((y + h) << 16) | (x + w));
OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset);
OUT_BATCH(color);
+
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
void
@@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915,
unsigned CMD, BR13;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
+ struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer};
I915_DBG(DBG_BLIT,
@@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+ if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_winsys_validate_buffers(i915->batch, buffers, 2));
+ }
+
switch (cpp) {
case 1:
case 2:
@@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915,
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH(((int) src_pitch & 0xffff));
OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset);
+
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 707b2e9f956..cbf919754e5 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -73,10 +73,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_index_buffer(draw, mapped_indices);
if (cbuf_dirty) {
- draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
- i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
- (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
+ if (i915->constants[PIPE_SHADER_VERTEX])
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
+ i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data,
+ (i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
4 * sizeof(float)));
+ else
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
}
/*
@@ -165,6 +168,7 @@ i915_create_context(struct pipe_screen *screen, void *priv)
i915->hardware_dirty = ~0;
i915->immediate_dirty = ~0;
i915->dynamic_dirty = ~0;
+ i915->flush_dirty = 0;
/* Batch stream debugging is a bit hacked up at the moment:
*/
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 2cf53424f06..1da637d068e 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -150,6 +150,15 @@ struct i915_state
/** Describes the current hardware vertex layout */
struct vertex_info vertex_info;
+ /* static state (dst/depth buffer state) */
+ struct i915_winsys_buffer *cbuf_bo;
+ unsigned cbuf_flags;
+ struct i915_winsys_buffer *depth_bo;
+ unsigned depth_flags;
+ unsigned dst_buf_vars;
+ uint32_t draw_offset;
+ uint32_t draw_size;
+
unsigned id; /* track lost context events */
};
@@ -237,6 +246,10 @@ struct i915_context {
unsigned hardware_dirty;
unsigned immediate_dirty;
unsigned dynamic_dirty;
+ unsigned flush_dirty;
+
+ struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS];
+ int num_validation_buffers;
struct util_slab_mempool transfer_pool;
};
@@ -277,6 +290,18 @@ struct i915_context {
#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS)
#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0))
#define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1))
+#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1))
+
+/* hw flush handling */
+#define I915_FLUSH_CACHE 1
+#define I915_PIPELINE_FLUSH 2
+
+static INLINE
+void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
+{
+ i915->hardware_dirty |= I915_HW_FLUSH;
+ i915->flush_dirty |= flush;
+}
/***********************************************************************
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 911c051d1f2..22a2c7b2cb4 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -96,4 +96,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
i915->hardware_dirty = ~0;
i915->immediate_dirty = ~0;
i915->dynamic_dirty = ~0;
+ /* kernel emits flushes in between batchbuffers */
+ i915->flush_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 509d487b498..0323ad940f9 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -36,73 +36,105 @@
#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_memory.h"
-static unsigned translate_format( enum pipe_format format )
+struct i915_tracked_hw_state {
+ const char *name;
+ void (*validate)(struct i915_context *);
+ void (*emit)(struct i915_context *);
+ unsigned dirty, batch_space;
+};
+
+
+static void
+emit_flush(struct i915_context *i915)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return COLOR_BUF_ARGB8888;
- case PIPE_FORMAT_B5G6R5_UNORM:
- return COLOR_BUF_RGB565;
- default:
- assert(0);
- return 0;
- }
+ /* Cache handling is very cheap atm. State handling can request to flushes:
+ * - I915_FLUSH_CACHE which is a flush everything request and
+ * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
+ * Because the cache handling is so dumb, no explicit "invalidate map cache".
+ * Also, the first is a strict superset of the latter, so the following logic
+ * works. */
+ if (i915->flush_dirty & I915_FLUSH_CACHE)
+ OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
+ else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
+ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
+}
+
+static void
+validate_immediate(struct i915_context *i915)
+{
+ if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0))
+ i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
+}
+
+static void
+validate_static(struct i915_context *i915)
+{
+ if (i915->current.cbuf_bo)
+ i915->validation_buffers[i915->num_validation_buffers++]
+ = i915->current.cbuf_bo;
+
+ if (i915->current.depth_bo)
+ i915->validation_buffers[i915->num_validation_buffers++]
+ = i915->current.depth_bo;
}
-static unsigned translate_depth_format( enum pipe_format zformat )
+static void
+validate_map(struct i915_context *i915)
{
- switch (zformat) {
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return DEPTH_FRMT_24_FIXED_8_OTHER;
- case PIPE_FORMAT_Z16_UNORM:
- return DEPTH_FRMT_16_FIXED;
- default:
- assert(0);
- return 0;
+ const uint enabled = i915->current.sampler_enable_flags;
+ uint unit;
+ struct i915_texture *tex;
+
+
+ for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+ if (enabled & (1 << unit)) {
+ tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
+ i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
+ }
}
}
+const static struct i915_tracked_hw_state hw_atoms[] = {
+ { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 },
+ { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE },
+ { "static", validate_static, NULL, I915_HW_STATIC },
+ { "map", validate_map, NULL, I915_HW_MAP }
+};
-/**
- * Examine framebuffer state to determine width, height.
- */
static boolean
-framebuffer_size(const struct pipe_framebuffer_state *fb,
- uint *width, uint *height)
+i915_validate_state(struct i915_context *i915, unsigned *batch_space)
{
- if (fb->cbufs[0]) {
- *width = fb->cbufs[0]->width;
- *height = fb->cbufs[0]->height;
- return TRUE;
- }
- else if (fb->zsbuf) {
- *width = fb->zsbuf->width;
- *height = fb->zsbuf->height;
+ int i;
+
+ i915->num_validation_buffers = 0;
+ *batch_space = 0;
+
+ for (i = 0; i < Elements(hw_atoms); i++)
+ if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) {
+ hw_atoms[i].validate(i915);
+ *batch_space += hw_atoms[i].batch_space;
+ }
+
+ if (i915->num_validation_buffers == 0)
return TRUE;
- }
- else {
- *width = *height = 0;
+
+ if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
+ i915->num_validation_buffers))
return FALSE;
- }
+
+ return TRUE;
}
-static inline uint32_t
-buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+static void
+emit_state(struct i915_context *i915)
{
- uint32_t tiling_bits = 0;
-
- switch (tiling) {
- case I915_TILE_Y:
- tiling_bits |= BUF_3D_TILE_WALK_Y;
- case I915_TILE_X:
- tiling_bits |= BUF_3D_TILED_SURFACE;
- case I915_TILE_NONE:
- break;
- }
+ int i;
- return tiling_bits;
+ for (i = 0; i < Elements(hw_atoms); i++)
+ if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit)
+ hw_atoms[i].emit(i915);
}
/* Push the state into the sarea and/or texture memory.
@@ -110,6 +142,7 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
void
i915_emit_hardware_state(struct i915_context *i915 )
{
+ unsigned batch_space;
/* XXX: there must be an easier way */
const unsigned dwords = ( 14 +
7 +
@@ -135,14 +168,21 @@ i915_emit_hardware_state(struct i915_context *i915 )
if (I915_DBG_ON(DBG_ATOMS))
i915_dump_hardware_dirty(i915, __FUNCTION__);
- if(!BEGIN_BATCH(dwords, relocs)) {
+ if (!i915_validate_state(i915, &batch_space)) {
+ FLUSH_BATCH(NULL);
+ assert(i915_validate_state(i915, &batch_space));
+ }
+
+ if(!BEGIN_BATCH(batch_space + dwords, relocs)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(dwords, relocs));
+ assert(i915_validate_state(i915, &batch_space));
+ assert(BEGIN_BATCH(batch_space + dwords, relocs));
}
save_ptr = (uintptr_t)i915->batch->ptr;
save_relocs = i915->batch->relocs;
+ emit_state(i915);
/* 14 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_INVARIANT)
{
@@ -223,7 +263,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
int i;
for (i = 0; i < I915_MAX_DYNAMIC; i++) {
- if (i915->dynamic_dirty & (1 << i));
+ if (i915->dynamic_dirty & (1 << i))
OUT_BATCH(i915->current.dynamic[i]);
}
}
@@ -233,64 +273,27 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 8 dwords, 2 relocs */
if (i915->hardware_dirty & I915_HW_STATIC)
{
- struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
-
- if (cbuf_surface) {
- struct i915_texture *tex = i915_texture(cbuf_surface->texture);
- assert(tex);
-
+ if (i915->current.cbuf_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-
- OUT_BATCH(BUF_3D_ID_COLOR_BACK |
- BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- buf_3d_tiling_bits(tex->tiling));
-
- OUT_RELOC(tex->buffer,
+ OUT_BATCH(i915->current.cbuf_flags);
+ OUT_RELOC(i915->current.cbuf_bo,
I915_USAGE_RENDER,
0);
}
/* What happens if no zbuf??
*/
- if (depth_surface) {
- struct i915_texture *tex = i915_texture(depth_surface->texture);
- unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
- depth_surface->u.tex.first_layer);
- assert(tex);
- assert(offset == 0);
-
+ if (i915->current.depth_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-
- assert(tex);
- OUT_BATCH(BUF_3D_ID_DEPTH |
- BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
- buf_3d_tiling_bits(tex->tiling));
-
- OUT_RELOC(tex->buffer,
+ OUT_BATCH(i915->current.depth_flags);
+ OUT_RELOC(i915->current.depth_bo,
I915_USAGE_RENDER,
0);
}
{
- unsigned cformat, zformat = 0;
-
- if (cbuf_surface)
- cformat = cbuf_surface->format;
- else
- cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
- cformat = translate_format(cformat);
-
- if (depth_surface)
- zformat = translate_depth_format( i915->framebuffer.zsbuf->format );
-
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
- OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */
- DSTORG_VERT_BIAS(0x8) | /* .5 */
- LOD_PRECLAMP_OGL |
- TEX_DEFAULT_COLOR_OGL |
- cformat |
- zformat );
+ OUT_BATCH(i915->current.dst_buf_vars);
}
}
#endif
@@ -362,7 +365,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
uint i;
OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
- OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+ OUT_BATCH((1 << nr) - 1);
for (i = 0; i < nr; i++) {
const uint *c;
@@ -411,31 +414,13 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 6 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_STATIC)
{
- uint w, h;
- struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- struct i915_texture *tex = i915_texture(cbuf_surface->texture);
- unsigned x, y;
- int layer;
- uint32_t draw_offset;
- boolean ret;
-
- ret = framebuffer_size(&i915->framebuffer, &w, &h);
- assert(ret);
-
- layer = cbuf_surface->u.tex.first_layer;
-
- x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
- y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
-
- draw_offset = x | (y << 16);
-
/* XXX flush only required when the draw_offset changes! */
OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
- OUT_BATCH(draw_offset);
- OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16));
- OUT_BATCH(draw_offset);
+ OUT_BATCH(i915->current.draw_offset);
+ OUT_BATCH(i915->current.draw_size);
+ OUT_BATCH(i915->current.draw_offset);
}
#endif
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index dc9a4c1e2fd..97044499990 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -27,17 +27,151 @@
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_state.h"
+#include "i915_resource.h"
/***********************************************************************
* Update framebuffer state
*/
+static unsigned translate_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return COLOR_BUF_ARGB8888;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return COLOR_BUF_RGB565;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned translate_depth_format(enum pipe_format zformat)
+{
+ switch (zformat) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ return DEPTH_FRMT_24_FIXED_8_OTHER;
+ case PIPE_FORMAT_Z16_UNORM:
+ return DEPTH_FRMT_16_FIXED;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static inline uint32_t
+buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling)
+{
+ uint32_t tiling_bits = 0;
+
+ switch (tiling) {
+ case I915_TILE_Y:
+ tiling_bits |= BUF_3D_TILE_WALK_Y;
+ case I915_TILE_X:
+ tiling_bits |= BUF_3D_TILED_SURFACE;
+ case I915_TILE_NONE:
+ break;
+ }
+
+ return tiling_bits;
+}
+
+/**
+ * Examine framebuffer state to determine width, height.
+ */
+static boolean
+framebuffer_size(const struct pipe_framebuffer_state *fb,
+ uint *width, uint *height)
+{
+ if (fb->cbufs[0]) {
+ *width = fb->cbufs[0]->width;
+ *height = fb->cbufs[0]->height;
+ return TRUE;
+ }
+ else if (fb->zsbuf) {
+ *width = fb->zsbuf->width;
+ *height = fb->zsbuf->height;
+ return TRUE;
+ }
+ else {
+ *width = *height = 0;
+ return FALSE;
+ }
+}
+
static void update_framebuffer(struct i915_context *i915)
{
- /* HW emit currently references framebuffer state directly:
+ struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
+ struct pipe_surface *depth_surface = i915->framebuffer.zsbuf;
+ unsigned cformat, zformat;
+ unsigned x, y, w, h;
+ int layer;
+ uint32_t draw_offset;
+ boolean ret;
+
+ if (cbuf_surface) {
+ struct i915_texture *tex = i915_texture(cbuf_surface->texture);
+ assert(tex);
+
+ i915->current.cbuf_bo = tex->buffer;
+ i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK |
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
+ buf_3d_tiling_bits(tex->tiling);
+ cformat = cbuf_surface->format;
+
+ layer = cbuf_surface->u.tex.first_layer;
+
+ x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx;
+ y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy;
+ } else {
+ i915->current.cbuf_bo = NULL;
+ cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */
+ x = y = 0;
+ }
+ cformat = translate_format(cformat);
+
+ /* What happens if no zbuf??
*/
+ if (depth_surface) {
+ struct i915_texture *tex = i915_texture(depth_surface->texture);
+ unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level,
+ depth_surface->u.tex.first_layer);
+ assert(tex);
+ assert(offset == 0);
+
+ i915->current.depth_bo = tex->buffer;
+ i915->current.depth_flags = BUF_3D_ID_DEPTH |
+ BUF_3D_PITCH(tex->stride) | /* pitch in bytes */
+ buf_3d_tiling_bits(tex->tiling);
+ zformat = translate_depth_format(depth_surface->format);
+ } else {
+ i915->current.depth_bo = NULL;
+ zformat = 0;
+ }
+
+ i915->current.dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */
+ DSTORG_VERT_BIAS(0x8) | /* .5 */
+ LOD_PRECLAMP_OGL |
+ TEX_DEFAULT_COLOR_OGL |
+ cformat |
+ zformat;
+
+ /* drawing rect calculations */
+ draw_offset = x | (y << 16);
+ ret = framebuffer_size(&i915->framebuffer, &w, &h);
+ assert(ret);
+ if (i915->current.draw_offset != draw_offset) {
+ i915->current.draw_offset = draw_offset;
+ /* XXX: only emit flush on change and not always in emit */
+ }
+ i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);
+
i915->hardware_dirty |= I915_HW_STATIC;
+
+ /* flush the cache in case we sample from the old renderbuffers */
+ i915_set_flush_dirty(i915, I915_FLUSH_CACHE);
}
struct i915_tracked_state i915_hw_framebuffer = {
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index e915a886c9b..4ac2f5b9777 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -95,6 +95,18 @@ struct i915_winsys {
(*batchbuffer_create)(struct i915_winsys *iws);
/**
+ * Validate buffers for usage in this batchbuffer.
+ * Does space-checking and asorted other book-keeping.
+ *
+ * @batch
+ * @buffers array to buffers to validate
+ * @num_of_buffers size of the passed array
+ */
+ boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffers,
+ int num_of_buffers);
+
+ /**
* Emit a relocation to a buffer.
* Target position in batchbuffer is the same as ptr.
*
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 3a55e76bc35..a21a3c74484 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -47,6 +47,9 @@ lp_fence_create(unsigned rank)
static int fence_id;
struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
+ if (!fence)
+ return NULL;
+
pipe_reference_init(&fence->reference, 1);
pipe_mutex_init(fence->mutex);
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 21e8012d46a..2c32aa93cdf 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -278,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
return util_format_s3tc_enabled;
}
+ /* u_format doesn't support RGTC yet */
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ return FALSE;
+ }
+
/*
* Everything else should be supported by u_format.
*/
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 1968d0feb35..990acea9f44 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -255,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS690;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x793F:
@@ -265,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS600;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x796C:
@@ -276,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->family = CHIP_FAMILY_RS740;
caps->has_tcl = FALSE;
caps->is_r400 = TRUE;
- caps->hiz_ram = R300_HIZ_LIMIT;
- caps->zmask_ram = PIPE_ZMASK_SIZE;
break;
case 0x7100:
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 9f85bd4ce5f..d422ffe03f8 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -203,7 +203,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
/* SC. */
R300_INIT_ATOM(scissor_state, 3);
/* GB, FG, GA, SU, SC, RB3D. */
- R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+ R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0));
/* VAP. */
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(pvs_flush, 2);
@@ -353,6 +353,7 @@ static void r300_init_states(struct pipe_context *pipe)
OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+ OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff);
if (r300->screen->caps.is_rv350) {
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 30073759476..e9c7d7bf63f 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -220,7 +220,7 @@ struct r300_vertex_stream_state {
};
struct r300_invariant_state {
- uint32_t cb[20];
+ uint32_t cb[22];
};
struct r300_vap_invariant_state {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 354144cac79..b97c45ac198 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -214,11 +214,18 @@ uint32_t r300_translate_texformat(enum pipe_format format,
/* RGTC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
switch (format) {
- case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_RGTC1_SNORM:
+ result |= sign_bit[0];
+ case PIPE_FORMAT_RGTC1_UNORM:
+ result &= ~(0xfff << 9); /* mask off swizzle */
+ result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT;
return R500_TX_FORMAT_ATI1N | result;
- case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_RGTC2_SNORM:
+ result |= sign_bit[0] | sign_bit[1];
+ case PIPE_FORMAT_RGTC2_UNORM:
+ result &= ~(0xfff << 9); /* mask off swizzle */
+ result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT |
+ R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT;
return R400_TX_FORMAT_ATI2N | result;
default:
return ~0; /* Unsupported/unknown. */
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 4f86e3b4c38..8cb417f9731 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -98,31 +98,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
return 0;
}
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+void eg_cf_vtx(struct r600_vertex_element *ve)
{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
+ struct r600_pipe_state *rstate = &ve->rstate;
rstate->id = R600_PIPE_STATE_FETCH_SHADER;
rstate->nregs = 0;
r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 64c52bca795..1b76f0098dd 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -118,10 +118,10 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
struct r600_bo *r600_bo(struct radeon *radeon,
- unsigned size, unsigned alignment,
- unsigned binding, unsigned usage);
+ unsigned size, unsigned alignment,
+ unsigned binding, unsigned usage);
struct r600_bo *r600_bo_handle(struct radeon *radeon,
- unsigned handle, unsigned *array_mode);
+ unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1393df88757..8006e9b9a58 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -86,6 +86,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
return 1;
@@ -135,6 +136,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
return 1;
@@ -1441,7 +1443,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
- bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
bc->bytecode[id++] = 0;
return 0;
}
@@ -2778,12 +2781,13 @@ void r600_bc_dump(struct r600_bc *bc)
fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
- fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format);
- fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all);
- fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all);
- fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all);
+ fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format);
+ fprintf(stderr, "NUM:%d ", vtx->num_format_all);
+ fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
+ fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
id++;
- fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET:%d\n", vtx->offset);
//TODO
id++;
fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
@@ -2794,29 +2798,9 @@ void r600_bc_dump(struct r600_bc *bc)
fprintf(stderr, "--------------------------------------\n");
}
-static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+static void r600_cf_vtx(struct r600_vertex_element *ve)
{
struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
rstate = &ve->rstate;
rstate->id = R600_PIPE_STATE_FETCH_SHADER;
@@ -2962,37 +2946,19 @@ out_unknown:
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
{
- unsigned ndw, i;
- u32 *bytecode;
- unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ static int dump_shaders = -1;
+
+ struct r600_bc bc;
+ struct r600_bc_vtx vtx;
struct pipe_vertex_element *elements = ve->elements;
const struct util_format_description *desc;
-
- /* 2 dwords for cf aligned to 4 + 4 dwords per input */
- ndw = 8 + ve->count * 4;
- ve->fs_size = ndw * 4;
-
- /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
- ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
- if (ve->fetch_shader == NULL) {
- return -ENOMEM;
- }
-
- bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
- if (bytecode == NULL) {
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
- return -ENOMEM;
- }
-
- if (rctx->family >= CHIP_CEDAR) {
- eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- } else {
- r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- fetch_resource_start = 160;
- }
+ unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
+ unsigned format, num_format, format_comp;
+ u32 *bytecode;
+ int i, r;
/* vertex elements offset need special handling, if offset is bigger
- * than what we can put in fetch instruction then we need to alterate
+ + * than what we can put in fetch instruction then we need to alterate
* the vertex resource offset. In such case in order to simplify code
* we will bound one resource per elements. It's a worst case scenario.
*/
@@ -3003,40 +2969,155 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
}
}
+ memset(&bc, 0, sizeof(bc));
+ r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
+ if (r)
+ return r;
+
+ for (i = 0; i < ve->count; i++) {
+ if (elements[i].instance_divisor > 1) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+ alu.src[0].sel = i + 1;
+ alu.src[0].chan = 3;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor);
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
+ alu.src[0].sel = i + 1;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
+ alu.src[0].sel = i + 1;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+ }
+ }
+
for (i = 0; i < ve->count; i++) {
unsigned vbuffer_index;
r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
desc = util_format_description(ve->elements[i].src_format);
if (desc == NULL) {
+ r600_bc_clear(&bc);
R600_ERR("unknown format %d\n", ve->elements[i].src_format);
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
return -EINVAL;
}
/* see above for vbuffer_need_offset explanation */
vbuffer_index = elements[i].vertex_buffer_index;
- if (ve->vbuffer_need_offset) {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
- } else {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
- }
- bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
- S_SQ_VTX_WORD0_SRC_SEL_X(0) |
- S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
- bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
- S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
- S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
- S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
- S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
- S_SQ_VTX_WORD1_DATA_FORMAT(format) |
- S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
- S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
- S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
- S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
- bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
- S_SQ_VTX_WORD2_MEGA_FETCH(1);
- bytecode[8 + i * 4 + 3] = 0;
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
+ vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
+ vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
+ vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = i + 1;
+ vtx.dst_sel_x = desc->swizzle[0];
+ vtx.dst_sel_y = desc->swizzle[1];
+ vtx.dst_sel_z = desc->swizzle[2];
+ vtx.dst_sel_w = desc->swizzle[3];
+ vtx.data_format = format;
+ vtx.num_format_all = num_format;
+ vtx.format_comp_all = format_comp;
+ vtx.srf_mode_all = 1;
+ vtx.offset = elements[i].src_offset;
+
+ if ((r = r600_bc_add_vtx(&bc, &vtx))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+ }
+
+ r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ r600_bc_clear(&bc);
+ return -ENOMEM;
+ }
+
+ ve->fs_size = bc.ndw*4;
+ if ((r = r600_bc_build(&bc))) {
+ r600_bc_clear(&bc);
+ return r;
}
+
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ r600_bc_dump(&bc);
+ fprintf(stderr, "______________________________________________________________\n");
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bc_clear(&bc);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ memcpy(bytecode, bc.bytecode, ve->fs_size);
+
r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ r600_bc_clear(&bc);
+
+ if (rctx->family >= CHIP_CEDAR)
+ eg_cf_vtx(ve);
+ else
+ r600_cf_vtx(ve);
+
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 453c29790c1..dbd1e204b49 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -103,6 +103,7 @@ struct r600_bc_vtx {
unsigned num_format_all;
unsigned format_comp_all;
unsigned srf_mode_all;
+ unsigned offset;
};
struct r600_bc_output {
@@ -187,7 +188,7 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void eg_cf_vtx(struct r600_vertex_element *ve);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 9865ea17ae5..04408a5cc8e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -225,7 +225,7 @@ struct texture_orig_info {
unsigned height0;
};
-static void r600_s3tc_to_blittable(struct pipe_resource *tex,
+static void r600_compressed_to_blittable(struct pipe_resource *tex,
unsigned level,
struct texture_orig_info *orig)
{
@@ -253,7 +253,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex,
}
-static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex,
+static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
unsigned level,
struct texture_orig_info *orig)
{
@@ -282,13 +282,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
restore_orig[0] = restore_orig[1] = FALSE;
- if (util_format_is_s3tc(src->format)) {
- r600_s3tc_to_blittable(src, src_level, &orig_info[0]);
+ if (util_format_is_compressed(src->format)) {
+ r600_compressed_to_blittable(src, src_level, &orig_info[0]);
restore_orig[0] = TRUE;
}
- if (util_format_is_s3tc(dst->format)) {
- r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]);
+ if (util_format_is_compressed(dst->format)) {
+ r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
restore_orig[1] = TRUE;
/* translate the dst box as well */
dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
@@ -299,10 +299,10 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
src, src_level, src_box);
if (restore_orig[0])
- r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]);
+ r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]);
if (restore_orig[1])
- r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]);
+ r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 0c5d7133c7a..2363cd1ebc5 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx,
}
static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- const void *data,
- unsigned stride,
- unsigned layer_stride)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
{
struct radeon *ws = (struct radeon*)pipe->winsys;
struct r600_resource_buffer *rbuffer = r600_buffer(resource);
@@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
rbuffer->r.b.b.b.depth0 = 1;
rbuffer->r.b.b.b.array_size = 1;
rbuffer->r.b.b.b.flags = 0;
- rbuffer->r.b.user_ptr = ptr;
+ rbuffer->r.b.user_ptr = ptr;
rbuffer->r.bo = NULL;
rbuffer->r.bo_size = 0;
return &rbuffer->r.b.b.b;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 34094001b75..3fd6668f718 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -77,8 +77,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
u_upload_flush(rctx->vbuf_mgr->uploader);
}
-static void r600_update_num_contexts(struct r600_screen *rscreen,
- int diff)
+static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
{
pipe_mutex_lock(rscreen->mutex_num_contexts);
if (diff > 0) {
@@ -286,13 +285,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_INSTANCED_DRAWING:
return 1;
/* Unsupported features (boolean caps). */
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
- case PIPE_CAP_INSTANCED_DRAWING:
return 0;
case PIPE_CAP_ARRAY_TEXTURES:
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 240c8f1ffd0..0b4dc75e584 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
- /* Would like some magic "get_bool_option_once" routine.
- */
- if (dump_shaders == -1)
- dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
if (dump_shaders) {
fprintf(stderr, "--------------------------------------------------------------\n");
@@ -420,6 +420,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
unsigned i;
+ int r;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
@@ -451,6 +452,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
break;
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = 0;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+ break;
+ }
+
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
return -EINVAL;
@@ -521,6 +542,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
r600_src->neg = tgsi_src->Register.Negate;
r600_src->abs = tgsi_src->Register.Absolute;
+
if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
int index;
if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
@@ -535,6 +557,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
index = tgsi_src->Register.Index;
r600_src->sel = V_SQ_ALU_SRC_LITERAL;
memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
+ } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
+ /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
+ r600_src->swizzle[0] = 3;
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 0;
} else {
if (tgsi_src->Register.Indirect)
r600_src->rel = V_SQ_REL_RELATIVE;
@@ -2858,7 +2887,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3016,7 +3045,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 72707fbd8b8..3c072fe7ca9 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx)
tmp |= S_028644_PT_SPRITE_TEX(1);
}
- if (rctx->family < CHIP_CEDAR) {
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
+ if (rctx->family < CHIP_CEDAR) {
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
- }
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+ }
r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
@@ -520,7 +520,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_context_pipe_state_set(&rctx->ctx, &vgt);
rdraw.vgt_num_indices = draw.info.count;
- rdraw.vgt_num_instances = 1;
+ rdraw.vgt_num_instances = draw.info.instance_count;
rdraw.vgt_index_type = vgt_dma_index_type;
rdraw.vgt_draw_initiator = vgt_draw_initiator;
rdraw.indices = NULL;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 4c9d5609c06..048d0b61e3b 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -292,7 +292,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
bind = PIPE_BIND_RENDER_TARGET;
/* hackaround for S3TC */
- if (util_format_is_s3tc(res->format))
+ if (util_format_is_compressed(res->format))
return TRUE;
if (!screen->is_format_supported(screen,
@@ -433,7 +433,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
}
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
- util_format_is_s3tc(templ->format))
+ util_format_is_compressed(templ->format))
array_mode = V_038000_ARRAY_1D_TILED_THIN1;
return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
@@ -887,12 +887,14 @@ uint32_t r600_translate_texformat(enum pipe_format format,
goto out_unknown;
switch (format) {
- case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_RGTC1_SNORM:
+ word4 |= sign_bit[0];
+ case PIPE_FORMAT_RGTC1_UNORM:
result = FMT_BC4;
goto out_word4;
- case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_RGTC2_SNORM:
+ word4 |= sign_bit[0] | sign_bit[1];
+ case PIPE_FORMAT_RGTC2_UNORM:
result = FMT_BC5;
goto out_word4;
default:
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a06817c5735..603e1de7982 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -249,6 +249,11 @@ softpipe_is_format_supported( struct pipe_screen *screen,
return util_format_s3tc_enabled;
}
+ /* u_format doesn't implement RGTC yet */
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ return FALSE;
+ }
+
/*
* Everything else should be supported by u_format.
*/
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index afeab5eef42..7cc5af89639 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -5,6 +5,7 @@
#include "i915_drm.h"
#include "i915/i915_debug.h"
#include <xf86drm.h>
+#include <stdio.h>
#define BATCH_RESERVED 16
@@ -71,6 +72,26 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
return &batch->base;
}
+static boolean
+i915_drm_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffer,
+ int num_of_buffers)
+{
+ struct i915_drm_batchbuffer *drm_batch = i915_drm_batchbuffer(batch);
+ drm_intel_bo *bos[num_of_buffers + 1];
+ int i, ret;
+
+ bos[0] = drm_batch->bo;
+ for (i = 0; i < num_of_buffers; i++)
+ bos[i+1] = intel_bo(buffer[i]);
+
+ ret = drm_intel_bufmgr_check_aperture_space(bos, num_of_buffers);
+ if (ret != 0)
+ return FALSE;
+
+ return TRUE;
+}
+
static int
i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
@@ -169,6 +190,14 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
assert(ret == 0);
}
+ if (i915_drm_winsys(ibatch->iws)->dump_raw_file) {
+ FILE *file = fopen(i915_drm_winsys(ibatch->iws)->dump_raw_file, "a");
+ if (file) {
+ fwrite(batch->base.map, used, 1, file);
+ fclose(file);
+ }
+ }
+
#ifdef INTEL_RUN_SYNC
drm_intel_bo_wait_rendering(batch->bo);
#endif
@@ -202,6 +231,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
void i915_drm_winsys_init_batchbuffer_functions(struct i915_drm_winsys *idws)
{
idws->base.batchbuffer_create = i915_drm_batchbuffer_create;
+ idws->base.validate_buffers = i915_drm_batchbuffer_validate_buffers;
idws->base.batchbuffer_reloc = i915_drm_batchbuffer_reloc;
idws->base.batchbuffer_flush = i915_drm_batchbuffer_flush;
idws->base.batchbuffer_destroy = i915_drm_batchbuffer_destroy;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
index 2288b48b2bd..2c3b508d056 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c
@@ -72,6 +72,7 @@ i915_drm_winsys_create(int drmFD)
drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager);
idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
+ idws->dump_raw_file = debug_get_option("I915_DUMP_RAW_FILE", NULL);
idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE);
return &idws->base;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
index 0d74d0270c7..dae53c3e801 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
@@ -18,6 +18,7 @@ struct i915_drm_winsys
struct i915_winsys base;
boolean dump_cmd;
+ char *dump_raw_file;
boolean send_cmd;
int fd; /**< Drm file discriptor */
diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index 8085591c8eb..3d0c1fa6224 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -58,6 +58,14 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws)
return &batch->base;
}
+static boolean
+i915_sw_batchbuffer_validate_buffers(struct i915_winsys_batchbuffer *batch,
+ struct i915_winsys_buffer **buffer,
+ int num_of_buffers)
+{
+ return TRUE;
+}
+
static int
i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_winsys_buffer *buffer,
@@ -107,16 +115,16 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch,
#ifdef INTEL_ALWAYS_FLUSH
/* MI_FLUSH | FLUSH_MAP_CACHE */
- i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0));
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, (0x4<<23)|(1<<0));
used += 4;
#endif
if ((used & 4) == 0) {
/* MI_NOOP */
- i915_winsys_batchbuffer_dword(ibatch, 0);
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, 0);
}
/* MI_BATCH_BUFFER_END */
- i915_winsys_batchbuffer_dword(ibatch, (0xA<<23));
+ i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23));
used = batch->base.ptr - batch->base.map;
assert((used & 4) == 0);
@@ -146,6 +154,7 @@ i915_sw_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch)
void i915_sw_winsys_init_batchbuffer_functions(struct i915_sw_winsys *isws)
{
isws->base.batchbuffer_create = i915_sw_batchbuffer_create;
+ isws->base.validate_buffers = i915_sw_batchbuffer_validate_buffers;
isws->base.batchbuffer_reloc = i915_sw_batchbuffer_reloc;
isws->base.batchbuffer_flush = i915_sw_batchbuffer_flush;
isws->base.batchbuffer_destroy = i915_sw_batchbuffer_destroy;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.c b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
index 058ddc44aaf..fc48da6fb92 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_winsys.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.c
@@ -50,7 +50,7 @@ i915_sw_winsys_create()
isws->base.pci_id = deviceID;
isws->max_batch_size = 16 * 4096;
- isws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE);
+ isws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE);
return &isws->base;
}
diff --git a/src/gallium/winsys/sw/xlib/SConscript b/src/gallium/winsys/sw/xlib/SConscript
index f6c47411831..df01a9ec2bf 100644
--- a/src/gallium/winsys/sw/xlib/SConscript
+++ b/src/gallium/winsys/sw/xlib/SConscript
@@ -4,7 +4,7 @@
Import('*')
-if env['platform'] in ('cygwin_nt-5.1', 'cygwin_nt-6.1', 'linux'):
+if env['platform'] in ('cygwin', 'linux'):
env = env.Clone()
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 876f0dfc2a5..df031d2d548 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -208,6 +208,6 @@ builtin_compiler: $(GLSL2_OBJECTS) $(OBJECTS) builtin_stubs.o
builtin_function.cpp: builtins/profiles/* builtins/ir/* builtins/tools/generate_builtins.py builtins/tools/texture_builtins.py builtin_compiler
@echo Regenerating builtin_function.cpp...
- $(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp
+ $(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py ./builtin_compiler > builtin_function.cpp || rm -f builtin_function.cpp
-include depend
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index bef099cca3b..fd1f0b49f42 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3445,11 +3445,9 @@ ast_struct_specifier::hir(exec_list *instructions,
if (!state->symbols->add_type(name, t)) {
_mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
} else {
-
- const glsl_type **s = (const glsl_type **)
- realloc(state->user_structures,
- sizeof(state->user_structures[0]) *
- (state->num_user_structures + 1));
+ const glsl_type **s = reralloc(state, state->user_structures,
+ const glsl_type *,
+ state->num_user_structures + 1);
if (s != NULL) {
s[state->num_user_structures] = t;
state->user_structures = s;
diff --git a/src/glsl/builtin_types.h b/src/glsl/builtin_types.h
index 8ccbf6e312f..58b9a81273a 100644
--- a/src/glsl/builtin_types.h
+++ b/src/glsl/builtin_types.h
@@ -27,6 +27,10 @@ const glsl_type glsl_type::_error_type =
const glsl_type glsl_type::_void_type =
glsl_type(GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0, "void");
+const glsl_type glsl_type::_sampler3D_type =
+ glsl_type(GL_SAMPLER_3D, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT,
+ "sampler3D");
+
const glsl_type *const glsl_type::error_type = & glsl_type::_error_type;
const glsl_type *const glsl_type::void_type = & glsl_type::_void_type;
@@ -181,8 +185,6 @@ const glsl_type glsl_type::builtin_110_types[] = {
"sampler1DShadow"),
glsl_type(GL_SAMPLER_2D_SHADOW, GLSL_SAMPLER_DIM_2D, 1, 0, GLSL_TYPE_FLOAT,
"sampler2DShadow"),
- glsl_type(GL_SAMPLER_3D, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT,
- "sampler3D"),
};
/*@}*/
diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag
index 43653a906f8..0e3c7ac4199 100644
--- a/src/glsl/builtins/profiles/130.frag
+++ b/src/glsl/builtins/profiles/130.frag
@@ -491,8 +491,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod);
ivec2 textureSize(isampler1DArray sampler, int lod);
ivec2 textureSize(usampler1DArray sampler, int lod);
ivec3 textureSize( sampler2DArray sampler, int lod);
-ivec2 textureSize(isampler2DArray sampler, int lod);
-ivec2 textureSize(usampler2DArray sampler, int lod);
+ivec3 textureSize(isampler2DArray sampler, int lod);
+ivec3 textureSize(usampler2DArray sampler, int lod);
ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert
index 742dec6e6d5..f85b27f8f8c 100644
--- a/src/glsl/builtins/profiles/130.vert
+++ b/src/glsl/builtins/profiles/130.vert
@@ -493,8 +493,8 @@ ivec2 textureSize( sampler1DArray sampler, int lod);
ivec2 textureSize(isampler1DArray sampler, int lod);
ivec2 textureSize(usampler1DArray sampler, int lod);
ivec3 textureSize( sampler2DArray sampler, int lod);
-ivec2 textureSize(isampler2DArray sampler, int lod);
-ivec2 textureSize(usampler2DArray sampler, int lod);
+ivec3 textureSize(isampler2DArray sampler, int lod);
+ivec3 textureSize(usampler2DArray sampler, int lod);
ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
diff --git a/src/glsl/builtins/profiles/OES_texture_3D.frag b/src/glsl/builtins/profiles/OES_texture_3D.frag
new file mode 100644
index 00000000000..b6ebd6a311f
--- /dev/null
+++ b/src/glsl/builtins/profiles/OES_texture_3D.frag
@@ -0,0 +1,7 @@
+#version 100
+#extension GL_OES_texture_3D : enable
+
+vec4 texture3D (sampler3D sampler, vec3 coord);
+vec4 texture3DProj (sampler3D sampler, vec4 coord);
+vec4 texture3D (sampler3D sampler, vec3 coord, float bias);
+vec4 texture3DProj (sampler3D sampler, vec4 coord, float bias);
diff --git a/src/glsl/builtins/profiles/OES_texture_3D.vert b/src/glsl/builtins/profiles/OES_texture_3D.vert
new file mode 100644
index 00000000000..81d12f51e9f
--- /dev/null
+++ b/src/glsl/builtins/profiles/OES_texture_3D.vert
@@ -0,0 +1,7 @@
+#version 100
+#extension GL_OES_texture_3D : enable
+
+vec4 texture3D (sampler3D sampler, vec3 coord);
+vec4 texture3DProj (sampler3D sampler, vec4 coord);
+vec4 texture3DLod (sampler3D sampler, vec3 coord, float lod);
+vec4 texture3DProjLod (sampler3D sampler, vec4 coord, float lod);
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index d7a37aef46d..e8c60936fb6 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -256,6 +256,11 @@ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
state->AMD_conservative_depth_enable = (ext_mode != extension_disable);
state->AMD_conservative_depth_warn = (ext_mode == extension_warn);
unsupported = !state->extensions->AMD_conservative_depth;
+ } else if (strcmp(name, "GL_OES_texture_3D") == 0 && state->es_shader) {
+ state->OES_texture_3D_enable = (ext_mode != extension_disable);
+ state->OES_texture_3D_warn = (ext_mode == extension_warn);
+
+ unsupported = !state->extensions->EXT_texture3D;
} else {
unsupported = true;
}
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 10cb673c694..b5c016fb399 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -172,6 +172,8 @@ struct _mesa_glsl_parse_state {
unsigned ARB_shader_stencil_export_warn:1;
unsigned AMD_conservative_depth_enable:1;
unsigned AMD_conservative_depth_warn:1;
+ unsigned OES_texture_3D_enable:1;
+ unsigned OES_texture_3D_warn:1;
/*@}*/
/** Extensions supported by the OpenGL implementation. */
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 76b4f3e4cb0..78d10bd9380 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -131,6 +131,7 @@ glsl_type::generate_110_types(glsl_symbol_table *symtab)
add_types_to_symbol_table(symtab, builtin_110_types,
Elements(builtin_110_types),
false);
+ add_types_to_symbol_table(symtab, &_sampler3D_type, 1, false);
add_types_to_symbol_table(symtab, builtin_110_deprecated_structure_types,
Elements(builtin_110_deprecated_structure_types),
false);
@@ -179,6 +180,13 @@ glsl_type::generate_EXT_texture_array_types(glsl_symbol_table *symtab,
void
+glsl_type::generate_OES_texture_3D_types(glsl_symbol_table *symtab, bool warn)
+{
+ add_types_to_symbol_table(symtab, &_sampler3D_type, 1, warn);
+}
+
+
+void
_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
{
switch (state->language_version) {
@@ -204,6 +212,10 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
glsl_type::generate_ARB_texture_rectangle_types(state->symbols,
state->ARB_texture_rectangle_warn);
}
+ if (state->OES_texture_3D_enable && state->language_version == 100) {
+ glsl_type::generate_OES_texture_3D_types(state->symbols,
+ state->OES_texture_3D_warn);
+ }
if (state->EXT_texture_array_enable && state->language_version < 130) {
// These are already included in 130; don't create twice.
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 61bf5e0cfd2..3c2672c01a0 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -427,6 +427,7 @@ private:
/*@{*/
static const glsl_type _error_type;
static const glsl_type _void_type;
+ static const glsl_type _sampler3D_type;
static const glsl_type builtin_core_types[];
static const glsl_type builtin_structure_types[];
static const glsl_type builtin_110_deprecated_structure_types[];
@@ -453,6 +454,7 @@ private:
static void generate_130_types(glsl_symbol_table *);
static void generate_ARB_texture_rectangle_types(glsl_symbol_table *, bool);
static void generate_EXT_texture_array_types(glsl_symbol_table *, bool);
+ static void generate_OES_texture_3D_types(glsl_symbol_table *, bool);
/*@}*/
/**
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index a275ba5b9fe..2c28bc27150 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -535,8 +535,13 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
CARD64 ret = 0;
#ifdef __DRI2_FLUSH
- if (psc->f)
- (*psc->f->flush)(priv->driDrawable);
+ if (psc->f) {
+ struct glx_context *gc = __glXGetCurrentContext();
+
+ if (gc) {
+ (*psc->f->flush)(priv->driDrawable);
+ }
+ }
#endif
/* Old servers don't send invalidate events */
diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h
index fdcef8075a8..2b6966f2e08 100644
--- a/src/glx/glxclient.h
+++ b/src/glx/glxclient.h
@@ -419,9 +419,9 @@ struct glx_context
/*@} */
/**
- * Thread ID we're currently current in. Zero if none.
+ * Number of threads we're currently current in.
*/
- unsigned long thread_id;
+ unsigned long thread_refcount;
char gl_extension_bits[__GL_EXT_BYTES];
};
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 80eaf72b7d5..22bebab26bc 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -727,11 +727,16 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
xGLXSwapBuffersReq *req;
#endif
+ gc = __glXGetCurrentContext();
+
#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
if (pdraw != NULL) {
- glFlush();
+ if (gc && drawable == gc->currentDrawable) {
+ glFlush();
+ }
+
(*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
return;
}
@@ -746,7 +751,6 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
** The calling thread may or may not have a current context. If it
** does, send the context tag so the server can do a flush.
*/
- gc = __glXGetCurrentContext();
if ((gc != NULL) && (dpy == gc->currentDpy) &&
((drawable == gc->currentDrawable)
|| (drawable == gc->currentReadable))) {
diff --git a/src/glx/glxcurrent.c b/src/glx/glxcurrent.c
index 36317383544..9a6499037b1 100644
--- a/src/glx/glxcurrent.c
+++ b/src/glx/glxcurrent.c
@@ -216,6 +216,16 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
struct glx_context *oldGC = __glXGetCurrentContext();
int ret = Success;
+ /* XXX: If this is left out, then libGL ends up not having this
+ * symbol, and drivers using it fail to load. Compare the
+ * implementation of this symbol to _glapi_noop_enable_warnings(),
+ * though, which gets into the library despite no callers, the same
+ * prototypes, and the same compile flags to the files containing
+ * them. Moving the definition to glapi_nop.c gets it into the
+ * library, though.
+ */
+ (void)_glthread_GetID();
+
/* Make sure that the new context has a nonzero ID. In the request,
* a zero context ID is used only to mean that we bind to no current
* context.
@@ -236,41 +246,42 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
_glapi_check_multithread();
- if (gc != NULL && gc->thread_id != 0 && gc->thread_id != _glthread_GetID()) {
- __glXGenerateError(dpy, gc, gc->xid,
- BadAccess, X_GLXMakeContextCurrent);
- return False;
- }
-
+ __glXLock();
if (oldGC == gc &&
- gc->currentDrawable == draw && gc->currentReadable == read)
+ gc->currentDrawable == draw && gc->currentReadable == read) {
+ __glXUnlock();
return True;
+ }
if (oldGC != &dummyContext) {
- oldGC->vtable->unbind(oldGC, gc);
- oldGC->currentDpy = 0;
- oldGC->currentDrawable = None;
- oldGC->currentReadable = None;
- oldGC->thread_id = 0;
+ if (--oldGC->thread_refcount == 0) {
+ oldGC->vtable->unbind(oldGC, gc);
+ oldGC->currentDpy = 0;
+ oldGC->currentDrawable = None;
+ oldGC->currentReadable = None;
+
+ if (oldGC->xid == None && oldGC != gc) {
+ /* We are switching away from a context that was
+ * previously destroyed, so we need to free the memory
+ * for the old handle. */
+ oldGC->vtable->destroy(oldGC);
+ }
+ }
}
if (gc) {
- gc->currentDpy = dpy;
- gc->currentDrawable = draw;
- gc->currentReadable = read;
- gc->thread_id = _glthread_GetID();
+ if (gc->thread_refcount++ == 0) {
+ gc->currentDpy = dpy;
+ gc->currentDrawable = draw;
+ gc->currentReadable = read;
+ }
__glXSetCurrentContext(gc);
ret = gc->vtable->bind(gc, oldGC, draw, read);
} else {
__glXSetCurrentContextNull();
}
- if (oldGC != &dummyContext && oldGC->xid == None && oldGC != gc) {
- /* We are switching away from a context that was
- * previously destroyed, so we need to free the memory
- * for the old handle. */
- oldGC->vtable->destroy(oldGC);
- }
+ __glXUnlock();
if (ret) {
__glXGenerateError(dpy, gc, None, ret, X_GLXMakeContextCurrent);
diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index 3a0e64c46d1..ffd466479b4 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -90,6 +90,7 @@ static const struct extension_info known_glx_extensions[] = {
{ GLX(MESA_agp_offset), VER(0,0), N, N, N, Y }, /* Deprecated */
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
#endif
+ { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
{ GLX(MESA_pixmap_colormap), VER(0,0), N, N, N, N }, /* Deprecated */
{ GLX(MESA_release_buffers), VER(0,0), N, N, N, N }, /* Deprecated */
#ifdef GLX_USE_APPLEGL
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 78776618338..333b3f9adbd 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -43,6 +43,7 @@ enum
MESA_agp_offset_bit,
MESA_copy_sub_buffer_bit,
MESA_depth_float_bit,
+ MESA_multithread_makecurrent_bit,
MESA_pixmap_colormap_bit,
MESA_release_buffers_bit,
MESA_swap_control_bit,
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index ea04fb1a0ee..90fec124af9 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -106,6 +106,7 @@ main_sources = [
'main/stencil.c',
'main/syncobj.c',
'main/texcompress.c',
+ 'main/texcompress_rgtc.c',
'main/texcompress_s3tc.c',
'main/texcompress_fxt1.c',
'main/texenv.c',
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a413c02b573..5496b4fdd3b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -679,6 +679,8 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 30e3bd54469..9bdcda780ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -213,6 +213,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
return 2;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
+ case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
return 1;
case FS_OPCODE_FB_WRITE:
@@ -1200,6 +1201,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
+ } else if (ir->op == ir_txd) {
+ assert(!"TXD isn't supported on gen4 yet.");
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
* instructions. We'll need to do SIMD16 here.
@@ -1253,6 +1256,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
inst = emit(fs_inst(FS_OPCODE_TXL, dst));
break;
case ir_txd:
+ inst = emit(fs_inst(FS_OPCODE_TXD, dst));
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -2308,6 +2313,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
}
break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen5+ yet.");
+ break;
}
} else {
switch (inst->opcode) {
@@ -2325,13 +2340,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
case FS_OPCODE_TXB:
if (inst->shadow_compare) {
assert(inst->mlen == 6);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
} else {
assert(inst->mlen == 9);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ assert(inst->mlen == 6);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
+ } else {
+ assert(inst->mlen == 9);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen4 yet.");
+ break;
}
}
assert(msg_type != -1);
@@ -3607,6 +3635,7 @@ fs_visitor::generate_code()
break;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
+ case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
generate_tex(inst, dst, src[0]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8352760acf7..dc030ae5b50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -71,6 +71,7 @@ enum fs_opcodes {
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
FS_OPCODE_TXB,
+ FS_OPCODE_TXD,
FS_OPCODE_TXL,
FS_OPCODE_DISCARD_NOT,
FS_OPCODE_DISCARD_AND,
@@ -309,6 +310,7 @@ public:
{
return (opcode == FS_OPCODE_TEX ||
opcode == FS_OPCODE_TXB ||
+ opcode == FS_OPCODE_TXD ||
opcode == FS_OPCODE_TXL);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 38c98f30efb..d6c1f1c893d 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -68,7 +68,7 @@ upload_clip_state(struct brw_context *brw)
depth_clamp |
provoking);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
- U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+ U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 356d5f72d89..746da462ee2 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -104,7 +104,8 @@ static const __DRItexBufferExtension intelTexBufferExtension = {
static void
intelDRI2Flush(__DRIdrawable *drawable)
{
- struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+ GET_CURRENT_CONTEXT(ctx);
+ struct intel_context *intel = intel_context(ctx);
if (intel->gen < 4)
INTEL_FIREVERTICES(intel);
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 7504b8a85db..b8bb2555acd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -241,8 +241,7 @@ static const struct extension extension_table[] = {
{ "GL_OES_stencil4", o(dummy_false), DISABLE },
{ "GL_OES_stencil8", o(EXT_framebuffer_object), ES1 | ES2 },
{ "GL_OES_stencil_wrap", o(EXT_stencil_wrap), ES1 },
- /* GL_OES_texture_3D is disabled due to missing GLSL support. */
- { "GL_OES_texture_3D", o(EXT_texture3D), DISABLE },
+ { "GL_OES_texture_3D", o(EXT_texture3D), ES2 },
{ "GL_OES_texture_cube_map", o(ARB_texture_cube_map), ES1 },
{ "GL_OES_texture_env_crossbar", o(ARB_texture_env_crossbar), ES1 },
{ "GL_OES_texture_mirrored_repeat", o(ARB_texture_mirrored_repeat), ES1 },
@@ -428,6 +427,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE;
ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE;
ctx->Extensions.ARB_texture_rg = GL_TRUE;
+ ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
ctx->Extensions.ARB_vertex_array_object = GL_TRUE;
#if FEATURE_ARB_vertex_program
ctx->Extensions.ARB_vertex_program = GL_TRUE;
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 1e395363475..947db84a69e 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -890,7 +890,43 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] =
16, 16, 16, 16,
0, 0, 0, 0, 0,
1, 1, 8
- }
+ },
+ {
+ MESA_FORMAT_RED_RGTC1,
+ "MESA_FORMAT_RED_RGTC1",
+ GL_RED,
+ GL_UNSIGNED_NORMALIZED,
+ 4, 0, 0, 0,
+ 0, 0, 0, 0, 0,
+ 4, 4, 8 /* 8 bytes per 4x4 block */
+ },
+ {
+ MESA_FORMAT_SIGNED_RED_RGTC1,
+ "MESA_FORMAT_SIGNED_RED_RGTC1",
+ GL_RED,
+ GL_SIGNED_NORMALIZED,
+ 4, 0, 0, 0,
+ 0, 0, 0, 0, 0,
+ 4, 4, 8 /* 8 bytes per 4x4 block */
+ },
+ {
+ MESA_FORMAT_RG_RGTC2,
+ "MESA_FORMAT_RG_RGTC2",
+ GL_RG,
+ GL_UNSIGNED_NORMALIZED,
+ 4, 4, 0, 0,
+ 0, 0, 0, 0, 0,
+ 4, 4, 16 /* 16 bytes per 4x4 block */
+ },
+ {
+ MESA_FORMAT_SIGNED_RG_RGTC2,
+ "MESA_FORMAT_SIGNED_RG_RGTC2",
+ GL_RG,
+ GL_SIGNED_NORMALIZED,
+ 4, 4, 0, 0,
+ 0, 0, 0, 0, 0,
+ 4, 4, 16 /* 16 bytes per 4x4 block */
+ },
};
@@ -1530,6 +1566,10 @@ _mesa_format_to_type_and_comps(gl_format format,
case MESA_FORMAT_SRGBA_DXT5:
#endif
#endif
+ case MESA_FORMAT_RED_RGTC1:
+ case MESA_FORMAT_SIGNED_RED_RGTC1:
+ case MESA_FORMAT_RG_RGTC2:
+ case MESA_FORMAT_SIGNED_RG_RGTC2:
/* XXX generate error instead? */
*datatype = GL_UNSIGNED_BYTE;
*comps = 0;
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index 9a5cef37788..e21967e2b0c 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -179,6 +179,12 @@ typedef enum
MESA_FORMAT_RGBA_16, /* ... */
/*@}*/
+ /*@{*/
+ MESA_FORMAT_RED_RGTC1,
+ MESA_FORMAT_SIGNED_RED_RGTC1,
+ MESA_FORMAT_RG_RGTC2,
+ MESA_FORMAT_SIGNED_RG_RGTC2,
+ /*@}*/
MESA_FORMAT_COUNT
} gl_format;
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 7a0b522a2d8..82d02ed0ecf 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -64,6 +64,7 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
n += 2;
}
}
+ /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
if (ctx->Extensions.EXT_texture_compression_s3tc) {
if (formats) {
formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
@@ -163,6 +164,15 @@ _mesa_glenum_to_compressed_format(GLenum format)
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
return MESA_FORMAT_SRGBA_DXT5;
+ case GL_COMPRESSED_RED_RGTC1:
+ return MESA_FORMAT_RED_RGTC1;
+ case GL_COMPRESSED_SIGNED_RED_RGTC1:
+ return MESA_FORMAT_SIGNED_RED_RGTC1;
+ case GL_COMPRESSED_RG_RGTC2:
+ return MESA_FORMAT_RG_RGTC2;
+ case GL_COMPRESSED_SIGNED_RG_RGTC2:
+ return MESA_FORMAT_SIGNED_RG_RGTC2;
+
default:
return MESA_FORMAT_NONE;
}
@@ -209,6 +219,16 @@ _mesa_compressed_format_to_glenum(struct gl_context *ctx, GLuint mesaFormat)
return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
#endif
#endif
+
+ case MESA_FORMAT_RED_RGTC1:
+ return GL_COMPRESSED_RED_RGTC1;
+ case MESA_FORMAT_SIGNED_RED_RGTC1:
+ return GL_COMPRESSED_SIGNED_RED_RGTC1;
+ case MESA_FORMAT_RG_RGTC2:
+ return GL_COMPRESSED_RG_RGTC2;
+ case MESA_FORMAT_SIGNED_RG_RGTC2:
+ return GL_COMPRESSED_SIGNED_RG_RGTC2;
+
default:
_mesa_problem(ctx, "Unexpected mesa texture format in"
" _mesa_compressed_format_to_glenum()");
diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
new file mode 100644
index 00000000000..1a01755f14d
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -0,0 +1,1122 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * block compression parts are:
+ * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ * Dave Airlie
+ */
+
+/**
+ * \file texcompress_rgtc.c
+ * GL_EXT_texture_compression_rgtc support.
+ */
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "image.h"
+#include "macros.h"
+#include "mfeatures.h"
+#include "mipmap.h"
+#include "texcompress.h"
+#include "texcompress_rgtc.h"
+#include "texstore.h"
+
+#define RGTC_DEBUG 0
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+ GLint numxpixels, GLint numypixels);
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+ GLint numxpixels, GLint numypixels);
+
+static void extractsrc_u( GLubyte srcpixels[4][4], const GLchan *srcaddr,
+ GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+ GLubyte i, j;
+ const GLchan *curaddr;
+ for (j = 0; j < numypixels; j++) {
+ curaddr = srcaddr + j * srcRowStride * comps;
+ for (i = 0; i < numxpixels; i++) {
+ srcpixels[j][i] = *curaddr / (CHAN_MAX / 255);
+ curaddr += comps;
+ }
+ }
+}
+
+static void extractsrc_s( GLbyte srcpixels[4][4], const GLfloat *srcaddr,
+ GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+ GLubyte i, j;
+ const GLfloat *curaddr;
+ for (j = 0; j < numypixels; j++) {
+ curaddr = srcaddr + j * srcRowStride * comps;
+ for (i = 0; i < numxpixels; i++) {
+ srcpixels[j][i] = FLOAT_TO_BYTE_TEX(*curaddr);
+ curaddr += comps;
+ }
+ }
+}
+
+
+GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
+{
+ GLubyte *dst;
+ const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+ const GLchan *tempImage = NULL;
+ int i, j;
+ int numxpixels, numypixels;
+ const GLchan *srcaddr;
+ GLubyte srcpixels[4][4];
+ GLubyte *blkaddr;
+ GLint dstRowDiff;
+ ASSERT(dstFormat == MESA_FORMAT_RED_RGTC1);
+ ASSERT(dstXoffset % 4 == 0);
+ ASSERT(dstYoffset % 4 == 0);
+ ASSERT(dstZoffset % 4 == 0);
+ (void) dstZoffset;
+ (void) dstImageOffsets;
+
+
+ tempImage = _mesa_make_temp_chan_image(ctx, dims,
+ baseInternalFormat,
+ _mesa_get_format_base_format(dstFormat),
+ srcWidth, srcHeight, srcDepth,
+ srcFormat, srcType, srcAddr,
+ srcPacking);
+ if (!tempImage)
+ return GL_FALSE; /* out of memory */
+
+ dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+ dstFormat,
+ texWidth, (GLubyte *) dstAddr);
+
+ blkaddr = dst;
+ dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+ for (j = 0; j < srcHeight; j+=4) {
+ if (srcHeight > j + 3) numypixels = 4;
+ else numypixels = srcHeight - j;
+ srcaddr = tempImage + j * srcWidth;
+ for (i = 0; i < srcWidth; i += 4) {
+ if (srcWidth > i + 3) numxpixels = 4;
+ else numxpixels = srcWidth - i;
+ extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+ encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+ srcaddr += numxpixels;
+ blkaddr += 8;
+ }
+ blkaddr += dstRowDiff;
+ }
+ if (tempImage)
+ free((void *) tempImage);
+
+ return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS)
+{
+ GLbyte *dst;
+ const GLint texWidth = dstRowStride * 4 / 8; /* a bit of a hack */
+ const GLfloat *tempImage = NULL;
+ int i, j;
+ int numxpixels, numypixels;
+ const GLfloat *srcaddr;
+ GLbyte srcpixels[4][4];
+ GLbyte *blkaddr;
+ GLint dstRowDiff;
+ ASSERT(dstFormat == MESA_FORMAT_SIGNED_RED_RGTC1);
+ ASSERT(dstXoffset % 4 == 0);
+ ASSERT(dstYoffset % 4 == 0);
+ ASSERT(dstZoffset % 4 == 0);
+ (void) dstZoffset;
+ (void) dstImageOffsets;
+
+ tempImage = _mesa_make_temp_float_image(ctx, dims,
+ baseInternalFormat,
+ _mesa_get_format_base_format(dstFormat),
+ srcWidth, srcHeight, srcDepth,
+ srcFormat, srcType, srcAddr,
+ srcPacking, 0x0);
+ if (!tempImage)
+ return GL_FALSE; /* out of memory */
+
+ dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+ dstFormat,
+ texWidth, (GLubyte *) dstAddr);
+
+ blkaddr = dst;
+ dstRowDiff = dstRowStride >= (srcWidth * 4) ? dstRowStride - (((srcWidth + 3) & ~3) * 4) : 0;
+ for (j = 0; j < srcHeight; j+=4) {
+ if (srcHeight > j + 3) numypixels = 4;
+ else numypixels = srcHeight - j;
+ srcaddr = tempImage + j * srcWidth;
+ for (i = 0; i < srcWidth; i += 4) {
+ if (srcWidth > i + 3) numxpixels = 4;
+ else numxpixels = srcWidth - i;
+ extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 1);
+ encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+ srcaddr += numxpixels;
+ blkaddr += 8;
+ }
+ blkaddr += dstRowDiff;
+ }
+ if (tempImage)
+ free((void *) tempImage);
+
+ return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
+{
+ GLubyte *dst;
+ const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+ const GLchan *tempImage = NULL;
+ int i, j;
+ int numxpixels, numypixels;
+ const GLchan *srcaddr;
+ GLubyte srcpixels[4][4];
+ GLubyte *blkaddr;
+ GLint dstRowDiff;
+
+ ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2);
+ ASSERT(dstXoffset % 4 == 0);
+ ASSERT(dstYoffset % 4 == 0);
+ ASSERT(dstZoffset % 4 == 0);
+ (void) dstZoffset;
+ (void) dstImageOffsets;
+
+ tempImage = _mesa_make_temp_chan_image(ctx, dims,
+ baseInternalFormat,
+ _mesa_get_format_base_format(dstFormat),
+ srcWidth, srcHeight, srcDepth,
+ srcFormat, srcType, srcAddr,
+ srcPacking);
+ if (!tempImage)
+ return GL_FALSE; /* out of memory */
+
+ dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+ dstFormat,
+ texWidth, (GLubyte *) dstAddr);
+
+ blkaddr = dst;
+ dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0;
+ for (j = 0; j < srcHeight; j+=4) {
+ if (srcHeight > j + 3) numypixels = 4;
+ else numypixels = srcHeight - j;
+ srcaddr = tempImage + j * srcWidth * 2;
+ for (i = 0; i < srcWidth; i += 4) {
+ if (srcWidth > i + 3) numxpixels = 4;
+ else numxpixels = srcWidth - i;
+ extractsrc_u(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+ encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+ blkaddr += 8;
+ extractsrc_u(srcpixels, (GLchan *)srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+ encode_rgtc_chan_u(blkaddr, srcpixels, numxpixels, numypixels);
+
+ blkaddr += 8;
+
+ srcaddr += numxpixels * 2;
+ }
+ blkaddr += dstRowDiff;
+ }
+ if (tempImage)
+ free((void *) tempImage);
+
+ return GL_TRUE;
+}
+
+GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS)
+{
+ GLbyte *dst;
+ const GLint texWidth = dstRowStride * 4 / 16; /* a bit of a hack */
+ const GLfloat *tempImage = NULL;
+ int i, j;
+ int numxpixels, numypixels;
+ const GLfloat *srcaddr;
+ GLbyte srcpixels[4][4];
+ GLbyte *blkaddr;
+ GLint dstRowDiff;
+
+ ASSERT(dstFormat == MESA_FORMAT_SIGNED_RG_RGTC2);
+ ASSERT(dstXoffset % 4 == 0);
+ ASSERT(dstYoffset % 4 == 0);
+ ASSERT(dstZoffset % 4 == 0);
+ (void) dstZoffset;
+ (void) dstImageOffsets;
+
+ tempImage = _mesa_make_temp_float_image(ctx, dims,
+ baseInternalFormat,
+ _mesa_get_format_base_format(dstFormat),
+ srcWidth, srcHeight, srcDepth,
+ srcFormat, srcType, srcAddr,
+ srcPacking, 0x0);
+ if (!tempImage)
+ return GL_FALSE; /* out of memory */
+
+ dst = (GLbyte *)_mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
+ dstFormat,
+ texWidth, (GLubyte *) dstAddr);
+
+ blkaddr = dst;
+ dstRowDiff = dstRowStride >= (srcWidth * 8) ? dstRowStride - (((srcWidth + 7) & ~7) * 8) : 0;
+ for (j = 0; j < srcHeight; j += 4) {
+ if (srcHeight > j + 3) numypixels = 4;
+ else numypixels = srcHeight - j;
+ srcaddr = tempImage + j * srcWidth * 2;
+ for (i = 0; i < srcWidth; i += 4) {
+ if (srcWidth > i + 3) numxpixels = 4;
+ else numxpixels = srcWidth - i;
+
+ extractsrc_s(srcpixels, srcaddr, srcWidth, numxpixels, numypixels, 2);
+ encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+ blkaddr += 8;
+
+ extractsrc_s(srcpixels, srcaddr + 1, srcWidth, numxpixels, numypixels, 2);
+ encode_rgtc_chan_s(blkaddr, srcpixels, numxpixels, numypixels);
+ blkaddr += 8;
+
+ srcaddr += numxpixels * 2;
+
+ }
+ blkaddr += dstRowDiff;
+ }
+ if (tempImage)
+ free((void *) tempImage);
+
+ return GL_TRUE;
+}
+
+static void _fetch_texel_rgtc_u(GLint srcRowStride, const GLubyte *pixdata,
+ GLint i, GLint j, GLchan *value, int comps)
+{
+ GLchan decode;
+ const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+ const GLubyte alpha0 = blksrc[0];
+ const GLubyte alpha1 = blksrc[1];
+ const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+ const GLubyte acodelow = blksrc[2 + bit_pos / 8];
+ const GLubyte acodehigh = blksrc[3 + bit_pos / 8];
+ const GLubyte code = (acodelow >> (bit_pos & 0x7) |
+ (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7;
+
+ if (code == 0)
+ decode = UBYTE_TO_CHAN( alpha0 );
+ else if (code == 1)
+ decode = UBYTE_TO_CHAN( alpha1 );
+ else if (alpha0 > alpha1)
+ decode = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
+ else if (code < 6)
+ decode = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
+ else if (code == 6)
+ decode = 0;
+ else
+ decode = CHAN_MAX;
+
+ *value = decode;
+}
+
+
+static void _fetch_texel_rgtc_s(GLint srcRowStride, const GLbyte *pixdata,
+ GLint i, GLint j, GLbyte *value, int comps)
+{
+ GLbyte decode;
+ const GLbyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
+ const GLbyte alpha0 = blksrc[0];
+ const GLbyte alpha1 = blksrc[1];
+ const GLbyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
+ const GLbyte acodelow = blksrc[2 + bit_pos / 8];
+ const GLbyte acodehigh = blksrc[3 + bit_pos / 8];
+ const GLbyte code = (acodelow >> (bit_pos & 0x7) |
+ (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7;
+
+ if (code == 0)
+ decode = alpha0;
+ else if (code == 1)
+ decode = alpha1;
+ else if (alpha0 > alpha1)
+ decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7);
+ else if (code < 6)
+ decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5);
+ else if (code == 6)
+ decode = -128;
+ else
+ decode = 127;
+
+ *value = decode;
+}
+
+void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel)
+{
+ GLchan red;
+ _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data),
+ i, j, &red, 1);
+ texel[RCOMP] = CHAN_TO_FLOAT(red);
+ texel[GCOMP] = 0.0;
+ texel[BCOMP] = 0.0;
+ texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel)
+{
+ GLbyte red;
+ _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data),
+ i, j, &red, 1);
+ texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+ texel[GCOMP] = 0.0;
+ texel[BCOMP] = 0.0;
+ texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel)
+{
+ GLchan red, green;
+ _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data),
+ i, j, &red, 2);
+ _fetch_texel_rgtc_u(texImage->RowStride, (GLubyte *)(texImage->Data) + 8,
+ i, j, &green, 2);
+ texel[RCOMP] = CHAN_TO_FLOAT(red);
+ texel[GCOMP] = CHAN_TO_FLOAT(green);
+ texel[BCOMP] = 0.0;
+ texel[ACOMP] = 1.0;
+}
+
+void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel)
+{
+ GLbyte red, green;
+ _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data),
+ i, j, &red, 2);
+ _fetch_texel_rgtc_s(texImage->RowStride, (GLbyte *)(texImage->Data) + 8,
+ i, j, &green, 2);
+ texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+ texel[GCOMP] = BYTE_TO_FLOAT_TEX(green);
+ texel[BCOMP] = 0.0;
+ texel[ACOMP] = 1.0;
+}
+
+static void write_rgtc_encoded_channel(GLubyte *blkaddr,
+ GLubyte alphabase1,
+ GLubyte alphabase2,
+ GLubyte alphaenc[16])
+{
+ *blkaddr++ = alphabase1;
+ *blkaddr++ = alphabase2;
+ *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
+ *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
+ *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
+ *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
+ *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
+ *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
+}
+
+static void encode_rgtc_chan_u(GLubyte *blkaddr, GLubyte srccolors[4][4],
+ GLint numxpixels, GLint numypixels)
+{
+ GLubyte alphabase[2], alphause[2];
+ GLshort alphatest[2] = { 0 };
+ GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+ GLubyte i, j, aindex, acutValues[7];
+ GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
+ GLboolean alphaabsmin = GL_FALSE;
+ GLboolean alphaabsmax = GL_FALSE;
+ GLshort alphadist;
+
+ /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
+ alphabase[0] = 0xff; alphabase[1] = 0x0;
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i] == 0)
+ alphaabsmin = GL_TRUE;
+ else if (srccolors[j][i] == 255)
+ alphaabsmax = GL_TRUE;
+ else {
+ if (srccolors[j][i] > alphabase[1])
+ alphabase[1] = srccolors[j][i];
+ if (srccolors[j][i] < alphabase[0])
+ alphabase[0] = srccolors[j][i];
+ }
+ }
+ }
+
+
+ if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+ /* shortcut here since it is a very common case (and also avoids later problems) */
+ /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+ /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+ *blkaddr++ = srccolors[0][0];
+ blkaddr++;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+#if RGTC_DEBUG
+ fprintf(stderr, "enc0 used\n");
+#endif
+ return;
+ }
+
+ /* find best encoding for alpha0 > alpha1 */
+ /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+ alphablockerror1 = 0x0;
+ alphablockerror2 = 0xffffffff;
+ alphablockerror3 = 0xffffffff;
+ if (alphaabsmin) alphause[0] = 0;
+ else alphause[0] = alphabase[0];
+ if (alphaabsmax) alphause[1] = 255;
+ else alphause[1] = alphabase[1];
+ /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+ for (aindex = 0; aindex < 7; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+ }
+
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] > acutValues[0]) {
+ alphaenc1[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphause[1];
+ }
+ else if (srccolors[j][i] > acutValues[1]) {
+ alphaenc1[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[2]) {
+ alphaenc1[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[3]) {
+ alphaenc1[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[4]) {
+ alphaenc1[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[5]) {
+ alphaenc1[4*j + i] = 6;
+ alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[6]) {
+ alphaenc1[4*j + i] = 7;
+ alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+ }
+ else {
+ alphaenc1[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphause[0];
+ }
+ alphablockerror1 += alphadist * alphadist;
+ }
+ }
+
+#if RGTC_DEBUG
+ for (i = 0; i < 16; i++) {
+ fprintf(stderr, "%d ", alphaenc1[i]);
+ }
+ fprintf(stderr, "cutVals ");
+ for (i = 0; i < 8; i++) {
+ fprintf(stderr, "%d ", acutValues[i]);
+ }
+ fprintf(stderr, "srcVals ");
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ fprintf(stderr, "%d ", srccolors[j][i]);
+ }
+ }
+ fprintf(stderr, "\n");
+#endif
+
+ /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+ are false but try it anyway */
+ if (alphablockerror1 >= 32) {
+
+ /* don't bother if encoding is already very good, this condition should also imply
+ we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+ alphablockerror2 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] == 0) {
+ alphaenc2[4*j + i] = 6;
+ alphadist = 0;
+ }
+ else if (srccolors[j][i] == 255) {
+ alphaenc2[4*j + i] = 7;
+ alphadist = 0;
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ alphaenc2[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphabase[0];
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ alphaenc2[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ alphaenc2[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ alphaenc2[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ alphaenc2[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+ }
+ else {
+ alphaenc2[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphabase[1];
+ }
+ alphablockerror2 += alphadist * alphadist;
+ }
+ }
+
+
+ /* skip this if the error is already very small
+ this encoding is MUCH better on average than #2 though, but expensive! */
+ if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+ GLshort blockerrlin1 = 0;
+ GLshort blockerrlin2 = 0;
+ GLubyte nralphainrangelow = 0;
+ GLubyte nralphainrangehigh = 0;
+ alphatest[0] = 0xff;
+ alphatest[1] = 0x0;
+ /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+ alphatest[1] = srccolors[j][i];
+ if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
+ alphatest[0] = srccolors[j][i];
+ }
+ }
+ /* shouldn't happen too often, don't really care about those degenerated cases */
+ if (alphatest[1] <= alphatest[0]) {
+ alphatest[0] = 1;
+ alphatest[1] = 254;
+ }
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+
+ /* find the "average" difference between the alpha values and the next encoded value.
+ This is then used to calculate new base values.
+ Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+ since they will see more improvement, and also because the values in the middle are somewhat
+ likely to get no improvement at all (because the base values might move in different directions)?
+ OTOH it would mean the values in the middle are even less likely to get an improvement
+ */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i] <= alphatest[0] / 2) {
+ }
+ else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+ nralphainrangelow += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else {
+ blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+ nralphainrangehigh += 1;
+ }
+ }
+ }
+ /* shouldn't happen often, needed to avoid div by zero */
+ if (nralphainrangelow == 0) nralphainrangelow = 1;
+ if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+ alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+ fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+ fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+ /* again shouldn't really happen often... */
+ if (alphatest[0] < 0) {
+ alphatest[0] = 0;
+ }
+ alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+ if (alphatest[1] > 255) {
+ alphatest[1] = 255;
+ }
+
+ alphablockerror3 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] <= alphatest[0] / 2) {
+ alphaenc3[4*j + i] = 6;
+ alphadist = srccolors[j][i];
+ }
+ else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+ alphaenc3[4*j + i] = 7;
+ alphadist = 255 - srccolors[j][i];
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ alphaenc3[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphatest[0];
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ alphaenc3[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ alphaenc3[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ alphaenc3[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ alphaenc3[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+ }
+ else {
+ alphaenc3[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphatest[1];
+ }
+ alphablockerror3 += alphadist * alphadist;
+ }
+ }
+ }
+ }
+ /* write the alpha values and encoding back. */
+ if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+ if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+ write_rgtc_encoded_channel( blkaddr, alphause[1], alphause[0], alphaenc1 );
+ }
+ else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+ if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+ write_rgtc_encoded_channel( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+ }
+ else {
+#if RGTC_DEBUG
+ fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+ write_rgtc_encoded_channel( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+ }
+}
+
+
+static void write_rgtc_encoded_channel_s(GLbyte *blkaddr,
+ GLbyte alphabase1,
+ GLbyte alphabase2,
+ GLbyte alphaenc[16])
+{
+ *blkaddr++ = alphabase1;
+ *blkaddr++ = alphabase2;
+ *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
+ *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
+ *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
+ *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
+ *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
+ *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
+}
+
+static void encode_rgtc_chan_s(GLbyte *blkaddr, GLbyte srccolors[4][4],
+ GLint numxpixels, GLint numypixels)
+{
+ GLbyte alphabase[2], alphause[2];
+ GLshort alphatest[2] = { 0 };
+ GLuint alphablockerror1, alphablockerror2, alphablockerror3;
+ GLbyte i, j, aindex, acutValues[7];
+ GLbyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
+ GLboolean alphaabsmin = GL_FALSE;
+ GLboolean alphaabsmax = GL_FALSE;
+ GLshort alphadist;
+
+ /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
+ alphabase[0] = 0xff; alphabase[1] = 0x0;
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i] == 0)
+ alphaabsmin = GL_TRUE;
+ else if (srccolors[j][i] == 255)
+ alphaabsmax = GL_TRUE;
+ else {
+ if (srccolors[j][i] > alphabase[1])
+ alphabase[1] = srccolors[j][i];
+ if (srccolors[j][i] < alphabase[0])
+ alphabase[0] = srccolors[j][i];
+ }
+ }
+ }
+
+
+ if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
+ /* shortcut here since it is a very common case (and also avoids later problems) */
+ /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+ /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+ *blkaddr++ = srccolors[0][0];
+ blkaddr++;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+#if RGTC_DEBUG
+ fprintf(stderr, "enc0 used\n");
+#endif
+ return;
+ }
+
+ /* find best encoding for alpha0 > alpha1 */
+ /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+ alphablockerror1 = 0x0;
+ alphablockerror2 = 0xffffffff;
+ alphablockerror3 = 0xffffffff;
+ if (alphaabsmin) alphause[0] = 0;
+ else alphause[0] = alphabase[0];
+ if (alphaabsmax) alphause[1] = 255;
+ else alphause[1] = alphabase[1];
+ /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+ for (aindex = 0; aindex < 7; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+ }
+
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] > acutValues[0]) {
+ alphaenc1[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphause[1];
+ }
+ else if (srccolors[j][i] > acutValues[1]) {
+ alphaenc1[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[2]) {
+ alphaenc1[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[3]) {
+ alphaenc1[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[4]) {
+ alphaenc1[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[5]) {
+ alphaenc1[4*j + i] = 6;
+ alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+ }
+ else if (srccolors[j][i] > acutValues[6]) {
+ alphaenc1[4*j + i] = 7;
+ alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+ }
+ else {
+ alphaenc1[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphause[0];
+ }
+ alphablockerror1 += alphadist * alphadist;
+ }
+ }
+#if RGTC_DEBUG
+ for (i = 0; i < 16; i++) {
+ fprintf(stderr, "%d ", alphaenc1[i]);
+ }
+ fprintf(stderr, "cutVals ");
+ for (i = 0; i < 8; i++) {
+ fprintf(stderr, "%d ", acutValues[i]);
+ }
+ fprintf(stderr, "srcVals ");
+ for (j = 0; j < numypixels; j++)
+ for (i = 0; i < numxpixels; i++) {
+ fprintf(stderr, "%d ", srccolors[j][i]);
+ }
+
+ fprintf(stderr, "\n");
+#endif
+
+ /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+ are false but try it anyway */
+ if (alphablockerror1 >= 32) {
+
+ /* don't bother if encoding is already very good, this condition should also imply
+ we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+ alphablockerror2 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] == 0) {
+ alphaenc2[4*j + i] = 6;
+ alphadist = 0;
+ }
+ else if (srccolors[j][i] == 255) {
+ alphaenc2[4*j + i] = 7;
+ alphadist = 0;
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ alphaenc2[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphabase[0];
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ alphaenc2[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ alphaenc2[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ alphaenc2[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ alphaenc2[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+ }
+ else {
+ alphaenc2[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphabase[1];
+ }
+ alphablockerror2 += alphadist * alphadist;
+ }
+ }
+
+
+ /* skip this if the error is already very small
+ this encoding is MUCH better on average than #2 though, but expensive! */
+ if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+ GLshort blockerrlin1 = 0;
+ GLshort blockerrlin2 = 0;
+ GLubyte nralphainrangelow = 0;
+ GLubyte nralphainrangehigh = 0;
+ alphatest[0] = 0xff;
+ alphatest[1] = 0x0;
+ /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+ alphatest[1] = srccolors[j][i];
+ if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
+ alphatest[0] = srccolors[j][i];
+ }
+ }
+ /* shouldn't happen too often, don't really care about those degenerated cases */
+ if (alphatest[1] <= alphatest[0]) {
+ alphatest[0] = 1;
+ alphatest[1] = 254;
+ }
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+
+ /* find the "average" difference between the alpha values and the next encoded value.
+ This is then used to calculate new base values.
+ Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+ since they will see more improvement, and also because the values in the middle are somewhat
+ likely to get no improvement at all (because the base values might move in different directions)?
+ OTOH it would mean the values in the middle are even less likely to get an improvement
+ */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i] <= alphatest[0] / 2) {
+ }
+ else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ blockerrlin1 += (srccolors[j][i] - alphatest[0]);
+ nralphainrangelow += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else {
+ blockerrlin2 += (srccolors[j][i] - alphatest[1]);
+ nralphainrangehigh += 1;
+ }
+ }
+ }
+ /* shouldn't happen often, needed to avoid div by zero */
+ if (nralphainrangelow == 0) nralphainrangelow = 1;
+ if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+ alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+#if RGTC_DEBUG
+ fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+ fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
+#endif
+ /* again shouldn't really happen often... */
+ if (alphatest[0] < 0) {
+ alphatest[0] = 0;
+ }
+ alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+ if (alphatest[1] > 255) {
+ alphatest[1] = 255;
+ }
+
+ alphablockerror3 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i] <= alphatest[0] / 2) {
+ alphaenc3[4*j + i] = 6;
+ alphadist = srccolors[j][i];
+ }
+ else if (srccolors[j][i] > ((255 + alphatest[1]) / 2)) {
+ alphaenc3[4*j + i] = 7;
+ alphadist = 255 - srccolors[j][i];
+ }
+ else if (srccolors[j][i] <= acutValues[0]) {
+ alphaenc3[4*j + i] = 0;
+ alphadist = srccolors[j][i] - alphatest[0];
+ }
+ else if (srccolors[j][i] <= acutValues[1]) {
+ alphaenc3[4*j + i] = 2;
+ alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[2]) {
+ alphaenc3[4*j + i] = 3;
+ alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[3]) {
+ alphaenc3[4*j + i] = 4;
+ alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+ }
+ else if (srccolors[j][i] <= acutValues[4]) {
+ alphaenc3[4*j + i] = 5;
+ alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+ }
+ else {
+ alphaenc3[4*j + i] = 1;
+ alphadist = srccolors[j][i] - alphatest[1];
+ }
+ alphablockerror3 += alphadist * alphadist;
+ }
+ }
+ }
+ }
+ /* write the alpha values and encoding back. */
+ if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+#if RGTC_DEBUG
+ if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
+#endif
+ write_rgtc_encoded_channel_s( blkaddr, alphause[1], alphause[0], alphaenc1 );
+ }
+ else if (alphablockerror2 <= alphablockerror3) {
+#if RGTC_DEBUG
+ if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
+#endif
+ write_rgtc_encoded_channel_s( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
+ }
+ else {
+#if RGTC_DEBUG
+ fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
+#endif
+ write_rgtc_encoded_channel_s( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+ }
+}
diff --git a/src/mesa/main/texcompress_rgtc.h b/src/mesa/main/texcompress_rgtc.h
new file mode 100644
index 00000000000..424edc4581c
--- /dev/null
+++ b/src/mesa/main/texcompress_rgtc.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TEXCOMPRESS_RGTC_H
+#define TEXCOMPRESS_RGTC_H
+
+#include "glheader.h"
+#include "mfeatures.h"
+#include "texstore.h"
+
+struct gl_texture_image;
+
+extern GLboolean
+_mesa_texstore_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern GLboolean
+_mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS);
+
+extern void
+_mesa_fetch_texel_2d_f_red_rgtc1(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_red_rgtc1(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_rg_rgtc2(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel);
+
+extern void
+_mesa_fetch_texel_2d_f_signed_rg_rgtc2(const struct gl_texture_image *texImage,
+ GLint i, GLint j, GLint k, GLfloat *texel);
+#endif
diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c
index 8aa1e4970d5..550597e1cdf 100644
--- a/src/mesa/main/texfetch.c
+++ b/src/mesa/main/texfetch.c
@@ -38,6 +38,7 @@
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texcompress_s3tc.h"
+#include "texcompress_rgtc.h"
#include "texfetch.h"
#include "teximage.h"
@@ -756,7 +757,35 @@ texfetch_funcs[MESA_FORMAT_COUNT] =
fetch_texel_2d_rgba_16,
fetch_texel_3d_rgba_16,
store_texel_rgba_16
- }
+ },
+ {
+ MESA_FORMAT_RED_RGTC1,
+ NULL,
+ _mesa_fetch_texel_2d_f_red_rgtc1,
+ NULL,
+ NULL
+ },
+ {
+ MESA_FORMAT_SIGNED_RED_RGTC1,
+ NULL,
+ _mesa_fetch_texel_2d_f_signed_red_rgtc1,
+ NULL,
+ NULL
+ },
+ {
+ MESA_FORMAT_RG_RGTC2,
+ NULL,
+ _mesa_fetch_texel_2d_f_rg_rgtc2,
+ NULL,
+ NULL
+ },
+ {
+ MESA_FORMAT_SIGNED_RG_RGTC2,
+ NULL,
+ _mesa_fetch_texel_2d_f_signed_rg_rgtc2,
+ NULL,
+ NULL
+ },
};
diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index 2542cea856b..72025cf828e 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -602,6 +602,25 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat,
}
}
+ if (ctx->Extensions.ARB_texture_compression_rgtc) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RED_RGTC1:
+ RETURN_IF_SUPPORTED(MESA_FORMAT_RED_RGTC1);
+ break;
+ case GL_COMPRESSED_SIGNED_RED_RGTC1:
+ RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RED_RGTC1);
+ break;
+ case GL_COMPRESSED_RG_RGTC2:
+ RETURN_IF_SUPPORTED(MESA_FORMAT_RG_RGTC2);
+ break;
+ case GL_COMPRESSED_SIGNED_RG_RGTC2:
+ RETURN_IF_SUPPORTED(MESA_FORMAT_SIGNED_RG_RGTC2);
+ break;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
_mesa_problem(ctx, "unexpected format in _mesa_choose_tex_format()");
return MESA_FORMAT_NONE;
}
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 7dd4a1fa650..8a3e5f77979 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -65,6 +65,7 @@
#include "pack.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
+#include "texcompress_rgtc.h"
#include "texcompress_s3tc.h"
#include "teximage.h"
#include "texstore.h"
@@ -310,15 +311,15 @@ compute_component_mapping(GLenum inFormat, GLenum outFormat,
* \param srcPacking source image pixel packing
* \return resulting image with format = textureBaseFormat and type = GLfloat.
*/
-static GLfloat *
-make_temp_float_image(struct gl_context *ctx, GLuint dims,
- GLenum logicalBaseFormat,
- GLenum textureBaseFormat,
- GLint srcWidth, GLint srcHeight, GLint srcDepth,
- GLenum srcFormat, GLenum srcType,
- const GLvoid *srcAddr,
- const struct gl_pixelstore_attrib *srcPacking,
- GLbitfield transferOps)
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+ GLenum logicalBaseFormat,
+ GLenum textureBaseFormat,
+ GLint srcWidth, GLint srcHeight, GLint srcDepth,
+ GLenum srcFormat, GLenum srcType,
+ const GLvoid *srcAddr,
+ const struct gl_pixelstore_attrib *srcPacking,
+ GLbitfield transferOps)
{
GLfloat *tempImage;
const GLint components = _mesa_components_in_format(logicalBaseFormat);
@@ -2065,7 +2066,7 @@ _mesa_texstore_argb2101010(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2317,7 +2318,7 @@ _mesa_texstore_unorm1616(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2394,7 +2395,7 @@ _mesa_texstore_unorm16(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2452,7 +2453,7 @@ _mesa_texstore_rgba_16(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2519,7 +2520,7 @@ _mesa_texstore_signed_rgba_16(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2901,7 +2902,7 @@ _mesa_texstore_signed_r8(TEXSTORE_PARAMS)
/* XXX look at adding optimized paths */
{
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2946,7 +2947,7 @@ _mesa_texstore_signed_rg88(TEXSTORE_PARAMS)
/* XXX look at adding optimized paths */
{
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -2991,7 +2992,7 @@ _mesa_texstore_signed_rgbx8888(TEXSTORE_PARAMS)
{
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3104,7 +3105,7 @@ _mesa_texstore_signed_rgba8888(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3413,7 +3414,7 @@ _mesa_texstore_rgba_float32(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3483,7 +3484,7 @@ _mesa_texstore_rgba_float16(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3549,7 +3550,7 @@ _mesa_texstore_rgba_int8(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3614,7 +3615,7 @@ _mesa_texstore_rgba_int16(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -3679,7 +3680,7 @@ _mesa_texstore_rgba_int32(TEXSTORE_PARAMS)
}
else {
/* general path */
- const GLfloat *tempImage = make_temp_float_image(ctx, dims,
+ const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
baseInternalFormat,
baseFormat,
srcWidth, srcHeight, srcDepth,
@@ -4128,7 +4129,12 @@ texstore_funcs[MESA_FORMAT_COUNT] =
{ MESA_FORMAT_SIGNED_RG_16, _mesa_texstore_signed_rgba_16 },
{ MESA_FORMAT_SIGNED_RGB_16, _mesa_texstore_signed_rgba_16 },
{ MESA_FORMAT_SIGNED_RGBA_16, _mesa_texstore_signed_rgba_16 },
- { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 }
+ { MESA_FORMAT_RGBA_16, _mesa_texstore_rgba_16 },
+
+ { MESA_FORMAT_RED_RGTC1, _mesa_texstore_red_rgtc1 },
+ { MESA_FORMAT_SIGNED_RED_RGTC1, _mesa_texstore_signed_red_rgtc1 },
+ { MESA_FORMAT_RG_RGTC2, _mesa_texstore_rg_rgtc2 },
+ { MESA_FORMAT_SIGNED_RG_RGTC2, _mesa_texstore_signed_rg_rgtc2 }
};
diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
index 177ede423f5..2f3c4e821fc 100644
--- a/src/mesa/main/texstore.h
+++ b/src/mesa/main/texstore.h
@@ -81,6 +81,15 @@ _mesa_make_temp_chan_image(struct gl_context *ctx, GLuint dims,
const GLvoid *srcAddr,
const struct gl_pixelstore_attrib *srcPacking);
+GLfloat *
+_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
+ GLenum logicalBaseFormat,
+ GLenum textureBaseFormat,
+ GLint srcWidth, GLint srcHeight, GLint srcDepth,
+ GLenum srcFormat, GLenum srcType,
+ const GLvoid *srcAddr,
+ const struct gl_pixelstore_attrib *srcPacking,
+ GLbitfield transferOps);
extern void
_mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 9a78a23aa7e..bdf4126cf58 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -78,6 +78,7 @@ MAIN_SOURCES = \
main/stencil.c \
main/syncobj.c \
main/texcompress.c \
+ main/texcompress_rgtc.c \
main/texcompress_s3tc.c \
main/texcompress_fxt1.c \
main/texenv.c \
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 6530a06ade4..c99eafbadf3 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -579,6 +579,7 @@ st_validate_varrays(struct gl_context *ctx,
if (is_interleaved_arrays(vp, vpv, arrays)) {
setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
max_index);
+
num_vbuffers = 1;
num_velements = vpv->num_inputs;
if (num_velements == 0)
@@ -645,6 +646,7 @@ st_draw_vbo(struct gl_context *ctx,
for (i = 0; i < nr_prims; i++) {
min_index = MIN2(min_index, prims[i].start);
max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+ max_index = MAX2(max_index, prims[i].num_instances);
}
}
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 2f45f470334..d2098987d1d 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -416,6 +416,22 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.S3_s3tc = GL_TRUE;
}
+ if (screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0) &&
+ screen->is_format_supported(screen, PIPE_FORMAT_RGTC1_SNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0) &&
+ screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0) &&
+ screen->is_format_supported(screen, PIPE_FORMAT_RGTC2_SNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0)
+ ) {
+ ctx->Extensions.ARB_texture_compression_rgtc = GL_TRUE;
+ }
+
/* ycbcr support */
if (screen->is_format_supported(screen, PIPE_FORMAT_UYVY,
PIPE_TEXTURE_2D, 0,
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 577ee6189bd..c58ec9267dc 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -241,6 +241,14 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat)
case MESA_FORMAT_RGBA_UINT32:
return PIPE_FORMAT_R32G32B32A32_USCALED;
+ case MESA_FORMAT_RED_RGTC1:
+ return PIPE_FORMAT_RGTC1_UNORM;
+ case MESA_FORMAT_SIGNED_RED_RGTC1:
+ return PIPE_FORMAT_RGTC1_SNORM;
+ case MESA_FORMAT_RG_RGTC2:
+ return PIPE_FORMAT_RGTC2_UNORM;
+ case MESA_FORMAT_SIGNED_RG_RGTC2:
+ return PIPE_FORMAT_RGTC2_SNORM;
default:
assert(0);
return PIPE_FORMAT_NONE;
@@ -380,6 +388,15 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
case PIPE_FORMAT_R32G32B32A32_USCALED:
return MESA_FORMAT_RGBA_UINT32;
+ case PIPE_FORMAT_RGTC1_UNORM:
+ return MESA_FORMAT_RED_RGTC1;
+ case PIPE_FORMAT_RGTC1_SNORM:
+ return MESA_FORMAT_SIGNED_RED_RGTC1;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ return MESA_FORMAT_RG_RGTC2;
+ case PIPE_FORMAT_RGTC2_SNORM:
+ return MESA_FORMAT_SIGNED_RG_RGTC2;
+
default:
assert(0);
return MESA_FORMAT_NONE;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 5c68fd78c30..c07739f9d53 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -224,9 +224,9 @@ src_register( struct st_translate *t,
case PROGRAM_TEMPORARY:
assert(index >= 0);
+ assert(index < Elements(t->temps));
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_temporary( t->ureg );
- assert(index < Elements(t->temps));
return ureg_src(t->temps[index]);
case PROGRAM_NAMED_PARAM: