summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c47
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h6
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py2
-rw-r--r--src/gallium/auxiliary/util/u_format_rgtc.c452
-rw-r--r--src/gallium/auxiliary/util/u_format_rgtc.h108
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py3
-rw-r--r--src/gallium/auxiliary/util/u_math.h11
-rw-r--r--src/gallium/auxiliary/util/u_vbuf_mgr.c10
-rw-r--r--src/gallium/drivers/i915/i915_batch.h4
-rw-r--r--src/gallium/drivers/i915/i915_batchbuffer.h8
-rw-r--r--src/gallium/drivers/i915/i915_blit.c8
-rw-r--r--src/gallium/drivers/i915/i915_context.c2
-rw-r--r--src/gallium/drivers/i915/i915_flush.c4
-rw-r--r--src/gallium/drivers/i915/i915_prim_emit.c4
-rw-r--r--src/gallium/drivers/i915/i915_prim_vbuf.c12
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c614
-rw-r--r--src/gallium/drivers/i915/i915_state_static.c2
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h1
-rw-r--r--src/gallium/drivers/r300/r300_blit.c74
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h2
-rw-r--r--src/gallium/drivers/r300/r300_context.c39
-rw-r--r--src/gallium/drivers/r300/r300_context.h50
-rw-r--r--src/gallium/drivers/r300/r300_emit.c132
-rw-r--r--src/gallium/drivers/r300/r300_flush.c2
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c212
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h35
-rw-r--r--src/gallium/drivers/r300/r300_reg.h4
-rw-r--r--src/gallium/drivers/r300/r300_render.c22
-rw-r--r--src/gallium/drivers/r300/r300_screen.c4
-rw-r--r--src/gallium/drivers/r300/r300_state.c68
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c24
-rw-r--r--src/gallium/drivers/r300/r300_texture.c60
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c118
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h4
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h4
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c8
-rw-r--r--src/gallium/drivers/r600/evergreend.h3
-rw-r--r--src/gallium/drivers/r600/r600.h11
-rw-r--r--src/gallium/drivers/r600/r600_asm.c51
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c16
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c5
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h2
-rw-r--r--src/gallium/drivers/r600/r600_query.c28
-rw-r--r--src/gallium/drivers/r600/r600_shader.c46
-rw-r--r--src/gallium/drivers/r600/r600_state.c11
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c12
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h4
-rw-r--r--src/gallium/drivers/r600/r600_texture.c23
-rw-r--r--src/gallium/drivers/r600/r600d.h4
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state_shader.c2
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c7
-rw-r--r--src/gallium/state_trackers/dri/drm/dri2.c2
-rw-r--r--src/gallium/state_trackers/egl/Makefile1
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.c3
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d.h1
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_api.c15
-rw-r--r--src/gallium/state_trackers/egl/common/egl_g3d_image.c1
-rw-r--r--src/gallium/state_trackers/egl/common/native.h28
-rw-r--r--src/gallium/state_trackers/egl/common/native_helper.c142
-rw-r--r--src/gallium/state_trackers/egl/common/native_helper.h31
-rw-r--r--src/gallium/state_trackers/egl/drm/modeset.c48
-rw-r--r--src/gallium/state_trackers/egl/drm/native_drm.c3
-rw-r--r--src/gallium/state_trackers/egl/drm/native_drm.h2
-rw-r--r--src/gallium/state_trackers/egl/fbdev/native_fbdev.c2
-rw-r--r--src/gallium/state_trackers/egl/gdi/native_gdi.c2
-rw-r--r--src/gallium/state_trackers/egl/wayland/native_wayland.c4
-rw-r--r--src/gallium/state_trackers/egl/x11/native_ximage.c2
-rw-r--r--src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c3
-rw-r--r--src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c3
-rw-r--r--src/gallium/winsys/r600/drm/evergreen_hw_context.c12
-rw-r--r--src/gallium/winsys/r600/drm/r600_drm.c65
-rw-r--r--src/gallium/winsys/r600/drm/r600_hw_context.c79
-rw-r--r--src/gallium/winsys/r600/drm/r600_priv.h2
-rw-r--r--src/gallium/winsys/r600/drm/r600d.h15
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_common.c15
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c2
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_winsys.h1
83 files changed, 1914 insertions, 969 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7d7d700eacd..e48063166ff 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -111,6 +111,7 @@ C_SOURCES = \
util/u_format.c \
util/u_format_other.c \
util/u_format_s3tc.c \
+ util/u_format_rgtc.c \
util/u_format_srgb.c \
util/u_format_table.c \
util/u_format_tests.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 0ec63071615..ef0b15f06ce 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -158,6 +158,7 @@ source = [
'util/u_format.c',
'util/u_format_other.c',
'util/u_format_s3tc.c',
+ 'util/u_format_rgtc.c',
'util/u_format_srgb.c',
'util/u_format_table.c',
'util/u_format_tests.c',
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 5ea552f51c1..60f6380c503 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -874,6 +874,8 @@ draw_install_aapoint_stage(struct draw_context *draw,
{
struct aapoint_stage *aapoint;
+ pipe->draw = (void *) draw;
+
/*
* Create / install AA point drawing / prim stage
*/
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index cbb090b2803..9cf74a838fe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -672,6 +672,31 @@ tgsi_exec_machine_bind_shader(
mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
+ if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
+ !mach->UsedGeometryShader) {
+ struct tgsi_exec_vector *inputs =
+ align_malloc(sizeof(struct tgsi_exec_vector) *
+ TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
+ 16);
+ struct tgsi_exec_vector *outputs =
+ align_malloc(sizeof(struct tgsi_exec_vector) *
+ TGSI_MAX_TOTAL_VERTICES, 16);
+
+ if (!inputs)
+ return;
+ if (!outputs) {
+ align_free(inputs);
+ return;
+ }
+
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
+
+ mach->Inputs = inputs;
+ mach->Outputs = outputs;
+ mach->UsedGeometryShader = TRUE;
+ }
+
declarations = (struct tgsi_full_declaration *)
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
@@ -801,6 +826,11 @@ tgsi_exec_machine_create( void )
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
+ mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
+ mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
+ if (!mach->Inputs || !mach->Outputs)
+ goto fail;
+
/* Setup constants needed by the SSE2 executor. */
for( i = 0; i < 4; i++ ) {
mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
@@ -824,7 +854,11 @@ tgsi_exec_machine_create( void )
return mach;
fail:
- align_free(mach);
+ if (mach) {
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
+ align_free(mach);
+ }
return NULL;
}
@@ -836,10 +870,13 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
if (mach->Instructions)
FREE(mach->Instructions);
if (mach->Declarations)
- FREE(mach->Declarations);
- }
+ FREE(mach->Declarations);
+
+ align_free(mach->Inputs);
+ align_free(mach->Outputs);
- align_free(mach);
+ align_free(mach);
+ }
}
static void
@@ -1047,7 +1084,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
}*/
int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
assert(pos >= 0);
- assert(pos < Elements(mach->Inputs));
+ assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 4a423b5bb4e..33f33aa82c7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -228,8 +228,8 @@ struct tgsi_exec_machine
float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4];
- struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
- struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
+ struct tgsi_exec_vector *Inputs;
+ struct tgsi_exec_vector *Outputs;
/* System values */
unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT];
@@ -309,6 +309,8 @@ struct tgsi_exec_machine
uint NumDeclarations;
struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES];
+
+ boolean UsedGeometryShader;
};
struct tgsi_exec_machine *
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index 6d0016c0ad8..cc173f808ae 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -632,7 +632,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
def is_format_hand_written(format):
- return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS
+ return format.layout in ('s3tc', 'rgtc', 'subsampled', 'other') or format.colorspace == ZS
def generate(formats):
diff --git a/src/gallium/auxiliary/util/u_format_rgtc.c b/src/gallium/auxiliary/util/u_format_rgtc.c
new file mode 100644
index 00000000000..6ffcd7e99ef
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_rgtc.c
@@ -0,0 +1,452 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include "u_math.h"
+#include "u_format.h"
+#include "u_format_rgtc.h"
+
+static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata,
+ unsigned i, unsigned j, uint8_t *value, unsigned comps);
+
+static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4],
+ int numxpixels, int numypixels);
+
+static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata,
+ unsigned i, unsigned j, int8_t *value, unsigned comps);
+
+void
+util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1);
+}
+
+void
+util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
+ unsigned x, y, i, j;
+ unsigned block_size = 8;
+
+ for(y = 0; y < height; y += bh) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1);
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row,
+ unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4];
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+void
+util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 8;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ int8_t *dst = (int8_t *)dst_row;
+ for(x = 0; x < width; x += bw) {
+ int8_t tmp[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ }
+ }
+ u_format_signed_encode_rgtc_chan(dst, tmp, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 8;
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r;
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = 0.0;
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+void
+util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2);
+}
+
+void
+util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, comps = 4;
+ unsigned x, y, i, j;
+ unsigned block_size = 16;
+
+ for(y = 0; y < height; y += bh) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += bw) {
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2);
+
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp_r[4][4]; /* [bh][bw] */
+ uint8_t tmp_g[4][4]; /* [bh][bw] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4];
+ tmp_g[j][i] = src_row[((y + j)*src_stride/sizeof(*src_row) + (x + i)*4) + 1];
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += bw) {
+ uint8_t tmp_r[4][4]; /* [bh][bw][comps] */
+ uint8_t tmp_g[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ tmp_g[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + 1]);
+ }
+ }
+ u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+ for(y = 0; y < height; y += 4) {
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ uint8_t tmp_r, tmp_g;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = ubyte_to_float(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ uint8_t tmp_r, tmp_g;
+ u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = ubyte_to_float(tmp_r);
+ dst[1] = ubyte_to_float(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+void
+util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ fprintf(stderr,"%s\n", __func__);
+}
+
+void
+util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y, i, j;
+ int block_size = 16;
+ for(y = 0; y < height; y += 4) {
+ const int8_t *src = (int8_t *)src_row;
+ for(x = 0; x < width; x += 4) {
+ for(j = 0; j < 4; ++j) {
+ for(i = 0; i < 4; ++i) {
+ float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
+ int8_t tmp_r, tmp_g;
+ u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = byte_to_float_tex(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+ }
+ }
+ src += block_size;
+ }
+ src_row += src_stride;
+ }
+}
+
+void
+util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ const unsigned bw = 4, bh = 4, bytes_per_block = 16;
+ unsigned x, y, i, j;
+
+ for(y = 0; y < height; y += bh) {
+ int8_t *dst = (int8_t *)dst_row;
+ for(x = 0; x < width; x += bw) {
+ int8_t tmp_r[4][4]; /* [bh][bw][comps] */
+ int8_t tmp_g[4][4]; /* [bh][bw][comps] */
+ for(j = 0; j < bh; ++j) {
+ for(i = 0; i < bw; ++i) {
+ tmp_r[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]);
+ tmp_g[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + 1]);
+ }
+ }
+ u_format_signed_encode_rgtc_chan(dst, tmp_r, 4, 4);
+ u_format_signed_encode_rgtc_chan(dst + 8, tmp_g, 4, 4);
+ dst += bytes_per_block;
+ }
+ dst_row += dst_stride / sizeof(*dst_row);
+ }
+}
+
+void
+util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+ int8_t tmp_r, tmp_g;
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2);
+ u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2);
+ dst[0] = byte_to_float_tex(tmp_r);
+ dst[1] = byte_to_float_tex(tmp_g);
+ dst[2] = 0.0;
+ dst[3] = 1.0;
+}
+
+
+#define TAG(x) u_format_unsigned_##x
+#define TYPE uint8_t
+#define T_MIN 0
+#define T_MAX 255
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
+
+
+#define TAG(x) u_format_signed_##x
+#define TYPE int8_t
+#define T_MIN (int8_t)-128
+#define T_MAX (int8_t)127
+
+#include "../../../mesa/main/texcompress_rgtc_tmp.h"
+
+#undef TYPE
+#undef TAG
+#undef T_MIN
+#undef T_MAX
diff --git a/src/gallium/auxiliary/util/u_format_rgtc.h b/src/gallium/auxiliary/util/u_format_rgtc.h
new file mode 100644
index 00000000000..3e8636d110c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_format_rgtc.h
@@ -0,0 +1,108 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+#ifndef U_FORMAT_RGTC_H_
+#define U_FORMAT_RGTC_H_
+
+void
+util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+
+void
+util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+void
+util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
+
+void
+util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 8cc22a56371..7468bc38b32 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -87,6 +87,7 @@ def write_format_table(formats):
print
print '#include "u_format.h"'
print '#include "u_format_s3tc.h"'
+ print '#include "u_format_rgtc.h"'
print
u_format_pack.generate(formats)
@@ -132,7 +133,7 @@ def write_format_table(formats):
if format.colorspace != ZS:
print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
- if format.layout == 's3tc':
+ if format.layout == 's3tc' or format.layout == 'rgtc':
print " &util_format_%s_fetch_rgba_8unorm," % format.short_name()
else:
print " NULL, /* fetch_rgba_8unorm */"
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 30555f92a6d..e3d4c06b6f9 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -475,6 +475,17 @@ float_to_ubyte(float f)
}
}
+static INLINE float
+byte_to_float_tex(int8_t b)
+{
+ return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
+}
+
+static INLINE int8_t
+float_to_byte_tex(float f)
+{
+ return (int8_t) (127.0F * f);
+}
/**
* Calc log base 2
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index 3cf8ee0831d..7d157c99ccc 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -515,6 +515,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
int min_index, int max_index,
+ unsigned instance_count,
boolean *upload_flushed)
{
int i, nr = mgr->ve->count;
@@ -530,10 +531,12 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
!uploaded[index]) {
unsigned first, size;
boolean flushed;
+ unsigned instance_div = mgr->ve->ve[i].instance_divisor;
- if (mgr->ve->ve[i].instance_divisor) {
+ if (instance_div) {
first = 0;
- size = vb->buffer->width0;
+ size = vb->stride *
+ ((instance_count + instance_div - 1) / instance_div);
} else if (vb->stride) {
first = vb->stride * min_index;
size = vb->stride * count;
@@ -581,7 +584,8 @@ void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb,
/* Upload user buffers. */
if (mgr->any_user_vbs) {
- u_vbuf_upload_buffers(mgr, min_index, max_index, &upload_flushed);
+ u_vbuf_upload_buffers(mgr, min_index, max_index, info->instance_count,
+ &upload_flushed);
bufs_updated = TRUE;
}
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index 6e93da76209..039c8713570 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -31,8 +31,8 @@
#include "i915_batchbuffer.h"
-#define BEGIN_BATCH(dwords, relocs) \
- (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs))
+#define BEGIN_BATCH(dwords) \
+ (i915_winsys_batchbuffer_check(i915->batch, dwords))
#define OUT_BATCH(dword) \
i915_winsys_batchbuffer_dword(i915->batch, dword)
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index b4a91dabb37..9df82272604 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
static INLINE boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
- size_t dwords,
- size_t relocs)
+ size_t dwords)
{
- return dwords * 4 <= i915_winsys_batchbuffer_space(batch) &&
- relocs <= (batch->max_relocs - batch->relocs);
+ return dwords * 4 <= i915_winsys_batchbuffer_space(batch);
}
static INLINE void
@@ -71,7 +69,7 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
{
assert (i915_winsys_batchbuffer_space(batch) >= size);
- memcpy(data, batch->ptr, size);
+ memcpy(batch->ptr, data, size);
batch->ptr += size;
}
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c
index f885417f8ed..baaed3767ff 100644
--- a/src/gallium/drivers/i915/i915_blit.c
+++ b/src/gallium/drivers/i915/i915_blit.c
@@ -71,9 +71,9 @@ i915_fill_blit(struct i915_context *i915,
return;
}
- if (!BEGIN_BATCH(6, 1)) {
+ if (!BEGIN_BATCH(6)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(6, 1));
+ assert(BEGIN_BATCH(6));
}
OUT_BATCH(CMD);
OUT_BATCH(BR13);
@@ -143,9 +143,9 @@ i915_copy_blit(struct i915_context *i915,
*/
assert (dst_pitch > 0 && src_pitch > 0);
- if (!BEGIN_BATCH(8, 2)) {
+ if (!BEGIN_BATCH(8)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(8, 2));
+ assert(BEGIN_BATCH(8));
}
OUT_BATCH(CMD);
OUT_BATCH(BR13);
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index cbf919754e5..84c8cb54436 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -39,7 +39,7 @@
#include "pipe/p_screen.h"
-DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE)
/*
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 22a2c7b2cb4..f2044d661e3 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -59,9 +59,9 @@ static void i915_flush_pipe( struct pipe_context *pipe,
if (flags & PIPE_FLUSH_TEXTURE_CACHE)
flush |= FLUSH_MAP_CACHE;
- if (!BEGIN_BATCH(1, 0)) {
+ if (!BEGIN_BATCH(1)) {
FLUSH_BATCH(NULL);
- assert(BEGIN_BATCH(1, 0));
+ assert(BEGIN_BATCH(1));
}
OUT_BATCH( flush );
}
diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c
index dd997e2cf48..276e33d4b9d 100644
--- a/src/gallium/drivers/i915/i915_prim_emit.c
+++ b/src/gallium/drivers/i915/i915_prim_emit.c
@@ -144,7 +144,7 @@ emit_prim( struct draw_stage *stage,
vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
assert(vertex_size >= 12); /* never smaller than 12 bytes */
- if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
@@ -152,7 +152,7 @@ emit_prim( struct draw_stage *stage,
i915_update_derived( i915 );
i915_emit_hardware_state( i915 );
- if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
+ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) {
assert(0);
return;
}
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index 3473c863970..fb4c0516dd8 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -465,7 +465,7 @@ draw_arrays_fallback(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
@@ -474,7 +474,7 @@ draw_arrays_fallback(struct vbuf_render *render,
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
assert(0);
goto out;
}
@@ -514,7 +514,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(2, 0)) {
+ if (!BEGIN_BATCH(2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
@@ -523,7 +523,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render,
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(2, 0)) {
+ if (!BEGIN_BATCH(2)) {
assert(0);
goto out;
}
@@ -635,7 +635,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render,
if (i915->hardware_dirty)
i915_emit_hardware_state(i915);
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
FLUSH_BATCH(NULL);
/* Make sure state is re-emitted after a flush:
@@ -644,7 +644,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render,
i915_emit_hardware_state(i915);
i915->vbo_flushed = 1;
- if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) {
+ if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) {
assert(0);
goto out;
}
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 0323ad940f9..15350c0a5d7 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -40,13 +40,19 @@
struct i915_tracked_hw_state {
const char *name;
- void (*validate)(struct i915_context *);
+ void (*validate)(struct i915_context *, unsigned *batch_space);
void (*emit)(struct i915_context *);
unsigned dirty, batch_space;
};
static void
+validate_flush(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->flush_dirty ? 1 : 0;
+}
+
+static void
emit_flush(struct i915_context *i915)
{
/* Cache handling is very cheap atm. State handling can request to flushes:
@@ -61,109 +67,343 @@ emit_flush(struct i915_context *i915)
OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
}
+uint32_t invariant_state[] = {
+ _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
+
+ _3DSTATE_DFLT_DIFFUSE_CMD, 0,
+
+ _3DSTATE_DFLT_SPEC_CMD, 0,
+
+ _3DSTATE_DFLT_Z_CMD, 0,
+
+ _3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7),
+
+ _3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D,
+
+ _3DSTATE_DEPTH_SUBRECT_DISABLE,
+
+ /* disable indirect state for now
+ */
+ _3DSTATE_LOAD_INDIRECT | 0, 0};
+
static void
-validate_immediate(struct i915_context *i915)
+emit_invariant(struct i915_context *i915)
{
+ i915_winsys_batchbuffer_write(i915->batch, invariant_state,
+ Elements(invariant_state)*sizeof(uint32_t));
+}
+
+static void
+validate_immediate(struct i915_context *i915, unsigned *batch_space)
+{
+ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+ 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+ 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+ 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+ i915->immediate_dirty;
+
if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0))
i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
+
+ *batch_space = 1 + util_bitcount(dirty);
}
static void
-validate_static(struct i915_context *i915)
+emit_immediate(struct i915_context *i915)
{
- if (i915->current.cbuf_bo)
+ /* remove unwatned bits and S7 */
+ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+ 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+ 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+ 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+ i915->immediate_dirty;
+ int i, num = util_bitcount(dirty);
+ assert(num && num <= I915_MAX_IMMEDIATE);
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ dirty << 4 | (num - 1));
+
+ if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
+ if (i915->vbo)
+ OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
+ i915->current.immediate[I915_IMMEDIATE_S0]);
+ else
+ OUT_BATCH(0);
+ }
+
+ for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
+ if (dirty & (1 << i))
+ OUT_BATCH(i915->current.immediate[i]);
+ }
+}
+
+static void
+validate_dynamic(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
+}
+
+static void
+emit_dynamic(struct i915_context *i915)
+{
+ int i;
+ for (i = 0; i < I915_MAX_DYNAMIC; i++) {
+ if (i915->dynamic_dirty & (1 << i))
+ OUT_BATCH(i915->current.dynamic[i]);
+ }
+}
+
+static void
+validate_static(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = 2 + 5; /* including DRAW_RECT */
+
+ if (i915->current.cbuf_bo) {
i915->validation_buffers[i915->num_validation_buffers++]
= i915->current.cbuf_bo;
+ *batch_space += 3;
+ }
- if (i915->current.depth_bo)
+ if (i915->current.depth_bo) {
i915->validation_buffers[i915->num_validation_buffers++]
= i915->current.depth_bo;
+ *batch_space += 3;
+ }
}
static void
-validate_map(struct i915_context *i915)
+emit_static(struct i915_context *i915)
+{
+ if (i915->current.cbuf_bo) {
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(i915->current.cbuf_flags);
+ OUT_RELOC(i915->current.cbuf_bo,
+ I915_USAGE_RENDER,
+ 0);
+ }
+
+ /* What happens if no zbuf??
+ */
+ if (i915->current.depth_bo) {
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(i915->current.depth_flags);
+ OUT_RELOC(i915->current.depth_bo,
+ I915_USAGE_RENDER,
+ 0);
+ }
+
+ {
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(i915->current.dst_buf_vars);
+ }
+}
+
+static void
+validate_map(struct i915_context *i915, unsigned *batch_space)
{
const uint enabled = i915->current.sampler_enable_flags;
uint unit;
struct i915_texture *tex;
+ *batch_space = i915->current.sampler_enable_nr ?
+ 2 + 3*i915->current.sampler_enable_nr : 0;
for (unit = 0; unit < I915_TEX_UNITS; unit++) {
if (enabled & (1 << unit)) {
- tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
- i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
+ tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
+ i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
}
}
}
-const static struct i915_tracked_hw_state hw_atoms[] = {
- { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 },
- { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE },
- { "static", validate_static, NULL, I915_HW_STATIC },
- { "map", validate_map, NULL, I915_HW_MAP }
-};
+static void
+emit_map(struct i915_context *i915)
+{
+ const uint nr = i915->current.sampler_enable_nr;
+ if (nr) {
+ const uint enabled = i915->current.sampler_enable_flags;
+ uint unit;
+ uint count = 0;
+ OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
+ OUT_BATCH(enabled);
+ for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+ if (enabled & (1 << unit)) {
+ struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
+ struct i915_winsys_buffer *buf = texture->buffer;
+ assert(buf);
+
+ count++;
+
+ OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
+ OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
+ OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
+ }
+ }
+ assert(count == nr);
+ }
+}
+
+static void
+validate_sampler(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->current.sampler_enable_nr ?
+ 2 + 3*i915->current.sampler_enable_nr : 0;
+}
+
+static void
+emit_sampler(struct i915_context *i915)
+{
+ if (i915->current.sampler_enable_nr) {
+ int i;
+
+ OUT_BATCH( _3DSTATE_SAMPLER_STATE |
+ (3 * i915->current.sampler_enable_nr) );
+
+ OUT_BATCH( i915->current.sampler_enable_flags );
+
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if (i915->current.sampler_enable_flags & (1<<i)) {
+ OUT_BATCH( i915->current.sampler[i][0] );
+ OUT_BATCH( i915->current.sampler[i][1] );
+ OUT_BATCH( i915->current.sampler[i][2] );
+ }
+ }
+ }
+}
+
+static void
+validate_constants(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->fs->num_constants ?
+ 2 + 4*i915->fs->num_constants : 0;
+}
+
+static void
+emit_constants(struct i915_context *i915)
+{
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
+ uint i;
+
+ OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
+ OUT_BATCH((1 << nr) - 1);
+
+ for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
+ c += 4 * i;
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
+#if 0 /* debug */
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
+ }
+#endif
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ OUT_BATCH(*c++);
+ }
+ }
+}
+
+static void
+validate_program(struct i915_context *i915, unsigned *batch_space)
+{
+ *batch_space = i915->fs->program_len;
+}
+
+static void
+emit_program(struct i915_context *i915)
+{
+ uint i;
+ /* we should always have, at least, a pass-through program */
+ assert(i915->fs->program_len > 0);
+ for (i = 0; i < i915->fs->program_len; i++) {
+ OUT_BATCH(i915->fs->program[i]);
+ }
+}
+
+static void
+emit_draw_rect(struct i915_context *i915)
+{
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
+ OUT_BATCH(i915->current.draw_offset);
+ OUT_BATCH(i915->current.draw_size);
+ OUT_BATCH(i915->current.draw_offset);
+}
static boolean
i915_validate_state(struct i915_context *i915, unsigned *batch_space)
{
- int i;
+ unsigned tmp;
i915->num_validation_buffers = 0;
- *batch_space = 0;
-
- for (i = 0; i < Elements(hw_atoms); i++)
- if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) {
- hw_atoms[i].validate(i915);
- *batch_space += hw_atoms[i].batch_space;
- }
+ if (i915->hardware_dirty & I915_HW_INVARIANT)
+ *batch_space = Elements(invariant_state);
+ else
+ *batch_space = 0;
+
+#define VALIDATE_ATOM(atom, hw_dirty) \
+ if (i915->hardware_dirty & hw_dirty) { \
+ validate_##atom(i915, &tmp); \
+ *batch_space += tmp; }
+ VALIDATE_ATOM(flush, I915_HW_FLUSH);
+ VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
+ VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
+ VALIDATE_ATOM(static, I915_HW_STATIC);
+ VALIDATE_ATOM(map, I915_HW_MAP);
+ VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
+ VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
+ VALIDATE_ATOM(program, I915_HW_PROGRAM);
+#undef VALIDATE_ATOM
if (i915->num_validation_buffers == 0)
return TRUE;
if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
- i915->num_validation_buffers))
+ i915->num_validation_buffers))
return FALSE;
return TRUE;
}
-static void
-emit_state(struct i915_context *i915)
-{
- int i;
-
- for (i = 0; i < Elements(hw_atoms); i++)
- if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit)
- hw_atoms[i].emit(i915);
-}
-
/* Push the state into the sarea and/or texture memory.
*/
void
i915_emit_hardware_state(struct i915_context *i915 )
{
unsigned batch_space;
- /* XXX: there must be an easier way */
- const unsigned dwords = ( 14 +
- 7 +
- I915_MAX_DYNAMIC +
- 8 +
- 2 + I915_TEX_UNITS*3 +
- 2 + I915_TEX_UNITS*3 +
- 2 + I915_MAX_CONSTANT*4 +
-#if 0
- i915->current.program_len +
-#else
- i915->fs->program_len +
-#endif
- 6
- ) * 3/2; /* plus 50% margin */
- const unsigned relocs = ( I915_TEX_UNITS +
- 3
- ) * 3/2; /* plus 50% margin */
-
uintptr_t save_ptr;
- size_t save_relocs;
if (I915_DBG_ON(DBG_ATOMS))
i915_dump_hardware_dirty(i915, __FUNCTION__);
@@ -173,262 +413,36 @@ i915_emit_hardware_state(struct i915_context *i915 )
assert(i915_validate_state(i915, &batch_space));
}
- if(!BEGIN_BATCH(batch_space + dwords, relocs)) {
+ if(!BEGIN_BATCH(batch_space)) {
FLUSH_BATCH(NULL);
assert(i915_validate_state(i915, &batch_space));
- assert(BEGIN_BATCH(batch_space + dwords, relocs));
+ assert(BEGIN_BATCH(batch_space));
}
save_ptr = (uintptr_t)i915->batch->ptr;
- save_relocs = i915->batch->relocs;
-
- emit_state(i915);
- /* 14 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_INVARIANT)
- {
- OUT_BATCH(_3DSTATE_AA_CMD |
- AA_LINE_ECAAR_WIDTH_ENABLE |
- AA_LINE_ECAAR_WIDTH_1_0 |
- AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
-
- OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
- OUT_BATCH(0);
-
- OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
- CSB_TCB(0, 0) |
- CSB_TCB(1, 1) |
- CSB_TCB(2, 2) |
- CSB_TCB(3, 3) |
- CSB_TCB(4, 4) |
- CSB_TCB(5, 5) |
- CSB_TCB(6, 6) |
- CSB_TCB(7, 7));
-
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) |
- ENABLE_TEXKILL_3D_4D |
- TEXKILL_4D);
-
- OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
-
- /* disable indirect state for now
- */
- OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
- OUT_BATCH(0);
- }
-
- /* 7 dwords, 1 relocs */
- if (i915->hardware_dirty & I915_HW_IMMEDIATE)
- {
- /* remove unwatned bits and S7 */
- unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
- 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
- 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
- 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
- i915->immediate_dirty;
- int i, num = util_bitcount(dirty);
- assert(num && num <= I915_MAX_IMMEDIATE);
-
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- dirty << 4 | (num - 1));
-
- if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
- if (i915->vbo)
- OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
- i915->current.immediate[I915_IMMEDIATE_S0]);
- else
- OUT_BATCH(0);
- }
-
- for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
- if (dirty & (1 << i))
- OUT_BATCH(i915->current.immediate[i]);
- }
- }
-
-#if 01
- /* I915_MAX_DYNAMIC dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_DYNAMIC)
- {
- int i;
- for (i = 0; i < I915_MAX_DYNAMIC; i++) {
- if (i915->dynamic_dirty & (1 << i))
- OUT_BATCH(i915->current.dynamic[i]);
- }
- }
-#endif
-
-#if 01
- /* 8 dwords, 2 relocs */
- if (i915->hardware_dirty & I915_HW_STATIC)
- {
- if (i915->current.cbuf_bo) {
- OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
- OUT_BATCH(i915->current.cbuf_flags);
- OUT_RELOC(i915->current.cbuf_bo,
- I915_USAGE_RENDER,
- 0);
- }
-
- /* What happens if no zbuf??
- */
- if (i915->current.depth_bo) {
- OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
- OUT_BATCH(i915->current.depth_flags);
- OUT_RELOC(i915->current.depth_bo,
- I915_USAGE_RENDER,
- 0);
- }
-
- {
- OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
- OUT_BATCH(i915->current.dst_buf_vars);
- }
- }
-#endif
-
-#if 01
- /* texture images */
- /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */
- if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER))
- {
- const uint nr = i915->current.sampler_enable_nr;
- if (nr) {
- const uint enabled = i915->current.sampler_enable_flags;
- uint unit;
- uint count = 0;
- OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
- OUT_BATCH(enabled);
- for (unit = 0; unit < I915_TEX_UNITS; unit++) {
- if (enabled & (1 << unit)) {
- struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
- struct i915_winsys_buffer *buf = texture->buffer;
- assert(buf);
-
- count++;
-
- OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
- OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
- OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
- }
- }
- assert(count == nr);
- }
- }
-#endif
-
-#if 01
- /* samplers */
- /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_SAMPLER)
- {
- if (i915->current.sampler_enable_nr) {
- int i;
-
- OUT_BATCH( _3DSTATE_SAMPLER_STATE |
- (3 * i915->current.sampler_enable_nr) );
-
- OUT_BATCH( i915->current.sampler_enable_flags );
-
- for (i = 0; i < I915_TEX_UNITS; i++) {
- if (i915->current.sampler_enable_flags & (1<<i)) {
- OUT_BATCH( i915->current.sampler[i][0] );
- OUT_BATCH( i915->current.sampler[i][1] );
- OUT_BATCH( i915->current.sampler[i][2] );
- }
- }
- }
- }
-#endif
-
-#if 01
- /* constants */
- /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_CONSTANTS)
- {
- /* Collate the user-defined constants with the fragment shader's
- * immediates according to the constant_flags[] array.
- */
- const uint nr = i915->fs->num_constants;
- if (nr) {
- uint i;
-
- OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
- OUT_BATCH((1 << nr) - 1);
-
- for (i = 0; i < nr; i++) {
- const uint *c;
- if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
- /* grab user-defined constant */
- c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
- c += 4 * i;
- }
- else {
- /* emit program constant */
- c = (uint *) i915->fs->constants[i];
- }
-#if 0 /* debug */
- {
- float *f = (float *) c;
- printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
- (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
- ? "user" : "immediate"));
- }
-#endif
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
- OUT_BATCH(*c++);
- }
- }
- }
-#endif
-
-#if 01
- /* Fragment program */
- /* i915->current.program_len dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_PROGRAM)
- {
- uint i;
- /* we should always have, at least, a pass-through program */
- assert(i915->fs->program_len > 0);
- for (i = 0; i < i915->fs->program_len; i++) {
- OUT_BATCH(i915->fs->program[i]);
- }
- }
-#endif
-
-#if 01
- /* drawing surface size */
- /* 6 dwords, 0 relocs */
- if (i915->hardware_dirty & I915_HW_STATIC)
- {
- /* XXX flush only required when the draw_offset changes! */
- OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
- OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
- OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
- OUT_BATCH(i915->current.draw_offset);
- OUT_BATCH(i915->current.draw_size);
- OUT_BATCH(i915->current.draw_offset);
- }
-#endif
- I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__,
+#define EMIT_ATOM(atom, hw_dirty) \
+ if (i915->hardware_dirty & hw_dirty) \
+ emit_##atom(i915);
+ EMIT_ATOM(flush, I915_HW_FLUSH);
+ EMIT_ATOM(invariant, I915_HW_INVARIANT);
+ EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
+ EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
+ EMIT_ATOM(static, I915_HW_STATIC);
+ EMIT_ATOM(map, I915_HW_MAP);
+ EMIT_ATOM(sampler, I915_HW_SAMPLER);
+ EMIT_ATOM(constants, I915_HW_CONSTANTS);
+ EMIT_ATOM(program, I915_HW_PROGRAM);
+ EMIT_ATOM(draw_rect, I915_HW_STATIC);
+#undef EMIT_ATOM
+
+ I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
((uintptr_t)i915->batch->ptr - save_ptr) / 4,
- i915->batch->relocs - save_relocs);
+ batch_space);
+ assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
i915->hardware_dirty = 0;
i915->immediate_dirty = 0;
i915->dynamic_dirty = 0;
+ i915->flush_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index 97044499990..20cd23f8f73 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -164,7 +164,7 @@ static void update_framebuffer(struct i915_context *i915)
assert(ret);
if (i915->current.draw_offset != draw_offset) {
i915->current.draw_offset = draw_offset;
- /* XXX: only emit flush on change and not always in emit */
+ i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH);
}
i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 4ac2f5b9777..21cfdc9613e 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer {
size_t size;
size_t relocs;
- size_t max_relocs;
/*@}*/
};
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 4f86db39926..6391ea7f3be 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -22,7 +22,6 @@
#include "r300_context.h"
#include "r300_emit.h"
-#include "r300_hyperz.h"
#include "r300_texture.h"
#include "r300_winsys.h"
@@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300)
return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level];
}
+static boolean r300_hiz_clear_allowed(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level];
+}
+
static uint32_t r300_depth_clear_value(enum pipe_format format,
double depth, unsigned stencil)
{
@@ -134,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format,
}
}
+static uint32_t r300_hiz_clear_value(double depth)
+{
+ uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5);
+ assert(r <= 255);
+ return r | (r << 8) | (r << 16) | (r << 24);
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -190,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
- struct r300_resource *zstex =
- fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
@@ -200,20 +212,18 @@ static void r300_clear(struct pipe_context* pipe,
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
- hyperz_dcv = hyperz->zb_depthclearvalue =
- r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
-
if (r300_fast_zclear_allowed(r300)) {
+ hyperz_dcv = hyperz->zb_depthclearvalue =
+ r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
+
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
- if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
+ if (r300_hiz_clear_allowed(r300)) {
+ r300->hiz_clear_value = r300_hiz_clear_value(depth);
r300_mark_atom_dirty(r300, &r300->hiz_clear);
-
- /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
- * once hiz offset is constant. */
- r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
+ }
}
/* Enable CBZB clear. */
@@ -240,14 +250,14 @@ static void r300_clear(struct pipe_context* pipe,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
- } else if (r300->zmask_clear.dirty) {
+ } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
/* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
unsigned dwords;
/* Calculate zmask_clear and hiz_clear atom sizes. */
r300_update_hyperz_state(r300);
- dwords = r300->zmask_clear.size +
+ dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
@@ -257,9 +267,11 @@ static void r300_clear(struct pipe_context* pipe,
}
/* Emit clear packets. */
- r300_emit_zmask_clear(r300, r300->zmask_clear.size,
- r300->zmask_clear.state);
- r300->zmask_clear.dirty = FALSE;
+ if (r300->zmask_clear.dirty) {
+ r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+ r300->zmask_clear.state);
+ r300->zmask_clear.dirty = FALSE;
+ }
if (r300->hiz_clear.dirty) {
r300_emit_hiz_clear(r300, r300->hiz_clear.size,
r300->hiz_clear.state);
@@ -279,9 +291,8 @@ static void r300_clear(struct pipe_context* pipe,
/* Enable fastfill and/or hiz.
*
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
- * looks if zmask/hiz is in use and enables fastfill accordingly. */
- if (r300->zmask_in_use ||
- (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+ * looks if zmask/hiz is in use and programs hardware accordingly. */
+ if (r300->zmask_in_use || r300->hiz_in_use) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -295,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
- r300->zmask_locked = TRUE;
+ r300->hyperz_locked = TRUE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
@@ -303,7 +314,7 @@ static void r300_clear_render_target(struct pipe_context *pipe,
dstx, dsty, width, height);
r300_blitter_end(r300);
- r300->zmask_locked = FALSE;
+ r300->hyperz_locked = FALSE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
@@ -320,11 +331,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
if (fb->zsbuf->texture == dst->texture) {
r300_decompress_zmask(r300);
} else {
- r300->zmask_locked = TRUE;
+ r300->hyperz_locked = TRUE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -334,8 +345,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
dstx, dsty, width, height);
r300_blitter_end(r300);
- if (r300->zmask_locked) {
- r300->zmask_locked = FALSE;
+ if (r300->hyperz_locked) {
+ r300->hyperz_locked = FALSE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -345,7 +356,7 @@ void r300_decompress_zmask(struct r300_context *r300)
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- if (!r300->zmask_in_use || r300->zmask_locked)
+ if (!r300->zmask_in_use || r300->hyperz_locked)
return;
r300->zmask_decompress = TRUE;
@@ -420,12 +431,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
util_format_description(dst->format);
struct pipe_box box;
- if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
if (fb->zsbuf->texture == src ||
fb->zsbuf->texture == dst) {
r300_decompress_zmask(r300);
} else {
- r300->zmask_locked = TRUE;
+ r300->hyperz_locked = TRUE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -463,7 +474,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
}
/* Handle compressed formats. */
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC ||
+ desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
switch (util_format_get_blocksize(old_dst.format)) {
case 8:
/* 1 pixel = 4 bits,
@@ -502,8 +514,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
if (old_dst.format != new_dst.format)
r300_resource_set_properties(pipe->screen, dst, 0, &old_dst);
- if (r300->zmask_locked) {
- r300->zmask_locked = FALSE;
+ if (r300->hyperz_locked) {
+ r300->hyperz_locked = FALSE;
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 0be161fa07a..68943d561ba 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -90,8 +90,6 @@ struct r300_capabilities {
boolean high_second_pipe;
/* DXTC texture swizzling. */
boolean dxtc_swizzle;
- /* Index bias (AKA index offset). */
- boolean index_bias_supported;
};
/* Enumerations for legibility and telling which card we're running on. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 9f85bd4ce5f..166d965aa5b 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -30,7 +30,6 @@
#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
-#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "r300_winsys.h"
@@ -170,7 +169,6 @@ static boolean r300_setup_atoms(struct r300_context* r300)
boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
- boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0);
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0;
@@ -203,11 +201,11 @@ static boolean r300_setup_atoms(struct r300_context* r300)
/* SC. */
R300_INIT_ATOM(scissor_state, 3);
/* GB, FG, GA, SU, SC, RB3D. */
- R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0));
+ R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0));
/* VAP. */
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(pvs_flush, 2);
- R300_INIT_ATOM(vap_invariant_state, 9);
+ R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9);
R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
@@ -216,7 +214,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
R300_INIT_ATOM(rs_block_state, 0);
R300_INIT_ATOM(rs_state, 0);
/* SC, US. */
- R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0));
+ R300_INIT_ATOM(fb_state_pipelined, 8);
/* US. */
R300_INIT_ATOM(fs, 0);
R300_INIT_ATOM(fs_rc_constant_state, 0);
@@ -227,7 +225,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
if (can_hyperz) {
/* HiZ Clear */
if (has_hiz_ram)
- R300_INIT_ATOM(hiz_clear, 0);
+ R300_INIT_ATOM(hiz_clear, 4);
/* zmask clear */
R300_INIT_ATOM(zmask_clear, 4);
}
@@ -331,7 +329,7 @@ static void r300_init_states(struct pipe_context *pipe)
/* Initialize the VAP invariant state. */
{
- BEGIN_CB(vap_invariant->cb, 9);
+ BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size);
OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CB_32F(1.0);
@@ -339,6 +337,10 @@ static void r300_init_states(struct pipe_context *pipe)
OUT_CB_32F(1.0);
OUT_CB_32F(1.0);
OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0);
+ }
END_CB;
}
@@ -353,11 +355,17 @@ static void r300_init_states(struct pipe_context *pipe)
OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
+ OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff);
if (r300->screen->caps.is_rv350) {
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0);
+ OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0);
+ }
END_CB;
}
@@ -446,16 +454,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
/* Render functions must be initialized after blitter. */
r300_init_render_functions(r300);
+ r300_init_states(&r300->context);
rws->cs_set_flush(r300->cs, r300_flush_cb, r300);
- /* setup hyper-z mm */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- if (!r300_hyperz_init_mm(r300))
- goto fail;
-
- r300_init_states(&r300->context);
-
/* The KIL opcode needs the first texture unit to be enabled
* on r3xx-r4xx. In order to calm down the CS checker, we bind this
* dummy texture there. */
@@ -506,10 +508,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
}
/* Print driver info. */
-#ifdef NDEBUG
- if (DBG_ON(r300, DBG_INFO)) {
-#else
+#ifdef DEBUG
{
+#else
+ if (DBG_ON(r300, DBG_INFO)) {
#endif
fprintf(stderr,
"r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
@@ -525,7 +527,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
rws->get_value(rws, R300_VID_GART_SIZE) >> 20,
rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20,
rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO",
- rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO",
+ rws->get_value(rws, R300_CAN_HYPERZ) &&
+ r300->screen->caps.zmask_ram ? "YES" : "NO",
rws->get_value(rws, R300_CAN_HYPERZ) &&
r300->screen->caps.hiz_ram ? "YES" : "NO");
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 30073759476..6f2aab69ab1 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -102,7 +102,6 @@ struct r300_dsa_state {
};
struct r300_hyperz_state {
- int current_func; /* -1 after a clear before first op */
int flush;
/* This is actually a command buffer with named dwords. */
uint32_t cb_flush_begin;
@@ -220,11 +219,11 @@ struct r300_vertex_stream_state {
};
struct r300_invariant_state {
- uint32_t cb[20];
+ uint32_t cb[26];
};
struct r300_vap_invariant_state {
- uint32_t cb[9];
+ uint32_t cb[11];
};
struct r300_viewport_state {
@@ -295,6 +294,8 @@ struct r300_surface {
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t pitch_zmask; /* ZMASK_PITCH */
+ uint32_t pitch_hiz; /* HIZ_PITCH */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
/* Parameters dedicated to the CBZB clear. */
@@ -363,8 +364,12 @@ struct r300_texture_desc {
/* Zbuffer compression info for each miplevel. */
boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS];
- /* If zero, then disable compression. */
+ /* If zero, then disable Z compression/HiZ. */
unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS];
+ unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS];
+ /* Zmask/HiZ strides for each miplevel. */
+ unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+ unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
};
struct r300_resource
@@ -390,10 +395,6 @@ struct r300_resource
/* Where the texture starts in the buffer. */
unsigned tex_offset;
- /* HiZ memory allocations. */
- struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
- boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
-
/* This is the level tiling flags were last time set for.
* It's used to prevent redundant tiling-flags changes from happening.*/
unsigned surface_level;
@@ -412,6 +413,21 @@ struct r300_vertex_element_state {
struct r300_vertex_stream_state vertex_stream;
};
+enum r300_hiz_func {
+ HIZ_FUNC_NONE,
+
+ /* The function, when determined, is set in stone
+ * until the next HiZ clear. */
+
+ /* MAX is written to the HiZ buffer.
+ * Used for LESS, LEQUAL. */
+ HIZ_FUNC_MAX,
+
+ /* MIN is written to the HiZ buffer.
+ * Used for GREATER, GEQUAL. */
+ HIZ_FUNC_MIN,
+};
+
struct r300_context {
/* Parent class */
struct pipe_context context;
@@ -545,22 +561,25 @@ struct r300_context {
int sprite_coord_enable;
/* Whether two-sided color selection is enabled (AKA light_twoside). */
boolean two_sided_color;
-
+ /* Whether fast color clear is enabled. */
boolean cbzb_clear;
/* Whether ZMASK is enabled. */
boolean zmask_in_use;
/* Whether ZMASK is being decompressed. */
boolean zmask_decompress;
- /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */
- boolean zmask_locked;
+ /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */
+ boolean hyperz_locked;
/* The zbuffer the ZMASK of which is locked. */
struct pipe_surface *locked_zbuffer;
+ /* Whether HIZ is enabled. */
+ boolean hiz_in_use;
+ /* HiZ function. Can be either MIN or MAX. */
+ enum r300_hiz_func hiz_func;
+ /* HiZ clear value. */
+ uint32_t hiz_clear_value;
void *dsa_decompress_zmask;
- /* two mem block managers for hiz/zmask ram space */
- struct mem_block *hiz_mm;
-
struct u_vbuf_mgr *vbuf_mgr;
struct util_slab_mempool pool_transfers;
@@ -644,6 +663,9 @@ void r300_decompress_zmask(struct r300_context *r300);
void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
void r300_decompress_zmask_locked(struct r300_context *r300);
+/* r300_hyperz.c */
+void r300_update_hyperz_state(struct r300_context* r300);
+
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
struct r300_query *query);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index e2e4719ec82..24c82a3efd2 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_RELOC(surf);
if (can_hyperz) {
- uint32_t surf_pitch;
- struct r300_resource *tex;
- int level = surf->base.u.tex.level;
- tex = r300_resource(surf->base.texture);
-
- surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
-
/* HiZ RAM. */
- if (r300->screen->caps.hiz_ram) {
- if (tex->hiz_mem[level]) {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
- } else {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
- OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
- }
- }
-
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);
/* Z Mask RAM. (compressed zbuffer) */
OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);
}
}
@@ -484,6 +469,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
unsigned i, num_cbufs = fb->nr_cbufs;
+ unsigned mspos0, mspos1;
CS_LOCALS(r300);
/* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
@@ -507,38 +493,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
/* Multisampling. Depends on framebuffer sample count.
* These are pipelined regs and as such cannot be moved
* to the AA state. */
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- unsigned mspos0 = 0x66666666;
- unsigned mspos1 = 0x6666666;
-
- if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
- /* Subsample placement. These may not be optimal. */
- switch (fb->cbufs[0]->texture->nr_samples) {
- case 2:
- mspos0 = 0x33996633;
- mspos1 = 0x6666663;
- break;
- case 3:
- mspos0 = 0x33936933;
- mspos1 = 0x6666663;
- break;
- case 4:
- mspos0 = 0x33939933;
- mspos1 = 0x3966663;
- break;
- case 6:
- mspos0 = 0x22a2aa22;
- mspos1 = 0x2a65672;
- break;
- default:
- debug_printf("r300: Bad number of multisamples!\n");
- }
- }
+ mspos0 = 0x66666666;
+ mspos1 = 0x6666666;
- OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
- OUT_CS(mspos0);
- OUT_CS(mspos1);
+ if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
+ /* Subsample placement. These may not be optimal. */
+ switch (fb->cbufs[0]->texture->nr_samples) {
+ case 2:
+ mspos0 = 0x33996633;
+ mspos1 = 0x6666663;
+ break;
+ case 3:
+ mspos0 = 0x33936933;
+ mspos1 = 0x6666663;
+ break;
+ case 4:
+ mspos0 = 0x33939933;
+ mspos1 = 0x3966663;
+ break;
+ case 6:
+ mspos0 = 0x22a2aa22;
+ mspos1 = 0x2a65672;
+ break;
+ default:
+ debug_printf("r300: Bad number of multisamples!\n");
+ }
}
+
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(mspos0);
+ OUT_CS(mspos1);
END_CS;
}
@@ -1039,56 +1023,26 @@ void r300_emit_viewport_state(struct r300_context* r300,
END_CS;
}
-static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
- CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
- OUT_CS(start);
- OUT_CS(count);
- OUT_CS(val);
- END_CS;
-}
-
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_hyperz_state *z =
- (struct r300_hyperz_state*)r300->hyperz_state.state;
- struct r300_screen* r300screen = r300->screen;
- uint32_t stride, offset = 0, height, offset_shift;
struct r300_resource* tex;
- int i;
+ CS_LOCALS(r300);
tex = r300_resource(fb->zsbuf->texture);
- offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs;
- stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level];
-
- /* convert from pixels to 4x4 blocks */
- stride = ALIGN_DIVUP(stride, 4);
-
- stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
- /* there are 4 blocks per dwords */
- stride = ALIGN_DIVUP(stride, 4);
-
- height = ALIGN_DIVUP(fb->zsbuf->height, 4);
-
- offset_shift = 2;
- offset_shift += (r300screen->caps.num_frag_pipes / 2);
-
- for (i = 0; i < height; i++) {
- offset = i * stride;
- offset <<= offset_shift;
- r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
- }
- z->current_func = -1;
+ BEGIN_CS(size);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
+ OUT_CS(0);
+ OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]);
+ OUT_CS(r300->hiz_clear_value);
+ END_CS;
/* Mark the current zbuffer's hiz ram as in use. */
- tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE;
+ r300->hiz_in_use = TRUE;
+ r300->hiz_func = HIZ_FUNC_NONE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1236,7 +1190,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
/* Emitted in flush. */
dwords += 26; /* emit_query_end */
dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
dwords += 2;
return dwords;
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index c77cc08539d..9c41a1383ce 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -57,7 +57,7 @@ static void r300_flush(struct pipe_context* pipe,
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
r500_emit_index_bias(r300, 0);
r300->flush_counter++;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 873e0209d42..ecaadf4af8e 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -22,7 +22,6 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_context.h"
-#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
#include "r300_winsys.h"
@@ -41,58 +40,74 @@
/* The HyperZ setup */
/*****************************************************************************/
-static bool r300_get_sc_hz_max(struct r300_context *r300)
+static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- int ret = R300_SC_HYPERZ_MIN;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
- if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS)
- ret = R300_SC_HYPERZ_MAX;
- return ret;
+ if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask)
+ return HIZ_FUNC_NONE;
+
+ switch (dsa->dsa.depth.func) {
+ case PIPE_FUNC_NEVER:
+ case PIPE_FUNC_EQUAL:
+ case PIPE_FUNC_NOTEQUAL:
+ case PIPE_FUNC_ALWAYS:
+ return HIZ_FUNC_NONE;
+
+ case PIPE_FUNC_LESS:
+ case PIPE_FUNC_LEQUAL:
+ return HIZ_FUNC_MAX;
+
+ case PIPE_FUNC_GREATER:
+ case PIPE_FUNC_GEQUAL:
+ return HIZ_FUNC_MIN;
+
+ default:
+ assert(0);
+ return HIZ_FUNC_NONE;
+ }
}
-static bool r300_zfunc_same_direction(int func1, int func2)
+/* Return what's used for the depth test (either minimum or maximum). */
+static unsigned r300_get_sc_hz_max(struct r300_context *r300)
{
- /* func1 is less/lessthan */
- if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) &&
- (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL ||
- func2 == R300_ZS_GREATER))
- return FALSE;
-
- /* func1 is greater/greaterthan */
- if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) &&
- (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL))
- return FALSE;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ unsigned func = dsa->dsa.depth.func;
- return TRUE;
+ return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN;
}
-static int r300_get_hiz_min(struct r300_context *r300)
+static boolean r300_is_hiz_func_valid(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- int ret = R300_HIZ_MIN;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ unsigned func = dsa->dsa.depth.func;
+
+ if (r300->hiz_func == HIZ_FUNC_NONE)
+ return TRUE;
+
+ /* func1 is less/lessthan */
+ if (r300->hiz_func == HIZ_FUNC_MAX &&
+ (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER))
+ return FALSE;
- if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL)
- ret = R300_HIZ_MAX;
- return ret;
+ /* func1 is greater/greaterthan */
+ if (r300->hiz_func == HIZ_FUNC_MIN &&
+ (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL))
+ return FALSE;
+
+ return TRUE;
}
static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
{
- if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
- s->zfail_op != PIPE_STENCIL_OP_KEEP))
- return TRUE;
- return FALSE;
+ return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zfail_op != PIPE_STENCIL_OP_KEEP);
}
static boolean r300_can_hiz(struct r300_context *r300)
{
- struct r300_dsa_state *dsa_state = r300->dsa_state.state;
- struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa;
- struct r300_screen* r300screen = r300->screen;
- struct r300_hyperz_state *z = r300->hyperz_state.state;
+ struct r300_dsa_state *dsa = r300->dsa_state.state;
+ struct r300_screen *r300screen = r300->screen;
/* shader writes depth - no HiZ */
if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */
@@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300)
if (r300->query_current)
return FALSE;
+
/* if stencil fail/zfail op is not KEEP */
- if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) ||
- r300_dsa_stencil_op_not_keep(&dsa->stencil[1]))
+ if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) ||
+ r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1]))
return FALSE;
- if (dsa->depth.enabled) {
+ if (dsa->dsa.depth.enabled) {
/* if depth func is EQUAL pre-r500 */
- if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
+ if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
return FALSE;
+
/* if depth func is NOTEQUAL */
- if (dsa->depth.func == PIPE_FUNC_NOTEQUAL)
+ if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL)
return FALSE;
}
- /* depth comparison function - if just cleared save and return okay */
- if (z->current_func == -1) {
- int func = dsa_state->z_stencil_control & R300_ZS_MASK;
- if (func != 0 && func != 7)
- z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK;
- } else {
- /* simple don't change */
- if (!r300_zfunc_same_direction(z->current_func,
- (dsa_state->z_stencil_control & R300_ZS_MASK))) {
- DBG(r300, DBG_HYPERZ,
- "z func changed direction - disabling hyper-z %d -> %d\n",
- z->current_func, dsa_state->z_stencil_control);
- return FALSE;
- }
- }
return TRUE;
}
@@ -139,7 +141,6 @@ static void r300_update_hyperz(struct r300_context* r300)
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_resource *zstex =
fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
- boolean hiz_in_use = FALSE;
z->gb_z_peq_config = 0;
z->zb_bw_cntl = 0;
@@ -151,16 +152,12 @@ static void r300_update_hyperz(struct r300_context* r300)
return;
}
- if (!zstex)
+ if (!zstex ||
+ !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
return;
- if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- return;
-
- hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
-
/* Zbuffer compression. */
- if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (r300->zmask_in_use && !r300->hyperz_locked) {
z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
/*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
R300_RD_COMP_ENABLE;
@@ -174,16 +171,28 @@ static void r300_update_hyperz(struct r300_context* r300)
z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
}
- if (hiz_in_use && r300_can_hiz(r300)) {
- z->zb_bw_cntl |= R300_HIZ_ENABLE |
- r300_get_hiz_min(r300);
-
- z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
- r300_get_sc_hz_max(r300);
+ /* HiZ. */
+ if (r300->hiz_in_use && !r300->hyperz_locked) {
+ /* Set the HiZ function if needed. */
+ if (r300->hiz_func == HIZ_FUNC_NONE) {
+ r300->hiz_func = r300_get_hiz_func(r300);
+ }
- if (r300->screen->caps.is_r500) {
- z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 |
- R500_HIZ_EQUAL_REJECT_ENABLE;
+ /* If the depth function is inverted, HiZ must be disabled. */
+ if (!r300_is_hiz_func_valid(r300)) {
+ r300->hiz_in_use = FALSE;
+ } else if (r300_can_hiz(r300)) {
+ /* Setup the HiZ bits. */
+ z->zb_bw_cntl |=
+ R300_HIZ_ENABLE |
+ (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX);
+
+ z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
+ r300_get_sc_hz_max(r300);
+
+ if (r300->screen->caps.is_r500) {
+ z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
+ }
}
}
@@ -282,18 +291,6 @@ static void r300_update_ztop(struct r300_context* r300)
r300_mark_atom_dirty(r300, &r300->ztop_state);
}
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
-static void r300_update_hiz_clear(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb =
- (struct pipe_framebuffer_state*)r300->fb_state.state;
- uint32_t height;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, 4);
- r300->hiz_clear.size = height * 4;
-}
-
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
@@ -301,51 +298,4 @@ void r300_update_hyperz_state(struct r300_context* r300)
if (r300->hyperz_state.dirty) {
r300_update_hyperz(r300);
}
-
- if (r300->hiz_clear.dirty) {
- r300_update_hiz_clear(r300);
- }
-}
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
-{
- struct r300_resource *tex;
- uint32_t zsize, ndw;
- int level = surf->base.u.tex.level;
-
- tex = r300_resource(surf->base.texture);
-
- if (tex->hiz_mem[level])
- return;
-
- zsize = tex->tex.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->b.b.b.format);
- ndw = ALIGN_DIVUP(zsize, 64);
-
- tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
-}
-
-boolean r300_hyperz_init_mm(struct r300_context *r300)
-{
- struct r300_screen* r300screen = r300->screen;
- int frag_pipes = r300screen->caps.num_frag_pipes;
-
- if (r300screen->caps.hiz_ram) {
- r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
- if (!r300->hiz_mm) {
- return FALSE;
- }
- }
-
- return TRUE;
-}
-
-void r300_hyperz_destroy_mm(struct r300_context *r300)
-{
- struct r300_screen* r300screen = r300->screen;
-
- if (r300screen->caps.hiz_ram) {
- u_mmDestroy(r300->hiz_mm);
- r300->hiz_mm = NULL;
- }
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
deleted file mode 100644
index d4c8e7c60a9..00000000000
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2010 Marek Olšák <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_HYPERZ_H
-#define R300_HYPERZ_H
-
-struct r300_context;
-
-void r300_update_hyperz_state(struct r300_context* r300);
-
-void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-
-boolean r300_hyperz_init_mm(struct r300_context *r300);
-void r300_hyperz_destroy_mm(struct r300_context *r300);
-
-#endif
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 1d93dab2ca2..bb30b1ab0be 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
*/
+#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218
+
#define R300_VAP_CLIP_CNTL 0x221C
# define R300_VAP_UCP_ENABLE_0 (1 << 0)
# define R300_VAP_UCP_ENABLE_1 (1 << 1)
@@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
# define R500_TX_DIRECTION_VERITCAL (1<<27)
+#define R500_SU_TEX_WRAP_PS3 0x4114
+
/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
#define R300_GA_POINT_S0 0x4200
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2ead8667bda..0ec4a225865 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -205,7 +205,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
if (first_draw) {
cs_dwords += r300_get_num_dirty_dwords(r300);
- if (r300->screen->caps.index_bias_supported)
+ if (r300->screen->caps.is_r500)
cs_dwords += 2; /* emit_index_offset */
if (emit_vertex_arrays)
@@ -257,7 +257,7 @@ static boolean r300_emit_states(struct r300_context *r300,
}
r300_emit_dirty_state(r300);
- if (r300->screen->caps.index_bias_supported) {
+ if (r300->screen->caps.is_r500) {
if (r300->screen->caps.has_tcl)
r500_emit_index_bias(r300, index_bias);
else
@@ -557,7 +557,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300_translate_primitive(mode));
- if (indexBias && !r300->screen->caps.index_bias_supported) {
+ if (indexBias && !r300->screen->caps.is_r500) {
for (i = 0; i < count-1; i += 2)
OUT_CS(((ptr1[i+1] + indexBias) << 16) |
(ptr1[i] + indexBias));
@@ -581,7 +581,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300_translate_primitive(mode));
- if (indexBias && !r300->screen->caps.index_bias_supported) {
+ if (indexBias && !r300->screen->caps.is_r500) {
for (i = 0; i < count-1; i += 2)
OUT_CS(((ptr2[i+1] + indexBias) << 16) |
(ptr2[i] + indexBias));
@@ -601,7 +601,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
r300_translate_primitive(mode));
- if (indexBias && !r300->screen->caps.index_bias_supported) {
+ if (indexBias && !r300->screen->caps.is_r500) {
for (i = 0; i < count; i++)
OUT_CS(ptr4[i] + indexBias);
} else {
@@ -620,13 +620,12 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias,
unsigned indexSize = r300->index_buffer.index_size;
struct pipe_resource* orgIndexBuffer = indexBuffer;
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ count > 65536;
unsigned short_count;
int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
uint16_t indices3[3];
- if (indexBias && !r300->screen->caps.index_bias_supported) {
+ if (indexBias && !r300->screen->caps.is_r500) {
r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
}
@@ -702,8 +701,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode,
unsigned start, unsigned count)
{
boolean alt_num_verts = r300->screen->caps.is_r500 &&
- count > 65536 &&
- r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ count > 65536;
unsigned short_count;
/* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
@@ -748,6 +746,8 @@ static void r300_draw_vbo(struct pipe_context* pipe,
return;
}
+ r300_update_derived_state(r300);
+
/* Start the vbuf manager and update buffers if needed. */
u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info,
&buffers_updated, &uploader_flushed);
@@ -756,8 +756,6 @@ static void r300_draw_vbo(struct pipe_context* pipe,
}
/* Draw. */
- r300_update_derived_state(r300);
-
if (indexed) {
if (count <= 8 &&
r300_resource(r300->index_buffer.buffer)->b.user_ptr) {
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 77a9c6ad86f..52d0247fbfd 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -456,10 +456,6 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
r300screen->caps.hiz_ram = 0;
- r300screen->caps.index_bias_supported =
- r300screen->caps.is_r500 &&
- rws->get_value(rws, R300_VID_DRM_2_3_0);
-
pipe_mutex_init(r300screen->num_contexts_mutex);
util_slab_create(&r300screen->pool_buffers,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 09f18b3e624..b810f4081c8 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -45,7 +45,6 @@
#include "r300_texture.h"
#include "r300_vs.h"
#include "r300_winsys.h"
-#include "r300_hyperz.h"
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
@@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
else if (state->zsbuf) {
r300->fb_state.size += 10;
if (can_hyperz)
- r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4;
+ r300->fb_state.size += 8;
}
/* The size of the rest of atoms stays the same. */
@@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_framebuffer_state *old_state = r300->fb_state.state;
- boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
@@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
return;
}
- if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) {
+ if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) {
/* There is a zmask in use, what are we gonna do? */
if (state->zsbuf) {
if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
/* Decompress the currently bound zbuffer before we bind another one. */
r300_decompress_zmask(r300);
+ r300->hiz_in_use = FALSE;
}
} else {
/* We don't bind another zbuffer, so lock the current one. */
- r300->zmask_locked = TRUE;
+ r300->hyperz_locked = TRUE;
pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
}
- } else if (r300->zmask_locked && r300->locked_zbuffer) {
+ } else if (r300->hyperz_locked && r300->locked_zbuffer) {
/* We have a locked zbuffer now, what are we gonna do? */
if (state->zsbuf) {
if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
/* We are binding some other zbuffer, so decompress the locked one,
* it gets unlocked automatically. */
r300_decompress_zmask_locked_unsafe(r300);
+ r300->hiz_in_use = FALSE;
} else {
/* We are binding the locked zbuffer again, so unlock it. */
- r300->zmask_locked = FALSE;
+ r300->hyperz_locked = FALSE;
}
}
}
@@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
util_copy_framebuffer_state(r300->fb_state.state, state);
- if (!r300->zmask_locked) {
+ if (!r300->hyperz_locked) {
pipe_surface_reference(&r300->locked_zbuffer, NULL);
}
@@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
break;
}
- /* Setup Hyper-Z. */
- if (can_hyperz) {
- struct r300_surface *zs_surf = r300_surface(state->zsbuf);
- struct r300_resource *tex = r300_resource(zs_surf->base.texture);
- int level = zs_surf->base.u.tex.level;
-
- /* work out whether we can support hiz on this buffer */
- r300_hiz_alloc_block(r300, zs_surf);
-
- DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
- tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef);
- }
-
/* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
r300->zbuffer_bpp = zbuffer_bpp;
@@ -818,27 +804,25 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
}
/* Set up AA config. */
- if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
- if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
- aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
-
- switch (state->cbufs[0]->texture->nr_samples) {
- case 2:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
- break;
- case 3:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
- break;
- case 4:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
- break;
- case 6:
- aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
- break;
- }
- } else {
- aa->aa_config = 0;
+ if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
+ aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+
+ switch (state->cbufs[0]->texture->nr_samples) {
+ case 2:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
+ break;
+ case 3:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+ break;
+ case 4:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+ break;
+ case 6:
+ aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+ break;
}
+ } else {
+ aa->aa_config = 0;
}
if (DBG_ON(r300, DBG_FB)) {
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 003fe9a58cd..a1e116f4b61 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -29,7 +29,6 @@
#include "r300_context.h"
#include "r300_fs.h"
-#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
#include "r300_state_inlines.h"
@@ -642,8 +641,25 @@ static uint32_t r300_get_border_color(enum pipe_format format,
/* Compressed formats. */
if (util_format_is_compressed(format)) {
- util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- return uc.ui;
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_RGTC1_UNORM:
+ /* Add 1/32 to round the border color instead of truncating. */
+ /* The Y component is used for the border color. */
+ border_swizzled[1] = border_swizzled[2] + 1.0f/32;
+ util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
+ return uc.ui;
+ case PIPE_FORMAT_RGTC2_SNORM:
+ border_swizzled[0] = border_swizzled[2];
+ util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc);
+ return uc.ui;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ return uc.ui;
+ default:
+ util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ return uc.ui;
+ }
}
switch (desc->channel[0].size) {
@@ -937,7 +953,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300)
state->sampler_state_count);
unsigned i;
- if (!r300->zmask_locked || !r300->locked_zbuffer) {
+ if (!r300->hyperz_locked || !r300->locked_zbuffer) {
return;
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index b97c45ac198..86ad0b8b8e0 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -171,8 +171,16 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
- util_format_is_compressed(format) && dxtc_swizzle);
+ if (util_format_is_compressed(format) &&
+ dxtc_swizzle &&
+ format != PIPE_FORMAT_RGTC2_UNORM &&
+ format != PIPE_FORMAT_RGTC2_SNORM) {
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ dxtc_swizzle);
+ } else {
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+ FALSE);
+ }
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
@@ -197,41 +205,36 @@ uint32_t r300_translate_texformat(enum pipe_format format,
}
}
- /* Add sign. */
- for (i = 0; i < desc->nr_channels; i++) {
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= sign_bit[i];
- }
- }
-
- /* This is truly a special format.
- * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2)
- * in the sampler unit. Also known as D3DFMT_CxV8U8. */
- if (format == PIPE_FORMAT_R8G8Bx_SNORM) {
- return R300_TX_FORMAT_CxV8U8 | result;
- }
-
/* RGTC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
switch (format) {
case PIPE_FORMAT_RGTC1_SNORM:
- result |= sign_bit[0];
+ result |= sign_bit[1];
case PIPE_FORMAT_RGTC1_UNORM:
- result &= ~(0xfff << 9); /* mask off swizzle */
- result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT;
return R500_TX_FORMAT_ATI1N | result;
case PIPE_FORMAT_RGTC2_SNORM:
- result |= sign_bit[0] | sign_bit[1];
+ result |= sign_bit[2] | sign_bit[3];
case PIPE_FORMAT_RGTC2_UNORM:
- result &= ~(0xfff << 9); /* mask off swizzle */
- result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT |
- R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT;
return R400_TX_FORMAT_ATI2N | result;
default:
return ~0; /* Unsupported/unknown. */
}
}
+ /* This is truly a special format.
+ * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2)
+ * in the sampler unit. Also known as D3DFMT_CxV8U8. */
+ if (format == PIPE_FORMAT_R8G8Bx_SNORM) {
+ return R300_TX_FORMAT_CxV8U8 | result;
+ }
+
+ /* Add sign. */
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= sign_bit[i];
+ }
+ }
+
/* See whether the components are of the same size. */
for (i = 1; i < desc->nr_channels; i++) {
uniform = uniform && desc->channel[0].size == desc->channel[i].size;
@@ -676,6 +679,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf)
R300_DEPTHMACROTILE(tex->tex.macrotile[level]) |
R300_DEPTHMICROTILE(tex->tex.microtile);
surf->format = r300_translate_zsformat(surf->base.format);
+ surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level];
+ surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level];
} else {
surf->pitch =
tex->tex.stride_in_pixels[level] |
@@ -713,14 +718,8 @@ static void r300_texture_destroy(struct pipe_screen *screen,
struct pipe_resource* texture)
{
struct r300_resource* tex = (struct r300_resource*)texture;
- int i;
r300_winsys_bo_reference(&tex->buf, NULL);
- for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
- if (tex->hiz_mem[i])
- u_mmFreeMem(tex->hiz_mem[i]);
- }
-
FREE(tex);
}
@@ -868,8 +867,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
break;
case 2:
- if (rws->get_value(rws, R300_VID_DRM_2_1_0))
- microtile = R300_BUFFER_SQUARETILED;
+ microtile = R300_BUFFER_SQUARETILED;
break;
}
}
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 2cfeec7d751..3846fb8b6b3 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -207,29 +207,6 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
return util_format_get_nblocksy(tex->b.b.b.format, height);
}
-static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
- struct r300_resource *tex)
-{
- /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
- * incorrectly. This is a workaround to prevent CS from being rejected. */
-
- unsigned i, size;
-
- if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
- tex->b.b.b.target == PIPE_TEXTURE_3D &&
- tex->b.b.b.last_level > 0) {
- size = 0;
-
- for (i = 0; i <= tex->b.b.b.last_level; i++) {
- size += tex->tex.stride_in_bytes[i] *
- r300_texture_get_nblocksy(tex, i, FALSE);
- }
-
- size *= tex->tex.depth0;
- tex->tex.size_in_bytes = size;
- }
-}
-
/* Get a width in pixels from a stride in bytes. */
static unsigned stride_to_width(enum pipe_format format,
unsigned stride_in_bytes)
@@ -334,12 +311,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i];
}
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+static unsigned r300_pixels_to_dwords(unsigned stride,
+ unsigned height,
+ unsigned xblock, unsigned yblock)
+{
+ return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock);
+}
-static void r300_setup_zmask_flags(struct r300_screen *screen,
- struct r300_resource *tex)
+static void r300_setup_hyperz_properties(struct r300_screen *screen,
+ struct r300_resource *tex)
{
- /* The tile size of 1 DWORD is:
+ /* The tile size of 1 DWORD in ZMASK RAM is:
*
* GPU Pipes 4x4 mode 8x8 mode
* ------------------------------------------
@@ -348,8 +330,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen,
* RV530 1P/2Z 32x16 64x32
* 1P/1Z 16x16 32x32
*/
- static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8};
- static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8};
+ static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8};
+ static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8};
+
+ /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels),
+ * but the blocks have very weird ordering.
+ *
+ * With 2 pipes and an image of size 8xY, where Y >= 1,
+ * clearing 4 dwords clears blocks like this:
+ *
+ * 01012323
+ *
+ * where numbers correspond to dword indices. The blocks are interleaved
+ * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels).
+ *
+ * With 4 pipes and an image of size 8xY, where Y >= 4,
+ * clearing 8 dwords clears blocks like this:
+ * 01012323
+ * 45456767
+ * 01012323
+ * 45456767
+ * where numbers correspond to dword indices. The blocks are interleaved
+ * in both directions, so the alignment must be 4x4 blocks (32x32 pixels)
+ */
+ static unsigned hiz_align_x[4] = {8, 32, 48, 32};
+ static unsigned hiz_align_y[4] = {8, 8, 8, 32};
if (util_format_is_depth_or_stencil(tex->b.b.b.format) &&
util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
@@ -363,30 +368,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen,
}
for (i = 0; i <= tex->b.b.b.last_level; i++) {
- unsigned numdw, compsize;
+ unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height;
+
+ stride = align(tex->tex.stride_in_pixels[i], 16);
+ height = u_minify(tex->b.b.b.height0, i);
/* The 8x8 compression mode needs macrotiling. */
- compsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
+ zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
tex->tex.macrotile[i] &&
tex->b.b.b.nr_samples <= 1 ? 8 : 4;
- /* Get the zbuffer size (with the aligned width and height). */
- numdw = align(tex->tex.stride_in_pixels[i],
- num_blocks_x_per_dw[pipes-1] * compsize) *
- align(u_minify(tex->b.b.b.height0, i),
- num_blocks_y_per_dw[pipes-1] * compsize);
+ /* Get the ZMASK buffer size in dwords. */
+ zcomp_numdw = r300_pixels_to_dwords(stride, height,
+ zmask_blocks_x_per_dw[pipes-1] * zcompsize,
+ zmask_blocks_y_per_dw[pipes-1] * zcompsize);
- /* Convert pixels -> dwords. */
- numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize *
- num_blocks_y_per_dw[pipes-1] * compsize);
+ /* Check whether we have enough ZMASK memory. */
+ if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
+ zcomp_numdw <= screen->caps.zmask_ram * pipes) {
+ tex->tex.zmask_dwords[i] = zcomp_numdw;
+ tex->tex.zcomp8x8[i] = zcompsize == 8;
- /* Check that we have enough ZMASK memory. */
- if (numdw <= screen->caps.zmask_ram * pipes) {
- tex->tex.zmask_dwords[i] = numdw;
- tex->tex.zcomp8x8[i] = compsize == 8;
+ tex->tex.zmask_stride_in_pixels[i] =
+ align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize);
} else {
tex->tex.zmask_dwords[i] = 0;
tex->tex.zcomp8x8[i] = FALSE;
+ tex->tex.zmask_stride_in_pixels[i] = 0;
+ }
+
+ /* Now setup HIZ. */
+ stride = align(stride, hiz_align_x[pipes-1]);
+ height = align(height, hiz_align_y[pipes-1]);
+
+ /* Get the HIZ buffer size in dwords. */
+ hiz_numdw = (stride * height) / (8*8 * pipes);
+
+ /* Check whether we have enough HIZ memory. */
+ if (hiz_numdw <= screen->caps.hiz_ram * pipes) {
+ tex->tex.hiz_dwords[i] = hiz_numdw;
+ tex->tex.hiz_stride_in_pixels[i] = stride;
+ } else {
+ tex->tex.hiz_dwords[i] = 0;
+ tex->tex.hiz_stride_in_pixels[i] = 0;
}
}
}
@@ -395,7 +419,6 @@ static void r300_setup_zmask_flags(struct r300_screen *screen,
static void r300_setup_tiling(struct r300_screen *screen,
struct r300_resource *tex)
{
- struct r300_winsys_screen *rws = screen->rws;
enum pipe_format format = tex->b.b.b.format;
boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
boolean is_zb = util_format_is_depth_or_stencil(format);
@@ -422,9 +445,7 @@ static void r300_setup_tiling(struct r300_screen *screen,
break;
case 2:
- if (rws->get_value(rws, R300_VID_DRM_2_1_0)) {
- tex->tex.microtile = R300_BUFFER_SQUARETILED;
- }
+ tex->tex.microtile = R300_BUFFER_SQUARETILED;
break;
}
@@ -494,8 +515,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,
r300_setup_miptree(rscreen, tex, FALSE);
}
- r300_texture_3d_fix_mipmapping(rscreen, tex);
- r300_setup_zmask_flags(rscreen, tex);
+ r300_setup_hyperz_properties(rscreen, tex);
if (tex->buf_size) {
/* Make sure the buffer we got is large enough. */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index d5c73585c81..c0b66899f8b 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -60,10 +60,8 @@ enum r300_value_id {
R300_VID_DRM_PATCHLEVEL,
/* These should probably go away: */
- R300_VID_DRM_2_1_0, /* Square tiling. */
- R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */
R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */
- R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */
+ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */
R300_CAN_HYPERZ, /* ZMask + HiZ */
R300_CAN_AACOMPRESS, /* CMask */
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index b5fcc7106fe..cae3888051b 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -503,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 3efdbaba0c3..4206b4a201d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -305,11 +305,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -328,6 +333,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
S_03C008_TYPE(1),
0xFFFFFFFF, NULL);
@@ -370,7 +376,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index f0a1ee0cd02..c51a163bd06 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -108,8 +108,9 @@
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
/* Registers */
#define R_008C00_SQ_CONFIG 0x00008C00
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 64c52bca795..0b7d6f70968 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -114,14 +114,16 @@ enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
+unsigned r600_get_minor_version(struct radeon *radeon);
+unsigned r600_get_num_backends(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
struct r600_bo *r600_bo(struct radeon *radeon,
- unsigned size, unsigned alignment,
- unsigned binding, unsigned usage);
+ unsigned size, unsigned alignment,
+ unsigned binding, unsigned usage);
struct r600_bo *r600_bo_handle(struct radeon *radeon,
- unsigned handle, unsigned *array_mode);
+ unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
@@ -251,6 +253,7 @@ struct r600_context {
unsigned num_query_running;
struct list_head fenced_bo;
unsigned max_db; /* for OQ */
+ boolean predicate_drawing;
};
struct r600_draw {
@@ -283,6 +286,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
void r600_query_end(struct r600_context *ctx, struct r600_query *query);
void r600_context_queries_suspend(struct r600_context *ctx);
void r600_context_queries_resume(struct r600_context *ctx);
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 5d59356bf70..6777be80402 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -133,6 +133,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
return 1;
@@ -2070,7 +2071,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
unsigned format, num_format, format_comp;
u32 *bytecode;
- int i, r;
+ int i, r;
/* vertex elements offset need special handling, if offset is bigger
+ * than what we can put in fetch instruction then we need to alterate
@@ -2090,23 +2091,23 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
return r;
for (i = 0; i < ve->count; i++) {
- if (elements[i].instance_divisor > 1) {
+ if (elements[i].instance_divisor > 1) {
struct r600_bc_alu alu;
memset(&alu, 0, sizeof(alu));
- alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
- alu.src[0].sel = 0;
- alu.src[0].chan = 3;
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
alu.dst.sel = i + 1;
alu.dst.chan = 3;
alu.dst.write = 1;
alu.last = 1;
- if ((r = r600_bc_add_alu(&bc, &alu))) {
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
r600_bc_clear(&bc);
- return r;
- }
+ return r;
+ }
memset(&alu, 0, sizeof(alu));
alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2121,10 +2122,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
alu.dst.write = 1;
alu.last = 1;
- if ((r = r600_bc_add_alu(&bc, &alu))) {
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
r600_bc_clear(&bc);
- return r;
- }
+ return r;
+ }
memset(&alu, 0, sizeof(alu));
alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
@@ -2136,26 +2137,26 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
alu.dst.write = 1;
alu.last = 1;
- if ((r = r600_bc_add_alu(&bc, &alu))) {
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
r600_bc_clear(&bc);
- return r;
- }
+ return r;
+ }
memset(&alu, 0, sizeof(alu));
- alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
- alu.src[0].sel = i + 1;
- alu.src[0].chan = 3;
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
+ alu.src[0].sel = i + 1;
+ alu.src[0].chan = 3;
alu.dst.sel = i + 1;
alu.dst.chan = 3;
alu.dst.write = 1;
alu.last = 1;
- if ((r = r600_bc_add_alu(&bc, &alu))) {
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
r600_bc_clear(&bc);
- return r;
- }
- }
+ return r;
+ }
+ }
}
for (i = 0; i < ve->count; i++) {
@@ -2175,7 +2176,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
- vtx.mega_fetch_count = 16;
+ vtx.mega_fetch_count = 0x1F;
vtx.dst_gpr = i + 1;
vtx.dst_sel_x = desc->swizzle[0];
vtx.dst_sel_y = desc->swizzle[1];
@@ -2202,14 +2203,14 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
return -ENOMEM;
}
- ve->fs_size = bc.ndw*4;
+ ve->fs_size = bc.ndw*4;
if ((r = r600_bc_build(&bc))) {
r600_bc_clear(&bc);
return r;
}
- if (dump_shaders == -1)
- dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
if (dump_shaders) {
fprintf(stderr, "--------------------------------------------------------------\n");
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 0c5d7133c7a..2363cd1ebc5 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx,
}
static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- const void *data,
- unsigned stride,
- unsigned layer_stride)
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
{
struct radeon *ws = (struct radeon*)pipe->winsys;
struct r600_resource_buffer *rbuffer = r600_buffer(resource);
@@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
rbuffer->r.b.b.b.depth0 = 1;
rbuffer->r.b.b.b.array_size = 1;
rbuffer->r.b.b.b.flags = 0;
- rbuffer->r.b.user_ptr = ptr;
+ rbuffer->r.b.user_ptr = ptr;
rbuffer->r.bo = NULL;
rbuffer->r.bo_size = 0;
return &rbuffer->r.b.b.b;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index adcd74aec76..fcca7f705d4 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -76,8 +76,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
u_upload_flush(rctx->vbuf_mgr->uploader);
}
-static void r600_update_num_contexts(struct r600_screen *rscreen,
- int diff)
+static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
{
pipe_mutex_lock(rscreen->mutex_num_contexts);
if (diff > 0) {
@@ -431,7 +430,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
return FALSE;
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- r600_is_sampler_format_supported(format)) {
+ r600_is_sampler_format_supported(screen, format)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 8dc1f4ad5c3..5f701d87e8f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -227,7 +227,7 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
void r600_init_surface_functions(struct r600_pipe_context *r600);
-uint32_t r600_translate_texformat(enum pipe_format format,
+uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 726668260cc..343403f92f3 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -21,6 +21,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_pipe.h"
+#include "r600d.h"
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
@@ -66,6 +67,30 @@ static boolean r600_get_query_result(struct pipe_context *ctx,
return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult);
}
+static void r600_render_condition(struct pipe_context *ctx,
+ struct pipe_query *query,
+ uint mode)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
+ int wait_flag = 0;
+
+ if (!query) {
+ rctx->ctx.predicate_drawing = false;
+ r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1);
+ return;
+ }
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ wait_flag = 1;
+ }
+
+ rctx->ctx.predicate_drawing = true;
+ r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
+
+}
+
void r600_init_query_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_query = r600_create_query;
@@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx)
rctx->context.begin_query = r600_begin_query;
rctx->context.end_query = r600_end_query;
rctx->context.get_query_result = r600_get_query_result;
+
+ if (r600_get_num_backends(rctx->screen->radeon) > 0)
+ rctx->context.render_condition = r600_render_condition;
}
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 65923fb9648..4146cb3c5fd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
- /* Would like some magic "get_bool_option_once" routine.
- */
- if (dump_shaders == -1)
- dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
if (dump_shaders) {
fprintf(stderr, "--------------------------------------------------------------\n");
@@ -453,24 +453,24 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_ADDRESS:
break;
- case TGSI_FILE_SYSTEM_VALUE:
- if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
- struct r600_bc_alu alu;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
+ case TGSI_FILE_SYSTEM_VALUE:
+ if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
- alu.src[0].sel = 0;
- alu.src[0].chan = 3;
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
alu.dst.sel = 0;
alu.dst.chan = 3;
alu.dst.write = 1;
- alu.last = 1;
+ alu.last = 1;
- if ((r = r600_bc_add_alu(ctx->bc, &alu)))
- return r;
- break;
- }
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+ break;
+ }
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
@@ -558,13 +558,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
r600_src->sel = V_SQ_ALU_SRC_LITERAL;
memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
- /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
- r600_src->swizzle[0] = 3;
- r600_src->swizzle[1] = 3;
- r600_src->swizzle[2] = 3;
- r600_src->swizzle[3] = 3;
- r600_src->sel = 0;
- } else {
+ /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
+ r600_src->swizzle[0] = 3;
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 0;
+ } else {
if (tgsi_src->Register.Indirect)
r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->sel = tgsi_src->Register.Index;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c365979e439..2f7263cdcff 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -358,11 +358,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -379,7 +384,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
+ S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
if (uc.ui) {
r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
@@ -420,7 +427,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 677e2209340..3c072fe7ca9 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx)
tmp |= S_028644_PT_SPRITE_TEX(1);
}
- if (rctx->family < CHIP_CEDAR) {
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
+ if (rctx->family < CHIP_CEDAR) {
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
- }
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+ }
r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 29e12f1d468..3d0360485aa 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -498,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 3a85a25065a..df83302b38e 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -226,7 +226,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen,
w = mip_minify(ptex->width0, level);
h = mip_minify(ptex->height0, level);
- if (w < tile_width || h < tile_height)
+ if (w <= tile_width || h <= tile_height)
rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1;
else
rtex->array_mode[level] = array_mode;
@@ -413,8 +413,13 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
/* Would like some magic "get_bool_option_once" routine.
*/
- if (force_tiling == -1)
- force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
+ if (force_tiling == -1) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ /* reenable when 2D tiling is fixed better */
+ /*if (r600_get_minor_version(rscreen->radeon) >= 9)
+ force_tiling = debug_get_bool_option("R600_TILING", TRUE);*/
+ force_tiling = debug_get_bool_option("R600_TILING", FALSE);
+ }
if (force_tiling && permit_hardware_blit(screen, templ)) {
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
@@ -804,7 +809,8 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
}
/* texture format translate */
-uint32_t r600_translate_texformat(enum pipe_format format,
+uint32_t r600_translate_texformat(struct pipe_screen *screen,
+ enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p)
{
@@ -870,8 +876,13 @@ uint32_t r600_translate_texformat(enum pipe_format format,
break;
}
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ if (r600_enable_s3tc == -1) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ if (r600_get_minor_version(rscreen->radeon) >= 9)
+ r600_enable_s3tc = 1;
+ else
+ r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ }
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
if (!r600_enable_s3tc)
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index e8558c49a7c..df70e2889e2 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -67,6 +67,10 @@
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 3aefb5b3bb5..4a5d5413d8c 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -544,6 +544,7 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe,
rb_pipe->curr.nr_cbufs = 0;
memset(rb_pipe->curr.cbufs, 0, sizeof(rb_pipe->curr.cbufs));
+ rb_pipe->curr.zsbuf = NULL;
/* unwrap the input state */
if (_state) {
@@ -556,6 +557,8 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe,
rb_pipe->curr.cbufs[i] = rbug_resource(_state->cbufs[i]->texture);
}
unwrapped_state.zsbuf = rbug_surface_unwrap(_state->zsbuf);
+ if (_state->zsbuf)
+ rb_pipe->curr.zsbuf = rbug_resource(_state->zsbuf->texture);
state = &unwrapped_state;
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 685fbfc3087..6d47fb96280 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -248,11 +248,6 @@ softpipe_is_format_supported( struct pipe_screen *screen,
return util_format_s3tc_enabled;
}
- /* u_format doesn't implement RGTC yet */
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- return FALSE;
- }
-
/*
* Everything else should be supported by u_format.
*/
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 66ddc565722..3dec5de3cc4 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -91,8 +91,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- draw_flush(softpipe->draw);
-
if (softpipe->fs == fs)
return;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 76a3803224a..b7d54605e66 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -40,7 +40,7 @@
#include "svga_debug.h"
-#define MAX_DMA_SIZE (8 * 1024 * 1024)
+#define MAX_DMA_SIZE (4 * 1024 * 1024)
/**
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index daf1024fd02..6c3275e74c0 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -40,9 +40,12 @@
/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
*/
-static int svga_shader_type( int unit )
+static int svga_shader_type( int shader )
{
- return unit + 1;
+ assert(PIPE_SHADER_VERTEX + 1 == SVGA3D_SHADERTYPE_VS);
+ assert(PIPE_SHADER_FRAGMENT + 1 == SVGA3D_SHADERTYPE_PS);
+ assert(shader <= PIPE_SHADER_FRAGMENT);
+ return shader + 1;
}
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 0181588096d..a5c94b4166b 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -317,6 +317,7 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
+ templ.array_size = 1;
buffer->resource =
screen->base.screen->resource_create(screen->base.screen, &templ);
@@ -510,6 +511,7 @@ dri2_create_image(__DRIscreen *_screen,
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
+ templ.array_size = 1;
img->texture = screen->base.screen->resource_create(screen->base.screen, &templ);
if (!img->texture) {
diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile
index 98167cceeb3..68643214060 100644
--- a/src/gallium/state_trackers/egl/Makefile
+++ b/src/gallium/state_trackers/egl/Makefile
@@ -26,6 +26,7 @@ x11_OBJECTS = $(x11_SOURCES:.c=.o)
wayland_INCLUDES = \
-I$(TOP)/src/gallium/winsys \
-I$(TOP)/src/egl/wayland \
+ -I$(TOP)/src/egl/wayland/wayland-drm \
$(shell pkg-config --cflags-only-I libdrm wayland-client)
wayland_SOURCES = $(wildcard wayland/*.c)
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 6107df48822..e60a56074dd 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -455,9 +455,6 @@ egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy)
_eglReleaseDisplayResources(drv, dpy);
- if (gdpy->pipe)
- gdpy->pipe->destroy(gdpy->pipe);
-
if (dpy->Configs) {
_eglDestroyArray(dpy->Configs, egl_g3d_free_config);
dpy->Configs = NULL;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h
index 9873fee6ec2..301db3128ff 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.h
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.h
@@ -56,7 +56,6 @@ struct egl_g3d_display {
const struct egl_g3d_loader *loader;
struct st_manager *smapi;
- struct pipe_context *pipe;
};
struct egl_g3d_context {
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
index c9f94a3c498..2068256dff2 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c
@@ -640,6 +640,7 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
_EGLContext *ctx = _eglGetCurrentContext();
struct native_surface *nsurf;
struct pipe_resource *ptex;
+ struct pipe_context *pipe;
if (!gsurf->render_texture)
return EGL_TRUE;
@@ -655,22 +656,18 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL);
}
- /* create a pipe context to copy surfaces */
- if (!gdpy->pipe) {
- gdpy->pipe =
- gdpy->native->screen->context_create(gdpy->native->screen, NULL);
- if (!gdpy->pipe)
- return EGL_FALSE;
- }
+ pipe = ndpy_get_copy_context(gdpy->native);
+ if (!pipe)
+ return EGL_FALSE;
ptex = get_pipe_resource(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT);
if (ptex) {
struct pipe_box src_box;
u_box_origin_2d(ptex->width0, ptex->height0, &src_box);
- gdpy->pipe->resource_copy_region(gdpy->pipe, ptex, 0, 0, 0, 0,
+ pipe->resource_copy_region(pipe, ptex, 0, 0, 0, 0,
gsurf->render_texture, 0, &src_box);
- gdpy->pipe->flush(gdpy->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0);
pipe_resource_reference(&ptex, NULL);
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
index 81ce7ab45c7..e1c83168b3a 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@@ -111,6 +111,7 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img,
templ.width0 = attrs.Width;
templ.height0 = attrs.Height;
templ.depth0 = 1;
+ templ.array_size = 1;
/*
* XXX fix apps (e.g. wayland) and pipe drivers (e.g. i915) and remove the
diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h
index 42889075a39..9246f8c32a4 100644
--- a/src/gallium/state_trackers/egl/common/native.h
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -142,6 +142,11 @@ struct native_display {
struct pipe_screen *screen;
/**
+ * Context used for copy operations.
+ */
+ struct pipe_context *pipe;
+
+ /**
* Available for caller's use.
*/
void *user_data;
@@ -223,6 +228,29 @@ native_attachment_mask_test(uint mask, enum native_attachment att)
return !!(mask & (1 << att));
}
+/**
+ * Get the display copy context
+ */
+static INLINE struct pipe_context *
+ndpy_get_copy_context(struct native_display *ndpy)
+{
+ if (!ndpy->pipe)
+ ndpy->pipe = ndpy->screen->context_create(ndpy->screen, NULL);
+ return ndpy->pipe;
+}
+
+/**
+ * Free display screen and context resources
+ */
+static INLINE void
+ndpy_uninit(struct native_display *ndpy)
+{
+ if (ndpy->pipe)
+ ndpy->pipe->destroy(ndpy->pipe);
+ if (ndpy->screen)
+ ndpy->screen->destroy(ndpy->screen);
+}
+
struct native_platform {
const char *name;
diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c
index a9d8f32e007..be6713d03a8 100644
--- a/src/gallium/state_trackers/egl/common/native_helper.c
+++ b/src/gallium/state_trackers/egl/common/native_helper.c
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <[email protected]>
+ * Thomas Hellstrom <[email protected]>
*/
#include "util/u_inlines.h"
@@ -34,6 +36,14 @@
#include "native_helper.h"
+/**
+ * Number of swap fences and mask
+ */
+
+#define EGL_SWAP_FENCES_MAX 4
+#define EGL_SWAP_FENCES_MASK 3
+#define EGL_SWAP_FENCES_DEFAULT 1
+
struct resource_surface {
struct pipe_screen *screen;
enum pipe_format format;
@@ -42,6 +52,15 @@ struct resource_surface {
struct pipe_resource *resources[NUM_NATIVE_ATTACHMENTS];
uint resource_mask;
uint width, height;
+
+ /**
+ * Swap fences.
+ */
+ struct pipe_fence_handle *swap_fences[EGL_SWAP_FENCES_MAX];
+ unsigned int cur_fences;
+ unsigned int head;
+ unsigned int tail;
+ unsigned int desired_fences;
};
struct resource_surface *
@@ -49,11 +68,16 @@ resource_surface_create(struct pipe_screen *screen,
enum pipe_format format, uint bind)
{
struct resource_surface *rsurf = CALLOC_STRUCT(resource_surface);
+ char *swap_fences = getenv("EGL_THROTTLE_FENCES");
if (rsurf) {
rsurf->screen = screen;
rsurf->format = format;
rsurf->bind = bind;
+ rsurf->desired_fences = (swap_fences) ? atoi(swap_fences) :
+ EGL_SWAP_FENCES_DEFAULT;
+ if (rsurf->desired_fences > EGL_SWAP_FENCES_MAX)
+ rsurf->desired_fences = EGL_SWAP_FENCES_MAX;
}
return rsurf;
@@ -225,3 +249,121 @@ resource_surface_present(struct resource_surface *rsurf,
return TRUE;
}
+
+/**
+ * Schedule a copy swap from the back to the front buffer using the
+ * native display's copy context.
+ */
+boolean
+resource_surface_copy_swap(struct resource_surface *rsurf,
+ struct native_display *ndpy)
+{
+ struct pipe_resource *ftex;
+ struct pipe_resource *btex;
+ struct pipe_context *pipe;
+ struct pipe_box src_box;
+ boolean ret = FALSE;
+
+ pipe = ndpy_get_copy_context(ndpy);
+ if (!pipe)
+ return FALSE;
+
+ ftex = resource_surface_get_single_resource(rsurf,
+ NATIVE_ATTACHMENT_FRONT_LEFT);
+ if (!ftex)
+ goto out_no_ftex;
+ btex = resource_surface_get_single_resource(rsurf,
+ NATIVE_ATTACHMENT_BACK_LEFT);
+ if (!btex)
+ goto out_no_btex;
+
+ u_box_origin_2d(ftex->width0, ftex->height0, &src_box);
+ pipe->resource_copy_region(pipe, ftex, 0, 0, 0, 0,
+ btex, 0, &src_box);
+ ret = TRUE;
+
+ out_no_ftex:
+ pipe_resource_reference(&btex, NULL);
+ out_no_btex:
+ pipe_resource_reference(&ftex, NULL);
+
+ return ret;
+}
+
+static struct pipe_fence_handle *
+swap_fences_pop_front(struct resource_surface *rsurf)
+{
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_fence_handle *fence = NULL;
+
+ if (rsurf->desired_fences == 0)
+ return NULL;
+
+ if (rsurf->cur_fences >= rsurf->desired_fences) {
+ screen->fence_reference(screen, &fence, rsurf->swap_fences[rsurf->tail]);
+ screen->fence_reference(screen, &rsurf->swap_fences[rsurf->tail++], NULL);
+ rsurf->tail &= EGL_SWAP_FENCES_MASK;
+ --rsurf->cur_fences;
+ }
+ return fence;
+}
+
+static void
+swap_fences_push_back(struct resource_surface *rsurf,
+ struct pipe_fence_handle *fence)
+{
+ struct pipe_screen *screen = rsurf->screen;
+
+ if (!fence || rsurf->desired_fences == 0)
+ return;
+
+ while(rsurf->cur_fences == rsurf->desired_fences)
+ swap_fences_pop_front(rsurf);
+
+ rsurf->cur_fences++;
+ screen->fence_reference(screen, &rsurf->swap_fences[rsurf->head++],
+ fence);
+ rsurf->head &= EGL_SWAP_FENCES_MASK;
+}
+
+boolean
+resource_surface_throttle(struct resource_surface *rsurf)
+{
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_fence_handle *fence = swap_fences_pop_front(rsurf);
+
+ if (fence) {
+ (void) screen->fence_finish(screen, fence, 0);
+ screen->fence_reference(screen, &fence, NULL);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+boolean
+resource_surface_flush(struct resource_surface *rsurf,
+ struct native_display *ndpy)
+{
+ struct pipe_fence_handle *fence = NULL;
+ struct pipe_screen *screen = rsurf->screen;
+ struct pipe_context *pipe= ndpy_get_copy_context(ndpy);
+
+ if (!pipe)
+ return FALSE;
+
+ pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &fence);
+ if (fence == NULL)
+ return FALSE;
+
+ swap_fences_push_back(rsurf, fence);
+ screen->fence_reference(screen, &fence, NULL);
+
+ return TRUE;
+}
+
+void
+resource_surface_wait(struct resource_surface *rsurf)
+{
+ while (resource_surface_throttle(rsurf));
+}
diff --git a/src/gallium/state_trackers/egl/common/native_helper.h b/src/gallium/state_trackers/egl/common/native_helper.h
index 03995de19ae..39564a04365 100644
--- a/src/gallium/state_trackers/egl/common/native_helper.h
+++ b/src/gallium/state_trackers/egl/common/native_helper.h
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <[email protected]>
+ * Thomas Hellstrom <[email protected]>
*/
#include "native.h"
@@ -74,3 +76,32 @@ boolean
resource_surface_present(struct resource_surface *rsurf,
enum native_attachment which,
void *winsys_drawable_handle);
+
+/**
+ * Perform a gallium copy blit between the back left and front left
+ * surfaces. Needs to be followed by a call to resource_surface_flush.
+ */
+boolean
+resource_surface_copy_swap(struct resource_surface *rsurf,
+ struct native_display *ndpy);
+
+/**
+ * Throttle on outstanding rendering using the copy context. For example
+ * copy swaps.
+ */
+boolean
+resource_surface_throttle(struct resource_surface *rsurf);
+
+/**
+ * Flush pending rendering using the copy context. This function saves a
+ * marker for upcoming throttles.
+ */
+boolean
+resource_surface_flush(struct resource_surface *rsurf,
+ struct native_display *ndpy);
+/**
+ * Wait for all rendering using the copy context to be complete. Frees all
+ * throttle markers saved using resource_surface_flush.
+ */
+void
+resource_surface_wait(struct resource_surface *rsurf);
diff --git a/src/gallium/state_trackers/egl/drm/modeset.c b/src/gallium/state_trackers/egl/drm/modeset.c
index 0cc06caa2a1..3fff9540905 100644
--- a/src/gallium/state_trackers/egl/drm/modeset.c
+++ b/src/gallium/state_trackers/egl/drm/modeset.c
@@ -3,6 +3,7 @@
* Version: 7.9
*
* Copyright (C) 2010 LunarG Inc.
+ * Copyright (C) 2011 VMware Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,6 +25,7 @@
*
* Authors:
* Chia-I Wu <[email protected]>
+ * Thomas Hellstrom <[email protected]>
*/
#include "util/u_memory.h"
@@ -131,6 +133,25 @@ drm_surface_flush_frontbuffer(struct native_surface *nsurf)
}
static boolean
+drm_surface_copy_swap(struct native_surface *nsurf)
+{
+ struct drm_surface *drmsurf = drm_surface(nsurf);
+ struct drm_display *drmdpy = drmsurf->drmdpy;
+
+ (void) resource_surface_throttle(drmsurf->rsurf);
+ if (!resource_surface_copy_swap(drmsurf->rsurf, &drmdpy->base))
+ return FALSE;
+
+ (void) resource_surface_flush(drmsurf->rsurf, &drmdpy->base);
+ if (!drm_surface_flush_frontbuffer(nsurf))
+ return FALSE;
+
+ drmsurf->sequence_number++;
+
+ return TRUE;
+}
+
+static boolean
drm_surface_swap_buffers(struct native_surface *nsurf)
{
struct drm_surface *drmsurf = drm_surface(nsurf);
@@ -139,17 +160,21 @@ drm_surface_swap_buffers(struct native_surface *nsurf)
struct drm_framebuffer tmp_fb;
int err;
+ if (!drmsurf->have_pageflip)
+ return drm_surface_copy_swap(nsurf);
+
if (!drmsurf->back_fb.buffer_id) {
if (!drm_surface_init_framebuffers(&drmsurf->base, TRUE))
return FALSE;
}
if (drmsurf->is_shown && drmcrtc->crtc) {
- err = drmModeSetCrtc(drmdpy->fd, drmcrtc->crtc->crtc_id,
- drmsurf->back_fb.buffer_id, drmcrtc->crtc->x, drmcrtc->crtc->y,
- drmcrtc->connectors, drmcrtc->num_connectors, &drmcrtc->crtc->mode);
- if (err)
- return FALSE;
+ err = drmModePageFlip(drmdpy->fd, drmcrtc->crtc->crtc_id,
+ drmsurf->back_fb.buffer_id, 0, NULL);
+ if (err) {
+ drmsurf->have_pageflip = FALSE;
+ return drm_surface_copy_swap(nsurf);
+ }
}
/* swap the buffers */
@@ -175,7 +200,7 @@ drm_surface_present(struct native_surface *nsurf,
{
boolean ret;
- if (preserve || swap_interval)
+ if (swap_interval)
return FALSE;
switch (natt) {
@@ -183,7 +208,10 @@ drm_surface_present(struct native_surface *nsurf,
ret = drm_surface_flush_frontbuffer(nsurf);
break;
case NATIVE_ATTACHMENT_BACK_LEFT:
- ret = drm_surface_swap_buffers(nsurf);
+ if (preserve)
+ ret = drm_surface_copy_swap(nsurf);
+ else
+ ret = drm_surface_swap_buffers(nsurf);
break;
default:
ret = FALSE;
@@ -196,7 +224,9 @@ drm_surface_present(struct native_surface *nsurf,
static void
drm_surface_wait(struct native_surface *nsurf)
{
- /* no-op */
+ struct drm_surface *drmsurf = drm_surface(nsurf);
+
+ resource_surface_wait(drmsurf->rsurf);
}
static void
@@ -204,6 +234,7 @@ drm_surface_destroy(struct native_surface *nsurf)
{
struct drm_surface *drmsurf = drm_surface(nsurf);
+ resource_surface_wait(drmsurf->rsurf);
if (drmsurf->current_crtc.crtc)
drmModeFreeCrtc(drmsurf->current_crtc.crtc);
@@ -236,6 +267,7 @@ drm_display_create_surface(struct native_display *ndpy,
drmsurf->color_format = drmconf->base.color_format;
drmsurf->width = width;
drmsurf->height = height;
+ drmsurf->have_pageflip = TRUE;
drmsurf->rsurf = resource_surface_create(drmdpy->base.screen,
drmsurf->color_format,
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c
index 6932f30a6a4..cdbb304c5ee 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.c
+++ b/src/gallium/state_trackers/egl/drm/native_drm.c
@@ -124,8 +124,7 @@ drm_display_destroy(struct native_display *ndpy)
drm_display_fini_modeset(&drmdpy->base);
- if (drmdpy->base.screen)
- drmdpy->base.screen->destroy(drmdpy->base.screen);
+ ndpy_uninit(ndpy);
if (drmdpy->fd >= 0)
close(drmdpy->fd);
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h
index 03c4fe01dc1..7da9b45f23e 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.h
+++ b/src/gallium/state_trackers/egl/drm/native_drm.h
@@ -91,6 +91,8 @@ struct drm_surface {
boolean is_shown;
struct drm_crtc current_crtc;
+
+ boolean have_pageflip;
};
struct drm_connector {
diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
index a1e91ba701c..4a96039ac39 100644
--- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
+++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
@@ -320,7 +320,7 @@ fbdev_display_destroy(struct native_display *ndpy)
{
struct fbdev_display *fbdpy = fbdev_display(ndpy);
- fbdpy->base.screen->destroy(fbdpy->base.screen);
+ ndpy_uninit(&fbdpy->base);
close(fbdpy->fd);
FREE(fbdpy);
}
diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c
index 3cc4aefa937..3c2475f84d8 100644
--- a/src/gallium/state_trackers/egl/gdi/native_gdi.c
+++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c
@@ -363,7 +363,7 @@ gdi_display_destroy(struct native_display *ndpy)
if (gdpy->configs)
FREE(gdpy->configs);
- gdpy->base.screen->destroy(gdpy->base.screen);
+ ndpy_uninit(ndpy);
FREE(gdpy);
}
diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.c b/src/gallium/state_trackers/egl/wayland/native_wayland.c
index d4d5f9c2ebb..068c3cd7c8e 100644
--- a/src/gallium/state_trackers/egl/wayland/native_wayland.c
+++ b/src/gallium/state_trackers/egl/wayland/native_wayland.c
@@ -41,6 +41,7 @@
#include "radeon/drm/radeon_drm_public.h"
#include <wayland-client.h>
+#include "wayland-drm-client-protocol.h"
#include "wayland-egl-priv.h"
#include <xf86drm.h>
@@ -137,8 +138,7 @@ wayland_display_destroy(struct native_display *ndpy)
if (display->config)
FREE(display->config);
- if (display->base.screen)
- display->base.screen->destroy(display->base.screen);
+ ndpy_uninit(ndpy);
FREE(display);
}
diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c
index d4f4dd04df4..8e32c6ff0c4 100644
--- a/src/gallium/state_trackers/egl/x11/native_ximage.c
+++ b/src/gallium/state_trackers/egl/x11/native_ximage.c
@@ -476,7 +476,7 @@ ximage_display_destroy(struct native_display *ndpy)
if (xdpy->configs)
FREE(xdpy->configs);
- xdpy->base.screen->destroy(xdpy->base.screen);
+ ndpy_uninit(ndpy);
x11_screen_destroy(xdpy->xscr);
if (xdpy->own_dpy)
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index 7cc5af89639..03aa1b1537a 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -63,7 +63,6 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws)
batch->base.size = 0;
batch->base.relocs = 0;
- batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/
batch->base.iws = iws;
@@ -104,8 +103,6 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
unsigned offset;
int ret = 0;
- assert(batch->base.relocs < batch->base.max_relocs);
-
switch (usage) {
case I915_USAGE_SAMPLER:
write_domain = 0;
diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
index 3d0c1fa6224..3bf54011d9e 100644
--- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
+++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c
@@ -49,7 +49,6 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws)
batch->base.size = 0;
batch->base.relocs = 0;
- batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/
batch->base.iws = iws;
@@ -75,8 +74,6 @@ i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch,
struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch);
int ret = 0;
- assert(batch->base.relocs < batch->base.max_relocs);
-
if (usage == I915_USAGE_SAMPLER) {
} else if (usage == I915_USAGE_RENDER) {
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index aa4035a302b..66398afa698 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -834,25 +834,25 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
}
/* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
/* flush color buffer */
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index f5cd48d39c6..cd0aa318be5 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -45,6 +45,10 @@
#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9
#endif
+#ifndef RADEON_INFO_NUM_BACKENDS
+#define RADEON_INFO_NUM_BACKENDS 0xa
+#endif
+
enum radeon_family r600_get_family(struct radeon *r600)
{
return r600->family;
@@ -65,6 +69,17 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon)
return radeon->clock_crystal_freq;
}
+unsigned r600_get_num_backends(struct radeon *radeon)
+{
+ return radeon->num_backends;
+}
+
+unsigned r600_get_minor_version(struct radeon *radeon)
+{
+ return radeon->minor_version;
+}
+
+
static int radeon_get_device(struct radeon *radeon)
{
struct drm_radeon_info info = {};
@@ -195,6 +210,26 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon)
return 0;
}
+
+static int radeon_get_num_backends(struct radeon *radeon)
+{
+ struct drm_radeon_info info;
+ uint32_t num_backends;
+ int r;
+
+ radeon->device = 0;
+ info.request = RADEON_INFO_NUM_BACKENDS;
+ info.value = (uintptr_t)&num_backends;
+ r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+ sizeof(struct drm_radeon_info));
+ if (r)
+ return r;
+
+ radeon->num_backends = num_backends;
+ return 0;
+}
+
+
static int radeon_init_fence(struct radeon *radeon)
{
radeon->fence = 1;
@@ -211,6 +246,7 @@ static struct radeon *radeon_new(int fd, unsigned device)
{
struct radeon *radeon;
int r;
+ drmVersionPtr version;
radeon = calloc(1, sizeof(*radeon));
if (radeon == NULL) {
@@ -219,13 +255,27 @@ static struct radeon *radeon_new(int fd, unsigned device)
radeon->fd = fd;
radeon->device = device;
radeon->refcount = 1;
- if (fd >= 0) {
- r = radeon_get_device(radeon);
- if (r) {
- fprintf(stderr, "Failed to get device id\n");
- return radeon_decref(radeon);
- }
+
+ version = drmGetVersion(radeon->fd);
+ if (version->version_major != 2) {
+ fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
+ "only compatible with 2.x.x\n", __FUNCTION__,
+ version->version_major, version->version_minor,
+ version->version_patchlevel);
+ drmFreeVersion(version);
+ exit(1);
}
+
+ radeon->minor_version = version->version_minor;
+
+ drmFreeVersion(version);
+
+ r = radeon_get_device(radeon);
+ if (r) {
+ fprintf(stderr, "Failed to get device id\n");
+ return radeon_decref(radeon);
+ }
+
radeon->family = radeon_family_from_device(radeon->device);
if (radeon->family == CHIP_UNKNOWN) {
fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
@@ -278,6 +328,9 @@ static struct radeon *radeon_new(int fd, unsigned device)
/* get the GPU counter frequency, failure is non fatal */
radeon_get_clock_crystal_freq(radeon);
+ if (radeon->minor_version >= 9)
+ radeon_get_num_backends(radeon);
+
radeon->bomgr = r600_bomgr_create(radeon, 1000000);
if (radeon->bomgr == NULL) {
return NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index f170640407d..6b3baa2c909 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -107,7 +107,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
/* initialize block */
block->start_offset = reg[i].offset;
- block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n);
+ block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n, 0);
block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2;
block->reg = &block->pm4[block->pm4_ndwords];
block->pm4_ndwords += n;
@@ -119,7 +119,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg,
block->nbo++;
assert(block->nbo < R600_BLOCK_MAX_BO);
block->pm4_bo_index[j] = block->nbo;
- block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
+ block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0);
block->pm4[block->pm4_ndwords++] = 0x00000000;
block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags;
block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask;
@@ -771,12 +771,12 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
bo->last_flush &= flush_mask;
return;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8;
ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id;
bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
}
@@ -1048,25 +1048,25 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
}
/* draw packet */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
if (draw->indices) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices);
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
/* flush color buffer */
@@ -1101,15 +1101,15 @@ void r600_context_flush(struct r600_context *ctx)
r600_context_queries_suspend(ctx);
/* emit fence */
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);
ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence;
ctx->pm4[ctx->pm4_cdwords++] = 0;
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo);
@@ -1282,6 +1282,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
{
unsigned required_space;
+ int num_backends = r600_get_num_backends(ctx->radeon);
/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
if (query->type == PIPE_QUERY_TIME_ELAPSED)
@@ -1300,21 +1301,39 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
r600_query_result(ctx, query, TRUE);
}
+ if (query->type == PIPE_QUERY_OCCLUSION_COUNTER &&
+ num_backends > 0 && num_backends < ctx->max_db) {
+ /* as per info on ZPASS the driver must set the unusued DB top bits */
+ u32 *results;
+ int i;
+
+ results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL);
+ if (results) {
+ memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4);
+
+ for (i = num_backends; i < ctx->max_db; i++) {
+ results[(i * 4)+1] = 0x80000000;
+ results[(i * 4)+3] = 0x80000000;
+ }
+ r600_bo_unmap(ctx->radeon, query->buffer);
+ }
+ }
+
/* emit begin query */
if (query->type == PIPE_QUERY_TIME_ELAPSED) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = 0;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
@@ -1327,19 +1346,19 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
/* emit begin query */
if (query->type == PIPE_QUERY_TIME_ELAPSED) {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
} else {
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer);
ctx->pm4[ctx->pm4_cdwords++] = 0;
}
- ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = 0;
r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
@@ -1349,6 +1368,28 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
ctx->num_query_running--;
}
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait)
+{
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+
+ if (operation == PREDICATION_OP_CLEAR) {
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR);
+ } else {
+ int results_base = query->num_results - (4 * ctx->max_db);
+
+ if (results_base < 0)
+ results_base = 0;
+
+ ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer);
+ ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE;
+ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+ ctx->pm4[ctx->pm4_cdwords++] = 0;
+ r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+ }
+}
+
struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
{
struct r600_query *query;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 2d91cd97d68..41c5ee02c38 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -50,6 +50,8 @@ struct radeon {
unsigned *cfence;
struct r600_bo *fence_bo;
unsigned clock_crystal_freq;
+ unsigned num_backends;
+ unsigned minor_version;
};
struct r600_reg {
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
index 1c1ac76fe69..cb12865ff01 100644
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ b/src/gallium/winsys/r600/drm/r600d.h
@@ -105,6 +105,18 @@
* 5 - TS events
*/
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+
+#define PRED_OP(x) ((x) << 16)
+
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
@@ -117,8 +129,9 @@
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PRED_S(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
/* Registers */
#define R_0280A0_CB_COLOR0_INFO 0x0280A0
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c
index 4676c2a1ea9..72c2ff11125 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c
@@ -96,10 +96,13 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
* we don't actually use the info for anything yet. */
version = drmGetVersion(winsys->fd);
- if (version->version_major != 2) {
+ if (version->version_major != 2 ||
+ version->version_minor < 3) {
fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
- "only compatible with 2.x.x\n", __FUNCTION__,
- version->version_major, version->version_minor,
+ "only compatible with 2.3.x (kernel 2.6.34) and later.\n",
+ __FUNCTION__,
+ version->version_major,
+ version->version_minor,
version->version_patchlevel);
drmFreeVersion(version);
exit(1);
@@ -157,6 +160,8 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
winsys->vram_size = gem_info.vram_size;
drmFreeVersion(version);
+
+ winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
}
static void radeon_winsys_destroy(struct r300_winsys_screen *rws)
@@ -190,10 +195,6 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws,
return ws->drm_minor;
case R300_VID_DRM_PATCHLEVEL:
return ws->drm_patchlevel;
- case R300_VID_DRM_2_1_0:
- return ws->drm_major*100 + ws->drm_minor >= 201;
- case R300_VID_DRM_2_3_0:
- return ws->drm_major*100 + ws->drm_minor >= 203;
case R300_VID_DRM_2_6_0:
return ws->drm_major*100 + ws->drm_minor >= 206;
case R300_VID_DRM_2_8_0:
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a38b01048b2..9ee800f5950 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -384,7 +384,7 @@ void radeon_drm_cs_flush(struct r300_winsys_cs *rcs)
for (i = 0; i < crelocs; i++)
p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
- if (debug_get_option_thread()) {
+ if (cs->ws->num_cpus > 1 && debug_get_option_thread()) {
cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc);
assert(cs->thread);
} else {
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index f8a89abcfe4..9ecbb074572 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -46,6 +46,7 @@ struct radeon_drm_winsys {
uint32_t z_pipes; /* Z pipe count (rv530 only) */
uint32_t gart_size; /* GART size. */
uint32_t vram_size; /* VRAM size. */
+ uint32_t num_cpus; /* Number of CPUs. */
unsigned drm_major;
unsigned drm_minor;