diff options
Diffstat (limited to 'src/gallium')
83 files changed, 1914 insertions, 969 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7d7d700eacd..e48063166ff 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -111,6 +111,7 @@ C_SOURCES = \ util/u_format.c \ util/u_format_other.c \ util/u_format_s3tc.c \ + util/u_format_rgtc.c \ util/u_format_srgb.c \ util/u_format_table.c \ util/u_format_tests.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 0ec63071615..ef0b15f06ce 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -158,6 +158,7 @@ source = [ 'util/u_format.c', 'util/u_format_other.c', 'util/u_format_s3tc.c', + 'util/u_format_rgtc.c', 'util/u_format_srgb.c', 'util/u_format_table.c', 'util/u_format_tests.c', diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 5ea552f51c1..60f6380c503 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -874,6 +874,8 @@ draw_install_aapoint_stage(struct draw_context *draw, { struct aapoint_stage *aapoint; + pipe->draw = (void *) draw; + /* * Create / install AA point drawing / prim stage */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index cbb090b2803..9cf74a838fe 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -672,6 +672,31 @@ tgsi_exec_machine_bind_shader( mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && + !mach->UsedGeometryShader) { + struct tgsi_exec_vector *inputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, + 16); + struct tgsi_exec_vector *outputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_TOTAL_VERTICES, 16); + + if (!inputs) + return; + if (!outputs) { + align_free(inputs); + return; + } + + align_free(mach->Inputs); + align_free(mach->Outputs); + + mach->Inputs = inputs; + mach->Outputs = outputs; + mach->UsedGeometryShader = TRUE; + } + declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); @@ -801,6 +826,11 @@ tgsi_exec_machine_create( void ) mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + if (!mach->Inputs || !mach->Outputs) + goto fail; + /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; @@ -824,7 +854,11 @@ tgsi_exec_machine_create( void ) return mach; fail: - align_free(mach); + if (mach) { + align_free(mach->Inputs); + align_free(mach->Outputs); + align_free(mach); + } return NULL; } @@ -836,10 +870,13 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) if (mach->Instructions) FREE(mach->Instructions); if (mach->Declarations) - FREE(mach->Declarations); - } + FREE(mach->Declarations); + + align_free(mach->Inputs); + align_free(mach->Outputs); - align_free(mach); + align_free(mach); + } } static void @@ -1047,7 +1084,7 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, }*/ int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; assert(pos >= 0); - assert(pos < Elements(mach->Inputs)); + assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 4a423b5bb4e..33f33aa82c7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -228,8 +228,8 @@ struct tgsi_exec_machine float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; /* System values */ unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; @@ -309,6 +309,8 @@ struct tgsi_exec_machine uint NumDeclarations; struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES]; + + boolean UsedGeometryShader; }; struct tgsi_exec_machine * diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index 6d0016c0ad8..cc173f808ae 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -632,7 +632,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): def is_format_hand_written(format): - return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS + return format.layout in ('s3tc', 'rgtc', 'subsampled', 'other') or format.colorspace == ZS def generate(formats): diff --git a/src/gallium/auxiliary/util/u_format_rgtc.c b/src/gallium/auxiliary/util/u_format_rgtc.c new file mode 100644 index 00000000000..6ffcd7e99ef --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgtc.c @@ -0,0 +1,452 @@ +/************************************************************************** + * + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include "u_math.h" +#include "u_format.h" +#include "u_format_rgtc.h" + +static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata, + unsigned i, unsigned j, uint8_t *value, unsigned comps); + +static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata, + unsigned i, unsigned j, int8_t *value, unsigned comps); + +void +util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1); +} + +void +util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j; + unsigned block_size = 8; + + for(y = 0; y < height; y += bh) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1); + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, + unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]; + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; +} + +void +util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + int8_t *dst = (int8_t *)dst_row; + for(x = 0; x < width; x += bw) { + int8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + } + } + u_format_signed_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r; + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; +} + + +void +util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2); +} + +void +util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j; + unsigned block_size = 16; + + for(y = 0; y < height; y += bh) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2); + + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp_r[4][4]; /* [bh][bw] */ + uint8_t tmp_g[4][4]; /* [bh][bw] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]; + tmp_g[j][i] = src_row[((y + j)*src_stride/sizeof(*src_row) + (x + i)*4) + 1]; + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp_r[4][4]; /* [bh][bw][comps] */ + uint8_t tmp_g[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + tmp_g[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + 1]); + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r, tmp_g; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = ubyte_to_float(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r, tmp_g; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = ubyte_to_float(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; +} + + +void +util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r, tmp_g; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = byte_to_float_tex(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + int8_t *dst = (int8_t *)dst_row; + for(x = 0; x < width; x += bw) { + int8_t tmp_r[4][4]; /* [bh][bw][comps] */ + int8_t tmp_g[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + tmp_g[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + 1]); + } + } + u_format_signed_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_signed_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r, tmp_g; + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = byte_to_float_tex(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; +} + + +#define TAG(x) u_format_unsigned_##x +#define TYPE uint8_t +#define T_MIN 0 +#define T_MAX 255 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX + + +#define TAG(x) u_format_signed_##x +#define TYPE int8_t +#define T_MIN (int8_t)-128 +#define T_MAX (int8_t)127 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX diff --git a/src/gallium/auxiliary/util/u_format_rgtc.h b/src/gallium/auxiliary/util/u_format_rgtc.h new file mode 100644 index 00000000000..3e8636d110c --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgtc.h @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2011 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +#ifndef U_FORMAT_RGTC_H_ +#define U_FORMAT_RGTC_H_ + +void +util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + + +void +util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +#endif diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 8cc22a56371..7468bc38b32 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -87,6 +87,7 @@ def write_format_table(formats): print print '#include "u_format.h"' print '#include "u_format_s3tc.h"' + print '#include "u_format_rgtc.h"' print u_format_pack.generate(formats) @@ -132,7 +133,7 @@ def write_format_table(formats): if format.colorspace != ZS: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() - if format.layout == 's3tc': + if format.layout == 's3tc' or format.layout == 'rgtc': print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() else: print " NULL, /* fetch_rgba_8unorm */" diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 30555f92a6d..e3d4c06b6f9 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -475,6 +475,17 @@ float_to_ubyte(float f) } } +static INLINE float +byte_to_float_tex(int8_t b) +{ + return (b == -128) ? -1.0F : b * 1.0F / 127.0F; +} + +static INLINE int8_t +float_to_byte_tex(float f) +{ + return (int8_t) (127.0F * f); +} /** * Calc log base 2 diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 3cf8ee0831d..7d157c99ccc 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -515,6 +515,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, int min_index, int max_index, + unsigned instance_count, boolean *upload_flushed) { int i, nr = mgr->ve->count; @@ -530,10 +531,12 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, !uploaded[index]) { unsigned first, size; boolean flushed; + unsigned instance_div = mgr->ve->ve[i].instance_divisor; - if (mgr->ve->ve[i].instance_divisor) { + if (instance_div) { first = 0; - size = vb->buffer->width0; + size = vb->stride * + ((instance_count + instance_div - 1) / instance_div); } else if (vb->stride) { first = vb->stride * min_index; size = vb->stride * count; @@ -581,7 +584,8 @@ void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, /* Upload user buffers. */ if (mgr->any_user_vbs) { - u_vbuf_upload_buffers(mgr, min_index, max_index, &upload_flushed); + u_vbuf_upload_buffers(mgr, min_index, max_index, info->instance_count, + &upload_flushed); bufs_updated = TRUE; } diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index 6e93da76209..039c8713570 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -31,8 +31,8 @@ #include "i915_batchbuffer.h" -#define BEGIN_BATCH(dwords, relocs) \ - (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs)) +#define BEGIN_BATCH(dwords) \ + (i915_winsys_batchbuffer_check(i915->batch, dwords)) #define OUT_BATCH(dword) \ i915_winsys_batchbuffer_dword(i915->batch, dword) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index b4a91dabb37..9df82272604 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, - size_t dwords, - size_t relocs) + size_t dwords) { - return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && - relocs <= (batch->max_relocs - batch->relocs); + return dwords * 4 <= i915_winsys_batchbuffer_space(batch); } static INLINE void @@ -71,7 +69,7 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, { assert (i915_winsys_batchbuffer_space(batch) >= size); - memcpy(data, batch->ptr, size); + memcpy(batch->ptr, data, size); batch->ptr += size; } diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index f885417f8ed..baaed3767ff 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -71,9 +71,9 @@ i915_fill_blit(struct i915_context *i915, return; } - if (!BEGIN_BATCH(6, 1)) { + if (!BEGIN_BATCH(6)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(6, 1)); + assert(BEGIN_BATCH(6)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -143,9 +143,9 @@ i915_copy_blit(struct i915_context *i915, */ assert (dst_pitch > 0 && src_pitch > 0); - if (!BEGIN_BATCH(8, 2)) { + if (!BEGIN_BATCH(8)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(8, 2)); + assert(BEGIN_BATCH(8)); } OUT_BATCH(CMD); OUT_BATCH(BR13); diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index cbf919754e5..84c8cb54436 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,7 +39,7 @@ #include "pipe/p_screen.h" -DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE) /* diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 22a2c7b2cb4..f2044d661e3 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -59,9 +59,9 @@ static void i915_flush_pipe( struct pipe_context *pipe, if (flags & PIPE_FLUSH_TEXTURE_CACHE) flush |= FLUSH_MAP_CACHE; - if (!BEGIN_BATCH(1, 0)) { + if (!BEGIN_BATCH(1)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1, 0)); + assert(BEGIN_BATCH(1)); } OUT_BATCH( flush ); } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index dd997e2cf48..276e33d4b9d 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -144,7 +144,7 @@ emit_prim( struct draw_stage *stage, vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -152,7 +152,7 @@ emit_prim( struct draw_stage *stage, i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { assert(0); return; } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 3473c863970..fb4c0516dd8 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -465,7 +465,7 @@ draw_arrays_fallback(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -474,7 +474,7 @@ draw_arrays_fallback(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -514,7 +514,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -523,7 +523,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { assert(0); goto out; } @@ -635,7 +635,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -644,7 +644,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 0323ad940f9..15350c0a5d7 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -40,13 +40,19 @@ struct i915_tracked_hw_state { const char *name; - void (*validate)(struct i915_context *); + void (*validate)(struct i915_context *, unsigned *batch_space); void (*emit)(struct i915_context *); unsigned dirty, batch_space; }; static void +validate_flush(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->flush_dirty ? 1 : 0; +} + +static void emit_flush(struct i915_context *i915) { /* Cache handling is very cheap atm. State handling can request to flushes: @@ -61,109 +67,343 @@ emit_flush(struct i915_context *i915) OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, + + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + static void -validate_immediate(struct i915_context *i915) +emit_invariant(struct i915_context *i915) { + i915_winsys_batchbuffer_write(i915->batch, invariant_state, + Elements(invariant_state)*sizeof(uint32_t)); +} + +static void +validate_immediate(struct i915_context *i915, unsigned *batch_space) +{ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; + + *batch_space = 1 + util_bitcount(dirty); } static void -validate_static(struct i915_context *i915) +emit_immediate(struct i915_context *i915) { - if (i915->current.cbuf_bo) + /* remove unwatned bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } +} + +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} + +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); + } +} + +static void +validate_static(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = 2 + 5; /* including DRAW_RECT */ + + if (i915->current.cbuf_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.cbuf_bo; + *batch_space += 3; + } - if (i915->current.depth_bo) + if (i915->current.depth_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.depth_bo; + *batch_space += 3; + } } static void -validate_map(struct i915_context *i915) +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } + + /* What happens if no zbuf?? + */ + if (i915->current.depth_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } + + { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } +} + +static void +validate_map(struct i915_context *i915, unsigned *batch_space) { const uint enabled = i915->current.sampler_enable_flags; uint unit; struct i915_texture *tex; + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; for (unit = 0; unit < I915_TEX_UNITS; unit++) { if (enabled & (1 << unit)) { - tex = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } } -const static struct i915_tracked_hw_state hw_atoms[] = { - { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, - { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE }, - { "static", validate_static, NULL, I915_HW_STATIC }, - { "map", validate_map, NULL, I915_HW_MAP } -}; +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } +} + +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; +} + +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } +} + +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->num_constants ? + 2 + 4*i915->fs->num_constants : 0; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } +} + +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->program_len; +} + +static void +emit_program(struct i915_context *i915) +{ + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } +} + +static void +emit_draw_rect(struct i915_context *i915) +{ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); +} static boolean i915_validate_state(struct i915_context *i915, unsigned *batch_space) { - int i; + unsigned tmp; i915->num_validation_buffers = 0; - *batch_space = 0; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) { - hw_atoms[i].validate(i915); - *batch_space += hw_atoms[i].batch_space; - } + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = Elements(invariant_state); + else + *batch_space = 0; + +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, I915_HW_PROGRAM); +#undef VALIDATE_ATOM if (i915->num_validation_buffers == 0) return TRUE; if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, - i915->num_validation_buffers)) + i915->num_validation_buffers)) return FALSE; return TRUE; } -static void -emit_state(struct i915_context *i915) -{ - int i; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) - hw_atoms[i].emit(i915); -} - /* Push the state into the sarea and/or texture memory. */ void i915_emit_hardware_state(struct i915_context *i915 ) { unsigned batch_space; - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ - uintptr_t save_ptr; - size_t save_relocs; if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); @@ -173,262 +413,36 @@ i915_emit_hardware_state(struct i915_context *i915 ) assert(i915_validate_state(i915, &batch_space)); } - if(!BEGIN_BATCH(batch_space + dwords, relocs)) { + if(!BEGIN_BATCH(batch_space)) { FLUSH_BATCH(NULL); assert(i915_validate_state(i915, &batch_space)); - assert(BEGIN_BATCH(batch_space + dwords, relocs)); + assert(BEGIN_BATCH(batch_space)); } save_ptr = (uintptr_t)i915->batch->ptr; - save_relocs = i915->batch->relocs; - - emit_state(i915); - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIANT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - } - - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - /* remove unwatned bits and S7 */ - unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | - 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | - 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | - 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & - i915->immediate_dirty; - int i, num = util_bitcount(dirty); - assert(num && num <= I915_MAX_IMMEDIATE); - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - dirty << 4 | (num - 1)); - - if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { - if (i915->vbo) - OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - OUT_BATCH(0); - } - - for (i = 1; i < I915_MAX_IMMEDIATE; i++) { - if (dirty & (1 << i)) - OUT_BATCH(i915->current.immediate[i]); - } - } - -#if 01 - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - if (i915->dynamic_dirty & (1 << i)) - OUT_BATCH(i915->current.dynamic[i]); - } - } -#endif - -#if 01 - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - if (i915->current.cbuf_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.cbuf_flags); - OUT_RELOC(i915->current.cbuf_bo, - I915_USAGE_RENDER, - 0); - } - - /* What happens if no zbuf?? - */ - if (i915->current.depth_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.depth_flags); - OUT_RELOC(i915->current.depth_bo, - I915_USAGE_RENDER, - 0); - } - - { - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(i915->current.dst_buf_vars); - } - } -#endif - -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); - } - } -#endif - -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } - } - } - } -#endif - -#if 01 - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_CONSTANTS) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH((1 << nr) - 1); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; - c += 4 * i; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } -#if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - } - } - } -#endif - -#if 01 - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } - } -#endif - -#if 01 - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - /* XXX flush only required when the draw_offset changes! */ - OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(i915->current.draw_offset); - OUT_BATCH(i915->current.draw_size); - OUT_BATCH(i915->current.draw_offset); - } -#endif - I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM + + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, ((uintptr_t)i915->batch->ptr - save_ptr) / 4, - i915->batch->relocs - save_relocs); + batch_space); + assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); i915->hardware_dirty = 0; i915->immediate_dirty = 0; i915->dynamic_dirty = 0; + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 97044499990..20cd23f8f73 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -164,7 +164,7 @@ static void update_framebuffer(struct i915_context *i915) assert(ret); if (i915->current.draw_offset != draw_offset) { i915->current.draw_offset = draw_offset; - /* XXX: only emit flush on change and not always in emit */ + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); } i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16); diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 4ac2f5b9777..21cfdc9613e 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer { size_t size; size_t relocs; - size_t max_relocs; /*@}*/ }; diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 4f86db39926..6391ea7f3be 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; } +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -134,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format, } } +static uint32_t r300_hiz_clear_value(double depth) +{ + uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5); + assert(r <= 255); + return r | (r << 8) | (r << 16) | (r << 24); +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -190,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_resource *zstex = - fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -200,20 +212,18 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + if (r300_hiz_clear_allowed(r300)) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); r300_mark_atom_dirty(r300, &r300->hiz_clear); - - /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); - * once hiz offset is constant. */ - r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } } /* Enable CBZB clear. */ @@ -240,14 +250,14 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); @@ -257,9 +267,11 @@ static void r300_clear(struct pipe_context* pipe, } /* Emit clear packets. */ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -279,9 +291,8 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (r300->zmask_in_use || - (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -295,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); @@ -303,7 +314,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -320,11 +331,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == dst->texture) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -334,8 +345,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -345,7 +356,7 @@ void r300_decompress_zmask(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!r300->zmask_in_use || r300->zmask_locked) + if (!r300->zmask_in_use || r300->hyperz_locked) return; r300->zmask_decompress = TRUE; @@ -420,12 +431,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, util_format_description(dst->format); struct pipe_box box; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == src || fb->zsbuf->texture == dst) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -463,7 +474,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } /* Handle compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (util_format_get_blocksize(old_dst.format)) { case 8: /* 1 pixel = 4 bits, @@ -502,8 +514,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (old_dst.format != new_dst.format) r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0be161fa07a..68943d561ba 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -90,8 +90,6 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; - /* Index bias (AKA index offset). */ - boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9f85bd4ce5f..166d965aa5b 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,7 +30,6 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -170,7 +169,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -203,11 +201,11 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -216,7 +214,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(rs_state, 0); /* SC, US. */ - R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + R300_INIT_ATOM(fb_state_pipelined, 8); /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); @@ -227,7 +225,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ R300_INIT_ATOM(zmask_clear, 4); } @@ -331,7 +329,7 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the VAP invariant state. */ { - BEGIN_CB(vap_invariant->cb, 9); + BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CB_32F(1.0); @@ -339,6 +337,10 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_32F(1.0); OUT_CB_32F(1.0); OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } END_CB; } @@ -353,11 +355,17 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); + OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); + } END_CB; } @@ -446,16 +454,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, /* Render functions must be initialized after blitter. */ r300_init_render_functions(r300); + r300_init_states(&r300->context); rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300_init_states(&r300->context); - /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -506,10 +508,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } /* Print driver info. */ -#ifdef NDEBUG - if (DBG_ON(r300, DBG_INFO)) { -#else +#ifdef DEBUG { +#else + if (DBG_ON(r300, DBG_INFO)) { #endif fprintf(stderr, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" @@ -525,7 +527,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->get_value(rws, R300_VID_GART_SIZE) >> 20, rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", - rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.zmask_ram ? "YES" : "NO", rws->get_value(rws, R300_CAN_HYPERZ) && r300->screen->caps.hiz_ram ? "YES" : "NO"); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 30073759476..6f2aab69ab1 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -102,7 +102,6 @@ struct r300_dsa_state { }; struct r300_hyperz_state { - int current_func; /* -1 after a clear before first op */ int flush; /* This is actually a command buffer with named dwords. */ uint32_t cb_flush_begin; @@ -220,11 +219,11 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[20]; + uint32_t cb[26]; }; struct r300_vap_invariant_state { - uint32_t cb[9]; + uint32_t cb[11]; }; struct r300_viewport_state { @@ -295,6 +294,8 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -363,8 +364,12 @@ struct r300_texture_desc { /* Zbuffer compression info for each miplevel. */ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; - /* If zero, then disable compression. */ + /* If zero, then disable Z compression/HiZ. */ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; struct r300_resource @@ -390,10 +395,6 @@ struct r300_resource /* Where the texture starts in the buffer. */ unsigned tex_offset; - /* HiZ memory allocations. */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -412,6 +413,21 @@ struct r300_vertex_element_state { struct r300_vertex_stream_state vertex_stream; }; +enum r300_hiz_func { + HIZ_FUNC_NONE, + + /* The function, when determined, is set in stone + * until the next HiZ clear. */ + + /* MAX is written to the HiZ buffer. + * Used for LESS, LEQUAL. */ + HIZ_FUNC_MAX, + + /* MIN is written to the HiZ buffer. + * Used for GREATER, GEQUAL. */ + HIZ_FUNC_MIN, +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -545,22 +561,25 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - + /* Whether fast color clear is enabled. */ boolean cbzb_clear; /* Whether ZMASK is enabled. */ boolean zmask_in_use; /* Whether ZMASK is being decompressed. */ boolean zmask_decompress; - /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ - boolean zmask_locked; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; /* The zbuffer the ZMASK of which is locked. */ struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; + /* HiZ function. Can be either MIN or MAX. */ + enum r300_hiz_func hiz_func; + /* HiZ clear value. */ + uint32_t hiz_clear_value; void *dsa_decompress_zmask; - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -644,6 +663,9 @@ void r300_decompress_zmask(struct r300_context *r300); void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); void r300_decompress_zmask_locked(struct r300_context *r300); +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2e4719ec82..24c82a3efd2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_resource *tex; - int level = surf->base.u.tex.level; - tex = r300_resource(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; - /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } - + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. (compressed zbuffer) */ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -484,6 +469,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; + unsigned mspos0, mspos1; CS_LOCALS(r300); /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be @@ -507,38 +493,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - unsigned mspos0 = 0x66666666; - unsigned mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } - } + mspos0 = 0x66666666; + mspos1 = 0x6666666; - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); END_CS; } @@ -1039,56 +1023,26 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_hyperz_state *z = - (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; struct r300_resource* tex; - int i; + CS_LOCALS(r300); tex = r300_resource(fb->zsbuf->texture); - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } - z->current_func = -1; + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(r300->hiz_clear_value); + END_CS; /* Mark the current zbuffer's hiz ram as in use. */ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300->hiz_func = HIZ_FUNC_NONE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) @@ -1236,7 +1190,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. */ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) dwords += 2; return dwords; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index c77cc08539d..9c41a1383ce 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -57,7 +57,7 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 873e0209d42..ecaadf4af8e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -41,58 +40,74 @@ /* The HyperZ setup */ /*****************************************************************************/ -static bool r300_get_sc_hz_max(struct r300_context *r300) +static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_SC_HYPERZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; - if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS) - ret = R300_SC_HYPERZ_MAX; - return ret; + if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) + return HIZ_FUNC_NONE; + + switch (dsa->dsa.depth.func) { + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + case PIPE_FUNC_NOTEQUAL: + case PIPE_FUNC_ALWAYS: + return HIZ_FUNC_NONE; + + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + return HIZ_FUNC_MAX; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + return HIZ_FUNC_MIN; + + default: + assert(0); + return HIZ_FUNC_NONE; + } } -static bool r300_zfunc_same_direction(int func1, int func2) +/* Return what's used for the depth test (either minimum or maximum). */ +static unsigned r300_get_sc_hz_max(struct r300_context *r300) { - /* func1 is less/lessthan */ - if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) && - (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL || - func2 == R300_ZS_GREATER)) - return FALSE; - - /* func1 is greater/greaterthan */ - if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) && - (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL)) - return FALSE; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - return TRUE; + return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; } -static int r300_get_hiz_min(struct r300_context *r300) +static boolean r300_is_hiz_func_valid(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_HIZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; + + if (r300->hiz_func == HIZ_FUNC_NONE) + return TRUE; + + /* func1 is less/lessthan */ + if (r300->hiz_func == HIZ_FUNC_MAX && + (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) + return FALSE; - if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL) - ret = R300_HIZ_MAX; - return ret; + /* func1 is greater/greaterthan */ + if (r300->hiz_func == HIZ_FUNC_MIN && + (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) + return FALSE; + + return TRUE; } static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) { - if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || - s->zfail_op != PIPE_STENCIL_OP_KEEP)) - return TRUE; - return FALSE; + return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP); } static boolean r300_can_hiz(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; - struct r300_screen* r300screen = r300->screen; - struct r300_hyperz_state *z = r300->hyperz_state.state; + struct r300_dsa_state *dsa = r300->dsa_state.state; + struct r300_screen *r300screen = r300->screen; /* shader writes depth - no HiZ */ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ @@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ - if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || - r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) return FALSE; - if (dsa->depth.enabled) { + if (dsa->dsa.depth.enabled) { /* if depth func is EQUAL pre-r500 */ - if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) return FALSE; + /* if depth func is NOTEQUAL */ - if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) return FALSE; } - /* depth comparison function - if just cleared save and return okay */ - if (z->current_func == -1) { - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - if (func != 0 && func != 7) - z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK; - } else { - /* simple don't change */ - if (!r300_zfunc_same_direction(z->current_func, - (dsa_state->z_stencil_control & R300_ZS_MASK))) { - DBG(r300, DBG_HYPERZ, - "z func changed direction - disabling hyper-z %d -> %d\n", - z->current_func, dsa_state->z_stencil_control); - return FALSE; - } - } return TRUE; } @@ -139,7 +141,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_resource *zstex = fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; - boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -151,16 +152,12 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex) + if (!zstex || + !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - return; - - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Zbuffer compression. */ - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ R300_RD_COMP_ENABLE; @@ -174,16 +171,28 @@ static void r300_update_hyperz(struct r300_context* r300) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } - if (hiz_in_use && r300_can_hiz(r300)) { - z->zb_bw_cntl |= R300_HIZ_ENABLE | - r300_get_hiz_min(r300); - - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | - r300_get_sc_hz_max(r300); + /* HiZ. */ + if (r300->hiz_in_use && !r300->hyperz_locked) { + /* Set the HiZ function if needed. */ + if (r300->hiz_func == HIZ_FUNC_NONE) { + r300->hiz_func = r300_get_hiz_func(r300); + } - if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | - R500_HIZ_EQUAL_REJECT_ENABLE; + /* If the depth function is inverted, HiZ must be disabled. */ + if (!r300_is_hiz_func_valid(r300)) { + r300->hiz_in_use = FALSE; + } else if (r300_can_hiz(r300)) { + /* Setup the HiZ bits. */ + z->zb_bw_cntl |= + R300_HIZ_ENABLE | + (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; + } } } @@ -282,18 +291,6 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); @@ -301,51 +298,4 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_resource *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_resource(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->tex.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->b.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index d4c8e7c60a9..00000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); - -#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 1d93dab2ca2..bb30b1ab0be 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ +#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 + #define R300_VAP_CLIP_CNTL 0x221C # define R300_VAP_UCP_ENABLE_0 (1 << 0) # define R300_VAP_UCP_ENABLE_1 (1 << 1) @@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_HORIZONTAL (0<<27) # define R500_TX_DIRECTION_VERITCAL (1<<27) +#define R500_SU_TEX_WRAP_PS3 0x4114 + /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ #define R300_GA_POINT_S0 0x4200 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2ead8667bda..0ec4a225865 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -205,7 +205,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) cs_dwords += 2; /* emit_index_offset */ if (emit_vertex_arrays) @@ -257,7 +257,7 @@ static boolean r300_emit_states(struct r300_context *r300, } r300_emit_dirty_state(r300); - if (r300->screen->caps.index_bias_supported) { + if (r300->screen->caps.is_r500) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else @@ -557,7 +557,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr1[i+1] + indexBias) << 16) | (ptr1[i] + indexBias)); @@ -581,7 +581,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr2[i+1] + indexBias) << 16) | (ptr2[i] + indexBias)); @@ -601,7 +601,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count; i++) OUT_CS(ptr4[i] + indexBias); } else { @@ -620,13 +620,12 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } @@ -702,8 +701,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) { boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ @@ -748,6 +746,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + /* Start the vbuf manager and update buffers if needed. */ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, &buffers_updated, &uploader_flushed); @@ -756,8 +756,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, } /* Draw. */ - r300_update_derived_state(r300); - if (indexed) { if (count <= 8 && r300_resource(r300->index_buffer.buffer)->b.user_ptr) { diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 77a9c6ad86f..52d0247fbfd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -456,10 +456,6 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - r300screen->caps.index_bias_supported = - r300screen->caps.is_r500 && - rws->get_value(rws, R300_VID_DRM_2_3_0); - pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 09f18b3e624..b810f4081c8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -45,7 +45,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. */ @@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; @@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe, return; } - if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { /* There is a zmask in use, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { /* Decompress the currently bound zbuffer before we bind another one. */ r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; } } else { /* We don't bind another zbuffer, so lock the current one. */ - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); } - } else if (r300->zmask_locked && r300->locked_zbuffer) { + } else if (r300->hyperz_locked && r300->locked_zbuffer) { /* We have a locked zbuffer now, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { /* We are binding some other zbuffer, so decompress the locked one, * it gets unlocked automatically. */ r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; } else { /* We are binding the locked zbuffer again, so unlock it. */ - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; } } } @@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, util_copy_framebuffer_state(r300->fb_state.state, state); - if (!r300->zmask_locked) { + if (!r300->hyperz_locked) { pipe_surface_reference(&r300->locked_zbuffer, NULL); } @@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, break; } - /* Setup Hyper-Z. */ - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_resource *tex = r300_resource(zs_surf->base.texture); - int level = zs_surf->base.u.tex.level; - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); - } - /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; @@ -818,27 +804,25 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } /* Set up AA config. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - - switch (state->cbufs[0]->texture->nr_samples) { - case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - break; - case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - break; - case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - break; - } - } else { - aa->aa_config = 0; + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; } + } else { + aa->aa_config = 0; } if (DBG_ON(r300, DBG_FB)) { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 003fe9a58cd..a1e116f4b61 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,7 +29,6 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state_inlines.h" @@ -642,8 +641,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, /* Compressed formats. */ if (util_format_is_compressed(format)) { - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - return uc.ui; + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC1_UNORM: + /* Add 1/32 to round the border color instead of truncating. */ + /* The Y component is used for the border color. */ + border_swizzled[1] = border_swizzled[2] + 1.0f/32; + util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_SNORM: + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_UNORM: + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + return uc.ui; + default: + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } } switch (desc->channel[0].size) { @@ -937,7 +953,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300) state->sampler_state_count); unsigned i; - if (!r300->zmask_locked || !r300->locked_zbuffer) { + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index b97c45ac198..86ad0b8b8e0 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -171,8 +171,16 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - util_format_is_compressed(format) && dxtc_swizzle); + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + dxtc_swizzle); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -197,41 +205,36 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add sign. */ - for (i = 0; i < desc->nr_channels; i++) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= sign_bit[i]; - } - } - - /* This is truly a special format. - * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) - * in the sampler unit. Also known as D3DFMT_CxV8U8. */ - if (format == PIPE_FORMAT_R8G8Bx_SNORM) { - return R300_TX_FORMAT_CxV8U8 | result; - } - /* RGTC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { case PIPE_FORMAT_RGTC1_SNORM: - result |= sign_bit[0]; + result |= sign_bit[1]; case PIPE_FORMAT_RGTC1_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT; return R500_TX_FORMAT_ATI1N | result; case PIPE_FORMAT_RGTC2_SNORM: - result |= sign_bit[0] | sign_bit[1]; + result |= sign_bit[2] | sign_bit[3]; case PIPE_FORMAT_RGTC2_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT | - R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT; return R400_TX_FORMAT_ATI2N | result; default: return ~0; /* Unsupported/unknown. */ } } + /* This is truly a special format. + * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) + * in the sampler unit. Also known as D3DFMT_CxV8U8. */ + if (format == PIPE_FORMAT_R8G8Bx_SNORM) { + return R300_TX_FORMAT_CxV8U8 | result; + } + + /* Add sign. */ + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= sign_bit[i]; + } + } + /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; @@ -676,6 +679,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | R300_DEPTHMICROTILE(tex->tex.microtile); surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { surf->pitch = tex->tex.stride_in_pixels[level] | @@ -713,14 +718,8 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { struct r300_resource* tex = (struct r300_resource*)texture; - int i; r300_winsys_bo_reference(&tex->buf, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - } - FREE(tex); } @@ -868,8 +867,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) - microtile = R300_BUFFER_SQUARETILED; + microtile = R300_BUFFER_SQUARETILED; break; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 2cfeec7d751..3846fb8b6b3 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -207,29 +207,6 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, return util_format_get_nblocksy(tex->b.b.b.format, height); } -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_resource *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.b.target == PIPE_TEXTURE_3D && - tex->b.b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.b.last_level; i++) { - size += tex->tex.stride_in_bytes[i] * - r300_texture_get_nblocksy(tex, i, FALSE); - } - - size *= tex->tex.depth0; - tex->tex.size_in_bytes = size; - } -} - /* Get a width in pixels from a stride in bytes. */ static unsigned stride_to_width(enum pipe_format format, unsigned stride_in_bytes) @@ -334,12 +311,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} -static void r300_setup_zmask_flags(struct r300_screen *screen, - struct r300_resource *tex) +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) { - /* The tile size of 1 DWORD is: + /* The tile size of 1 DWORD in ZMASK RAM is: * * GPU Pipes 4x4 mode 8x8 mode * ------------------------------------------ @@ -348,8 +330,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, * RV530 1P/2Z 32x16 64x32 * 1P/1Z 16x16 32x32 */ - static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; - static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; if (util_format_is_depth_or_stencil(tex->b.b.b.format) && util_format_get_blocksizebits(tex->b.b.b.format) == 32 && @@ -363,30 +368,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, } for (i = 0; i <= tex->b.b.b.last_level; i++) { - unsigned numdw, compsize; + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); /* The 8x8 compression mode needs macrotiling. */ - compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && tex->tex.macrotile[i] && tex->b.b.b.nr_samples <= 1 ? 8 : 4; - /* Get the zbuffer size (with the aligned width and height). */ - numdw = align(tex->tex.stride_in_pixels[i], - num_blocks_x_per_dw[pipes-1] * compsize) * - align(u_minify(tex->b.b.b.height0, i), - num_blocks_y_per_dw[pipes-1] * compsize); + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); - /* Convert pixels -> dwords. */ - numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * - num_blocks_y_per_dw[pipes-1] * compsize); + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; - /* Check that we have enough ZMASK memory. */ - if (numdw <= screen->caps.zmask_ram * pipes) { - tex->tex.zmask_dwords[i] = numdw; - tex->tex.zcomp8x8[i] = compsize == 8; + tex->tex.zmask_stride_in_pixels[i] = + align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); } else { tex->tex.zmask_dwords[i] = 0; tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = align(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. */ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; } } } @@ -395,7 +419,6 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, static void r300_setup_tiling(struct r300_screen *screen, struct r300_resource *tex) { - struct r300_winsys_screen *rws = screen->rws; enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); @@ -422,9 +445,7 @@ static void r300_setup_tiling(struct r300_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { - tex->tex.microtile = R300_BUFFER_SQUARETILED; - } + tex->tex.microtile = R300_BUFFER_SQUARETILED; break; } @@ -494,8 +515,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_setup_zmask_flags(rscreen, tex); + r300_setup_hyperz_properties(rscreen, tex); if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index d5c73585c81..c0b66899f8b 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -60,10 +60,8 @@ enum r300_value_id { R300_VID_DRM_PATCHLEVEL, /* These should probably go away: */ - R300_VID_DRM_2_1_0, /* Square tiling. */ - R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ R300_CAN_HYPERZ, /* ZMask + HiZ */ R300_CAN_AACOMPRESS, /* CMask */ diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index b5fcc7106fe..cae3888051b 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -503,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 3efdbaba0c3..4206b4a201d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -305,11 +305,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -328,6 +333,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -370,7 +376,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index f0a1ee0cd02..c51a163bd06 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -108,8 +108,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 64c52bca795..0b7d6f70968 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -114,14 +114,16 @@ enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); +unsigned r600_get_minor_version(struct radeon *radeon); +unsigned r600_get_num_backends(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, @@ -251,6 +253,7 @@ struct r600_context { unsigned num_query_running; struct list_head fenced_bo; unsigned max_db; /* for OQ */ + boolean predicate_drawing; }; struct r600_draw { @@ -283,6 +286,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 5d59356bf70..6777be80402 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -133,6 +133,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -2070,7 +2071,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; unsigned format, num_format, format_comp; u32 *bytecode; - int i, r; + int i, r; /* vertex elements offset need special handling, if offset is bigger + * than what we can put in fetch instruction then we need to alterate @@ -2090,23 +2091,23 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return r; for (i = 0; i < ve->count; i++) { - if (elements[i].instance_divisor > 1) { + if (elements[i].instance_divisor > 1) { struct r600_bc_alu alu; memset(&alu, 0, sizeof(alu)); - alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); - alu.src[0].sel = 0; - alu.src[0].chan = 3; + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; alu.dst.sel = i + 1; alu.dst.chan = 3; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { + if ((r = r600_bc_add_alu(&bc, &alu))) { r600_bc_clear(&bc); - return r; - } + return r; + } memset(&alu, 0, sizeof(alu)); alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2121,10 +2122,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { + if ((r = r600_bc_add_alu(&bc, &alu))) { r600_bc_clear(&bc); - return r; - } + return r; + } memset(&alu, 0, sizeof(alu)); alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); @@ -2136,26 +2137,26 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { + if ((r = r600_bc_add_alu(&bc, &alu))) { r600_bc_clear(&bc); - return r; - } + return r; + } memset(&alu, 0, sizeof(alu)); - alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); - alu.src[0].sel = i + 1; - alu.src[0].chan = 3; + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; alu.dst.sel = i + 1; alu.dst.chan = 3; alu.dst.write = 1; alu.last = 1; - if ((r = r600_bc_add_alu(&bc, &alu))) { + if ((r = r600_bc_add_alu(&bc, &alu))) { r600_bc_clear(&bc); - return r; - } - } + return r; + } + } } for (i = 0; i < ve->count; i++) { @@ -2175,7 +2176,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; - vtx.mega_fetch_count = 16; + vtx.mega_fetch_count = 0x1F; vtx.dst_gpr = i + 1; vtx.dst_sel_x = desc->swizzle[0]; vtx.dst_sel_y = desc->swizzle[1]; @@ -2202,14 +2203,14 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return -ENOMEM; } - ve->fs_size = bc.ndw*4; + ve->fs_size = bc.ndw*4; if ((r = r600_bc_build(&bc))) { r600_bc_clear(&bc); return r; } - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 0c5d7133c7a..2363cd1ebc5 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx, } static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct radeon *ws = (struct radeon*)pipe->winsys; struct r600_resource_buffer *rbuffer = r600_buffer(resource); @@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.b.b.b.depth0 = 1; rbuffer->r.b.b.b.array_size = 1; rbuffer->r.b.b.b.flags = 0; - rbuffer->r.b.user_ptr = ptr; + rbuffer->r.b.user_ptr = ptr; rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; return &rbuffer->r.b.b.b; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index adcd74aec76..fcca7f705d4 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -76,8 +76,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, u_upload_flush(rctx->vbuf_mgr->uploader); } -static void r600_update_num_contexts(struct r600_screen *rscreen, - int diff) +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) { pipe_mutex_lock(rscreen->mutex_num_contexts); if (diff > 0) { @@ -431,7 +430,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { + r600_is_sampler_format_supported(screen, format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8dc1f4ad5c3..5f701d87e8f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -227,7 +227,7 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); void r600_init_surface_functions(struct r600_pipe_context *r600); -uint32_t r600_translate_texformat(enum pipe_format format, +uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 726668260cc..343403f92f3 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_pipe.h" +#include "r600d.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { @@ -66,6 +67,30 @@ static boolean r600_get_query_result(struct pipe_context *ctx, return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + uint mode) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + if (!query) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->ctx.predicate_drawing = true; + r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + +} + void r600_init_query_functions(struct r600_pipe_context *rctx) { rctx->context.create_query = r600_create_query; @@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx) rctx->context.begin_query = r600_begin_query; rctx->context.end_query = r600_end_query; rctx->context.get_query_result = r600_get_query_result; + + if (r600_get_num_backends(rctx->screen->radeon) > 0) + rctx->context.render_condition = r600_render_condition; } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 65923fb9648..4146cb3c5fd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; - /* Would like some magic "get_bool_option_once" routine. - */ - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + /* Would like some magic "get_bool_option_once" routine. + */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); @@ -453,24 +453,24 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_ADDRESS: break; - case TGSI_FILE_SYSTEM_VALUE: - if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { - struct r600_bc_alu alu; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); - alu.src[0].sel = 0; - alu.src[0].chan = 3; + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; alu.dst.sel = 0; alu.dst.chan = 3; alu.dst.write = 1; - alu.last = 1; + alu.last = 1; - if ((r = r600_bc_add_alu(ctx->bc, &alu))) - return r; - break; - } + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); @@ -558,13 +558,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->sel = V_SQ_ALU_SRC_LITERAL; memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { - /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ - r600_src->swizzle[0] = 3; - r600_src->swizzle[1] = 3; - r600_src->swizzle[2] = 3; - r600_src->swizzle[3] = 3; - r600_src->sel = 0; - } else { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; + } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->sel = tgsi_src->Register.Index; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index c365979e439..2f7263cdcff 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -358,11 +358,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -379,7 +384,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | + S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); @@ -420,7 +427,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 677e2209340..3c072fe7ca9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - } + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 29e12f1d468..3d0360485aa 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -498,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 3a85a25065a..df83302b38e 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -226,7 +226,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); - if (w < tile_width || h < tile_height) + if (w <= tile_width || h <= tile_height) rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; else rtex->array_mode[level] = array_mode; @@ -413,8 +413,13 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, /* Would like some magic "get_bool_option_once" routine. */ - if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + if (force_tiling == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + /* reenable when 2D tiling is fixed better */ + /*if (r600_get_minor_version(rscreen->radeon) >= 9) + force_tiling = debug_get_bool_option("R600_TILING", TRUE);*/ + force_tiling = debug_get_bool_option("R600_TILING", FALSE); + } if (force_tiling && permit_hardware_blit(screen, templ)) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && @@ -804,7 +809,8 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, } /* texture format translate */ -uint32_t r600_translate_texformat(enum pipe_format format, +uint32_t r600_translate_texformat(struct pipe_screen *screen, + enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p) { @@ -870,8 +876,13 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } - if (r600_enable_s3tc == -1) - r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (r600_enable_s3tc == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + r600_enable_s3tc = 1; + else + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + } if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { if (!r600_enable_s3tc) diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index e8558c49a7c..df70e2889e2 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -67,6 +67,10 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 3aefb5b3bb5..4a5d5413d8c 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -544,6 +544,7 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, rb_pipe->curr.nr_cbufs = 0; memset(rb_pipe->curr.cbufs, 0, sizeof(rb_pipe->curr.cbufs)); + rb_pipe->curr.zsbuf = NULL; /* unwrap the input state */ if (_state) { @@ -556,6 +557,8 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, rb_pipe->curr.cbufs[i] = rbug_resource(_state->cbufs[i]->texture); } unwrapped_state.zsbuf = rbug_surface_unwrap(_state->zsbuf); + if (_state->zsbuf) + rb_pipe->curr.zsbuf = rbug_resource(_state->zsbuf->texture); state = &unwrapped_state; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 685fbfc3087..6d47fb96280 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -248,11 +248,6 @@ softpipe_is_format_supported( struct pipe_screen *screen, return util_format_s3tc_enabled; } - /* u_format doesn't implement RGTC yet */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { - return FALSE; - } - /* * Everything else should be supported by u_format. */ diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 66ddc565722..3dec5de3cc4 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -91,8 +91,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - draw_flush(softpipe->draw); - if (softpipe->fs == fs) return; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 76a3803224a..b7d54605e66 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,7 +40,7 @@ #include "svga_debug.h" -#define MAX_DMA_SIZE (8 * 1024 * 1024) +#define MAX_DMA_SIZE (4 * 1024 * 1024) /** diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index daf1024fd02..6c3275e74c0 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -40,9 +40,12 @@ /* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* */ -static int svga_shader_type( int unit ) +static int svga_shader_type( int shader ) { - return unit + 1; + assert(PIPE_SHADER_VERTEX + 1 == SVGA3D_SHADERTYPE_VS); + assert(PIPE_SHADER_FRAGMENT + 1 == SVGA3D_SHADERTYPE_PS); + assert(shader <= PIPE_SHADER_FRAGMENT); + return shader + 1; } diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 0181588096d..a5c94b4166b 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -317,6 +317,7 @@ dri2_allocate_buffer(__DRIscreen *sPriv, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; buffer->resource = screen->base.screen->resource_create(screen->base.screen, &templ); @@ -510,6 +511,7 @@ dri2_create_image(__DRIscreen *_screen, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; img->texture = screen->base.screen->resource_create(screen->base.screen, &templ); if (!img->texture) { diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 98167cceeb3..68643214060 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -26,6 +26,7 @@ x11_OBJECTS = $(x11_SOURCES:.c=.o) wayland_INCLUDES = \ -I$(TOP)/src/gallium/winsys \ -I$(TOP)/src/egl/wayland \ + -I$(TOP)/src/egl/wayland/wayland-drm \ $(shell pkg-config --cflags-only-I libdrm wayland-client) wayland_SOURCES = $(wildcard wayland/*.c) diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 6107df48822..e60a56074dd 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -455,9 +455,6 @@ egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy) _eglReleaseDisplayResources(drv, dpy); - if (gdpy->pipe) - gdpy->pipe->destroy(gdpy->pipe); - if (dpy->Configs) { _eglDestroyArray(dpy->Configs, egl_g3d_free_config); dpy->Configs = NULL; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h index 9873fee6ec2..301db3128ff 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -56,7 +56,6 @@ struct egl_g3d_display { const struct egl_g3d_loader *loader; struct st_manager *smapi; - struct pipe_context *pipe; }; struct egl_g3d_context { diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c index c9f94a3c498..2068256dff2 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c @@ -640,6 +640,7 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, _EGLContext *ctx = _eglGetCurrentContext(); struct native_surface *nsurf; struct pipe_resource *ptex; + struct pipe_context *pipe; if (!gsurf->render_texture) return EGL_TRUE; @@ -655,22 +656,18 @@ egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); } - /* create a pipe context to copy surfaces */ - if (!gdpy->pipe) { - gdpy->pipe = - gdpy->native->screen->context_create(gdpy->native->screen, NULL); - if (!gdpy->pipe) - return EGL_FALSE; - } + pipe = ndpy_get_copy_context(gdpy->native); + if (!pipe) + return EGL_FALSE; ptex = get_pipe_resource(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); if (ptex) { struct pipe_box src_box; u_box_origin_2d(ptex->width0, ptex->height0, &src_box); - gdpy->pipe->resource_copy_region(gdpy->pipe, ptex, 0, 0, 0, 0, + pipe->resource_copy_region(pipe, ptex, 0, 0, 0, 0, gsurf->render_texture, 0, &src_box); - gdpy->pipe->flush(gdpy->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); nsurf->present(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT, FALSE, 0); pipe_resource_reference(&ptex, NULL); diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 81ce7ab45c7..e1c83168b3a 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -111,6 +111,7 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img, templ.width0 = attrs.Width; templ.height0 = attrs.Height; templ.depth0 = 1; + templ.array_size = 1; /* * XXX fix apps (e.g. wayland) and pipe drivers (e.g. i915) and remove the diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 42889075a39..9246f8c32a4 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -142,6 +142,11 @@ struct native_display { struct pipe_screen *screen; /** + * Context used for copy operations. + */ + struct pipe_context *pipe; + + /** * Available for caller's use. */ void *user_data; @@ -223,6 +228,29 @@ native_attachment_mask_test(uint mask, enum native_attachment att) return !!(mask & (1 << att)); } +/** + * Get the display copy context + */ +static INLINE struct pipe_context * +ndpy_get_copy_context(struct native_display *ndpy) +{ + if (!ndpy->pipe) + ndpy->pipe = ndpy->screen->context_create(ndpy->screen, NULL); + return ndpy->pipe; +} + +/** + * Free display screen and context resources + */ +static INLINE void +ndpy_uninit(struct native_display *ndpy) +{ + if (ndpy->pipe) + ndpy->pipe->destroy(ndpy->pipe); + if (ndpy->screen) + ndpy->screen->destroy(ndpy->screen); +} + struct native_platform { const char *name; diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c index a9d8f32e007..be6713d03a8 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.c +++ b/src/gallium/state_trackers/egl/common/native_helper.c @@ -3,6 +3,7 @@ * Version: 7.9 * * Copyright (C) 2010 LunarG Inc. + * Copyright (C) 2011 VMware Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,6 +25,7 @@ * * Authors: * Chia-I Wu <[email protected]> + * Thomas Hellstrom <[email protected]> */ #include "util/u_inlines.h" @@ -34,6 +36,14 @@ #include "native_helper.h" +/** + * Number of swap fences and mask + */ + +#define EGL_SWAP_FENCES_MAX 4 +#define EGL_SWAP_FENCES_MASK 3 +#define EGL_SWAP_FENCES_DEFAULT 1 + struct resource_surface { struct pipe_screen *screen; enum pipe_format format; @@ -42,6 +52,15 @@ struct resource_surface { struct pipe_resource *resources[NUM_NATIVE_ATTACHMENTS]; uint resource_mask; uint width, height; + + /** + * Swap fences. + */ + struct pipe_fence_handle *swap_fences[EGL_SWAP_FENCES_MAX]; + unsigned int cur_fences; + unsigned int head; + unsigned int tail; + unsigned int desired_fences; }; struct resource_surface * @@ -49,11 +68,16 @@ resource_surface_create(struct pipe_screen *screen, enum pipe_format format, uint bind) { struct resource_surface *rsurf = CALLOC_STRUCT(resource_surface); + char *swap_fences = getenv("EGL_THROTTLE_FENCES"); if (rsurf) { rsurf->screen = screen; rsurf->format = format; rsurf->bind = bind; + rsurf->desired_fences = (swap_fences) ? atoi(swap_fences) : + EGL_SWAP_FENCES_DEFAULT; + if (rsurf->desired_fences > EGL_SWAP_FENCES_MAX) + rsurf->desired_fences = EGL_SWAP_FENCES_MAX; } return rsurf; @@ -225,3 +249,121 @@ resource_surface_present(struct resource_surface *rsurf, return TRUE; } + +/** + * Schedule a copy swap from the back to the front buffer using the + * native display's copy context. + */ +boolean +resource_surface_copy_swap(struct resource_surface *rsurf, + struct native_display *ndpy) +{ + struct pipe_resource *ftex; + struct pipe_resource *btex; + struct pipe_context *pipe; + struct pipe_box src_box; + boolean ret = FALSE; + + pipe = ndpy_get_copy_context(ndpy); + if (!pipe) + return FALSE; + + ftex = resource_surface_get_single_resource(rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT); + if (!ftex) + goto out_no_ftex; + btex = resource_surface_get_single_resource(rsurf, + NATIVE_ATTACHMENT_BACK_LEFT); + if (!btex) + goto out_no_btex; + + u_box_origin_2d(ftex->width0, ftex->height0, &src_box); + pipe->resource_copy_region(pipe, ftex, 0, 0, 0, 0, + btex, 0, &src_box); + ret = TRUE; + + out_no_ftex: + pipe_resource_reference(&btex, NULL); + out_no_btex: + pipe_resource_reference(&ftex, NULL); + + return ret; +} + +static struct pipe_fence_handle * +swap_fences_pop_front(struct resource_surface *rsurf) +{ + struct pipe_screen *screen = rsurf->screen; + struct pipe_fence_handle *fence = NULL; + + if (rsurf->desired_fences == 0) + return NULL; + + if (rsurf->cur_fences >= rsurf->desired_fences) { + screen->fence_reference(screen, &fence, rsurf->swap_fences[rsurf->tail]); + screen->fence_reference(screen, &rsurf->swap_fences[rsurf->tail++], NULL); + rsurf->tail &= EGL_SWAP_FENCES_MASK; + --rsurf->cur_fences; + } + return fence; +} + +static void +swap_fences_push_back(struct resource_surface *rsurf, + struct pipe_fence_handle *fence) +{ + struct pipe_screen *screen = rsurf->screen; + + if (!fence || rsurf->desired_fences == 0) + return; + + while(rsurf->cur_fences == rsurf->desired_fences) + swap_fences_pop_front(rsurf); + + rsurf->cur_fences++; + screen->fence_reference(screen, &rsurf->swap_fences[rsurf->head++], + fence); + rsurf->head &= EGL_SWAP_FENCES_MASK; +} + +boolean +resource_surface_throttle(struct resource_surface *rsurf) +{ + struct pipe_screen *screen = rsurf->screen; + struct pipe_fence_handle *fence = swap_fences_pop_front(rsurf); + + if (fence) { + (void) screen->fence_finish(screen, fence, 0); + screen->fence_reference(screen, &fence, NULL); + return TRUE; + } + + return FALSE; +} + +boolean +resource_surface_flush(struct resource_surface *rsurf, + struct native_display *ndpy) +{ + struct pipe_fence_handle *fence = NULL; + struct pipe_screen *screen = rsurf->screen; + struct pipe_context *pipe= ndpy_get_copy_context(ndpy); + + if (!pipe) + return FALSE; + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &fence); + if (fence == NULL) + return FALSE; + + swap_fences_push_back(rsurf, fence); + screen->fence_reference(screen, &fence, NULL); + + return TRUE; +} + +void +resource_surface_wait(struct resource_surface *rsurf) +{ + while (resource_surface_throttle(rsurf)); +} diff --git a/src/gallium/state_trackers/egl/common/native_helper.h b/src/gallium/state_trackers/egl/common/native_helper.h index 03995de19ae..39564a04365 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.h +++ b/src/gallium/state_trackers/egl/common/native_helper.h @@ -3,6 +3,7 @@ * Version: 7.9 * * Copyright (C) 2010 LunarG Inc. + * Copyright (C) 2011 VMware Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,6 +25,7 @@ * * Authors: * Chia-I Wu <[email protected]> + * Thomas Hellstrom <[email protected]> */ #include "native.h" @@ -74,3 +76,32 @@ boolean resource_surface_present(struct resource_surface *rsurf, enum native_attachment which, void *winsys_drawable_handle); + +/** + * Perform a gallium copy blit between the back left and front left + * surfaces. Needs to be followed by a call to resource_surface_flush. + */ +boolean +resource_surface_copy_swap(struct resource_surface *rsurf, + struct native_display *ndpy); + +/** + * Throttle on outstanding rendering using the copy context. For example + * copy swaps. + */ +boolean +resource_surface_throttle(struct resource_surface *rsurf); + +/** + * Flush pending rendering using the copy context. This function saves a + * marker for upcoming throttles. + */ +boolean +resource_surface_flush(struct resource_surface *rsurf, + struct native_display *ndpy); +/** + * Wait for all rendering using the copy context to be complete. Frees all + * throttle markers saved using resource_surface_flush. + */ +void +resource_surface_wait(struct resource_surface *rsurf); diff --git a/src/gallium/state_trackers/egl/drm/modeset.c b/src/gallium/state_trackers/egl/drm/modeset.c index 0cc06caa2a1..3fff9540905 100644 --- a/src/gallium/state_trackers/egl/drm/modeset.c +++ b/src/gallium/state_trackers/egl/drm/modeset.c @@ -3,6 +3,7 @@ * Version: 7.9 * * Copyright (C) 2010 LunarG Inc. + * Copyright (C) 2011 VMware Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,6 +25,7 @@ * * Authors: * Chia-I Wu <[email protected]> + * Thomas Hellstrom <[email protected]> */ #include "util/u_memory.h" @@ -131,6 +133,25 @@ drm_surface_flush_frontbuffer(struct native_surface *nsurf) } static boolean +drm_surface_copy_swap(struct native_surface *nsurf) +{ + struct drm_surface *drmsurf = drm_surface(nsurf); + struct drm_display *drmdpy = drmsurf->drmdpy; + + (void) resource_surface_throttle(drmsurf->rsurf); + if (!resource_surface_copy_swap(drmsurf->rsurf, &drmdpy->base)) + return FALSE; + + (void) resource_surface_flush(drmsurf->rsurf, &drmdpy->base); + if (!drm_surface_flush_frontbuffer(nsurf)) + return FALSE; + + drmsurf->sequence_number++; + + return TRUE; +} + +static boolean drm_surface_swap_buffers(struct native_surface *nsurf) { struct drm_surface *drmsurf = drm_surface(nsurf); @@ -139,17 +160,21 @@ drm_surface_swap_buffers(struct native_surface *nsurf) struct drm_framebuffer tmp_fb; int err; + if (!drmsurf->have_pageflip) + return drm_surface_copy_swap(nsurf); + if (!drmsurf->back_fb.buffer_id) { if (!drm_surface_init_framebuffers(&drmsurf->base, TRUE)) return FALSE; } if (drmsurf->is_shown && drmcrtc->crtc) { - err = drmModeSetCrtc(drmdpy->fd, drmcrtc->crtc->crtc_id, - drmsurf->back_fb.buffer_id, drmcrtc->crtc->x, drmcrtc->crtc->y, - drmcrtc->connectors, drmcrtc->num_connectors, &drmcrtc->crtc->mode); - if (err) - return FALSE; + err = drmModePageFlip(drmdpy->fd, drmcrtc->crtc->crtc_id, + drmsurf->back_fb.buffer_id, 0, NULL); + if (err) { + drmsurf->have_pageflip = FALSE; + return drm_surface_copy_swap(nsurf); + } } /* swap the buffers */ @@ -175,7 +200,7 @@ drm_surface_present(struct native_surface *nsurf, { boolean ret; - if (preserve || swap_interval) + if (swap_interval) return FALSE; switch (natt) { @@ -183,7 +208,10 @@ drm_surface_present(struct native_surface *nsurf, ret = drm_surface_flush_frontbuffer(nsurf); break; case NATIVE_ATTACHMENT_BACK_LEFT: - ret = drm_surface_swap_buffers(nsurf); + if (preserve) + ret = drm_surface_copy_swap(nsurf); + else + ret = drm_surface_swap_buffers(nsurf); break; default: ret = FALSE; @@ -196,7 +224,9 @@ drm_surface_present(struct native_surface *nsurf, static void drm_surface_wait(struct native_surface *nsurf) { - /* no-op */ + struct drm_surface *drmsurf = drm_surface(nsurf); + + resource_surface_wait(drmsurf->rsurf); } static void @@ -204,6 +234,7 @@ drm_surface_destroy(struct native_surface *nsurf) { struct drm_surface *drmsurf = drm_surface(nsurf); + resource_surface_wait(drmsurf->rsurf); if (drmsurf->current_crtc.crtc) drmModeFreeCrtc(drmsurf->current_crtc.crtc); @@ -236,6 +267,7 @@ drm_display_create_surface(struct native_display *ndpy, drmsurf->color_format = drmconf->base.color_format; drmsurf->width = width; drmsurf->height = height; + drmsurf->have_pageflip = TRUE; drmsurf->rsurf = resource_surface_create(drmdpy->base.screen, drmsurf->color_format, diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 6932f30a6a4..cdbb304c5ee 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -124,8 +124,7 @@ drm_display_destroy(struct native_display *ndpy) drm_display_fini_modeset(&drmdpy->base); - if (drmdpy->base.screen) - drmdpy->base.screen->destroy(drmdpy->base.screen); + ndpy_uninit(ndpy); if (drmdpy->fd >= 0) close(drmdpy->fd); diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h index 03c4fe01dc1..7da9b45f23e 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.h +++ b/src/gallium/state_trackers/egl/drm/native_drm.h @@ -91,6 +91,8 @@ struct drm_surface { boolean is_shown; struct drm_crtc current_crtc; + + boolean have_pageflip; }; struct drm_connector { diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c index a1e91ba701c..4a96039ac39 100644 --- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -320,7 +320,7 @@ fbdev_display_destroy(struct native_display *ndpy) { struct fbdev_display *fbdpy = fbdev_display(ndpy); - fbdpy->base.screen->destroy(fbdpy->base.screen); + ndpy_uninit(&fbdpy->base); close(fbdpy->fd); FREE(fbdpy); } diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c index 3cc4aefa937..3c2475f84d8 100644 --- a/src/gallium/state_trackers/egl/gdi/native_gdi.c +++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c @@ -363,7 +363,7 @@ gdi_display_destroy(struct native_display *ndpy) if (gdpy->configs) FREE(gdpy->configs); - gdpy->base.screen->destroy(gdpy->base.screen); + ndpy_uninit(ndpy); FREE(gdpy); } diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.c b/src/gallium/state_trackers/egl/wayland/native_wayland.c index d4d5f9c2ebb..068c3cd7c8e 100644 --- a/src/gallium/state_trackers/egl/wayland/native_wayland.c +++ b/src/gallium/state_trackers/egl/wayland/native_wayland.c @@ -41,6 +41,7 @@ #include "radeon/drm/radeon_drm_public.h" #include <wayland-client.h> +#include "wayland-drm-client-protocol.h" #include "wayland-egl-priv.h" #include <xf86drm.h> @@ -137,8 +138,7 @@ wayland_display_destroy(struct native_display *ndpy) if (display->config) FREE(display->config); - if (display->base.screen) - display->base.screen->destroy(display->base.screen); + ndpy_uninit(ndpy); FREE(display); } diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index d4f4dd04df4..8e32c6ff0c4 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -476,7 +476,7 @@ ximage_display_destroy(struct native_display *ndpy) if (xdpy->configs) FREE(xdpy->configs); - xdpy->base.screen->destroy(xdpy->base.screen); + ndpy_uninit(ndpy); x11_screen_destroy(xdpy->xscr); if (xdpy->own_dpy) diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index 7cc5af89639..03aa1b1537a 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -63,7 +63,6 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws) batch->base.size = 0; batch->base.relocs = 0; - batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/ batch->base.iws = iws; @@ -104,8 +103,6 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, unsigned offset; int ret = 0; - assert(batch->base.relocs < batch->base.max_relocs); - switch (usage) { case I915_USAGE_SAMPLER: write_domain = 0; diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index 3d0c1fa6224..3bf54011d9e 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -49,7 +49,6 @@ i915_sw_batchbuffer_create(struct i915_winsys *iws) batch->base.size = 0; batch->base.relocs = 0; - batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/ batch->base.iws = iws; @@ -75,8 +74,6 @@ i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); int ret = 0; - assert(batch->base.relocs < batch->base.max_relocs); - if (usage == I915_USAGE_SAMPLER) { } else if (usage == I915_USAGE_RENDER) { diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index aa4035a302b..66398afa698 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -834,25 +834,25 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); /* flush color buffer */ diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index f5cd48d39c6..cd0aa318be5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -45,6 +45,10 @@ #define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 #endif +#ifndef RADEON_INFO_NUM_BACKENDS +#define RADEON_INFO_NUM_BACKENDS 0xa +#endif + enum radeon_family r600_get_family(struct radeon *r600) { return r600->family; @@ -65,6 +69,17 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon) return radeon->clock_crystal_freq; } +unsigned r600_get_num_backends(struct radeon *radeon) +{ + return radeon->num_backends; +} + +unsigned r600_get_minor_version(struct radeon *radeon) +{ + return radeon->minor_version; +} + + static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info = {}; @@ -195,6 +210,26 @@ static int radeon_get_clock_crystal_freq(struct radeon *radeon) return 0; } + +static int radeon_get_num_backends(struct radeon *radeon) +{ + struct drm_radeon_info info; + uint32_t num_backends; + int r; + + radeon->device = 0; + info.request = RADEON_INFO_NUM_BACKENDS; + info.value = (uintptr_t)&num_backends; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + if (r) + return r; + + radeon->num_backends = num_backends; + return 0; +} + + static int radeon_init_fence(struct radeon *radeon) { radeon->fence = 1; @@ -211,6 +246,7 @@ static struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; int r; + drmVersionPtr version; radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { @@ -219,13 +255,27 @@ static struct radeon *radeon_new(int fd, unsigned device) radeon->fd = fd; radeon->device = device; radeon->refcount = 1; - if (fd >= 0) { - r = radeon_get_device(radeon); - if (r) { - fprintf(stderr, "Failed to get device id\n"); - return radeon_decref(radeon); - } + + version = drmGetVersion(radeon->fd); + if (version->version_major != 2) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 2.x.x\n", __FUNCTION__, + version->version_major, version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + exit(1); } + + radeon->minor_version = version->version_minor; + + drmFreeVersion(version); + + r = radeon_get_device(radeon); + if (r) { + fprintf(stderr, "Failed to get device id\n"); + return radeon_decref(radeon); + } + radeon->family = radeon_family_from_device(radeon->device); if (radeon->family == CHIP_UNKNOWN) { fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device); @@ -278,6 +328,9 @@ static struct radeon *radeon_new(int fd, unsigned device) /* get the GPU counter frequency, failure is non fatal */ radeon_get_clock_crystal_freq(radeon); + if (radeon->minor_version >= 9) + radeon_get_num_backends(radeon); + radeon->bomgr = r600_bomgr_create(radeon, 1000000); if (radeon->bomgr == NULL) { return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f170640407d..6b3baa2c909 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -107,7 +107,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, /* initialize block */ block->start_offset = reg[i].offset; - block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n); + block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n, 0); block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2; block->reg = &block->pm4[block->pm4_ndwords]; block->pm4_ndwords += n; @@ -119,7 +119,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, block->nbo++; assert(block->nbo < R600_BLOCK_MAX_BO); block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); block->pm4[block->pm4_ndwords++] = 0x00000000; block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; @@ -771,12 +771,12 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, bo->last_flush &= flush_mask; return; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } @@ -1048,25 +1048,25 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); /* flush color buffer */ @@ -1101,15 +1101,15 @@ void r600_context_flush(struct r600_context *ctx) r600_context_queries_suspend(ctx); /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); @@ -1282,6 +1282,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned required_space; + int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ if (query->type == PIPE_QUERY_TIME_ELAPSED) @@ -1300,21 +1301,39 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) r600_query_result(ctx, query, TRUE); } + if (query->type == PIPE_QUERY_OCCLUSION_COUNTER && + num_backends > 0 && num_backends < ctx->max_db) { + /* as per info on ZPASS the driver must set the unusued DB top bits */ + u32 *results; + int i; + + results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL); + if (results) { + memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4); + + for (i = num_backends; i < ctx->max_db; i++) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } + r600_bo_unmap(ctx->radeon, query->buffer); + } + } + /* emit begin query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = 0; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1327,19 +1346,19 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) { /* emit begin query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = 0; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1349,6 +1368,28 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->num_query_running--; } +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait) +{ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); + + if (operation == PREDICATION_OP_CLEAR) { + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR); + } else { + int results_base = query->num_results - (4 * ctx->max_db); + + if (results_base < 0) + results_base = 0; + + ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + } +} + struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) { struct r600_query *query; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 2d91cd97d68..41c5ee02c38 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -50,6 +50,8 @@ struct radeon { unsigned *cfence; struct r600_bo *fence_bo; unsigned clock_crystal_freq; + unsigned num_backends; + unsigned minor_version; }; struct r600_reg { diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index 1c1ac76fe69..cb12865ff01 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -105,6 +105,18 @@ * 5 - TS events */ +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + +#define PRED_OP(x) ((x) << 16) + +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) + +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -117,8 +129,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) /* Registers */ #define R_0280A0_CB_COLOR0_INFO 0x0280A0 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index 4676c2a1ea9..72c2ff11125 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -96,10 +96,13 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) * we don't actually use the info for anything yet. */ version = drmGetVersion(winsys->fd); - if (version->version_major != 2) { + if (version->version_major != 2 || + version->version_minor < 3) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.x.x\n", __FUNCTION__, - version->version_major, version->version_minor, + "only compatible with 2.3.x (kernel 2.6.34) and later.\n", + __FUNCTION__, + version->version_major, + version->version_minor, version->version_patchlevel); drmFreeVersion(version); exit(1); @@ -157,6 +160,8 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) winsys->vram_size = gem_info.vram_size; drmFreeVersion(version); + + winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); } static void radeon_winsys_destroy(struct r300_winsys_screen *rws) @@ -190,10 +195,6 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->drm_minor; case R300_VID_DRM_PATCHLEVEL: return ws->drm_patchlevel; - case R300_VID_DRM_2_1_0: - return ws->drm_major*100 + ws->drm_minor >= 201; - case R300_VID_DRM_2_3_0: - return ws->drm_major*100 + ws->drm_minor >= 203; case R300_VID_DRM_2_6_0: return ws->drm_major*100 + ws->drm_minor >= 206; case R300_VID_DRM_2_8_0: diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index a38b01048b2..9ee800f5950 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -384,7 +384,7 @@ void radeon_drm_cs_flush(struct r300_winsys_cs *rcs) for (i = 0; i < crelocs; i++) p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); - if (debug_get_option_thread()) { + if (cs->ws->num_cpus > 1 && debug_get_option_thread()) { cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc); assert(cs->thread); } else { diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index f8a89abcfe4..9ecbb074572 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -46,6 +46,7 @@ struct radeon_drm_winsys { uint32_t z_pipes; /* Z pipe count (rv530 only) */ uint32_t gart_size; /* GART size. */ uint32_t vram_size; /* VRAM size. */ + uint32_t num_cpus; /* Number of CPUs. */ unsigned drm_major; unsigned drm_minor; |