diff options
Diffstat (limited to 'src/mesa')
256 files changed, 3337 insertions, 1536 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 3e0f010671c..7073c92240b 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -20,6 +20,13 @@ MESA_CPPFLAGS := $(API_DEFINES) ES1_CPPFLAGS := -DFEATURE_ES1=1 ES2_CPPFLAGS := -DFEATURE_ES2=1 +ifeq ($(MESA_LLVM),1) +MESA_CPPFLAGS += $(LLVM_CFLAGS) +ES1_CPPFLAGS += $(LLVM_CFLAGS) +ES2_CPPFLAGS += $(LLVM_CFLAGS) +endif + + include sources.mak # adjust object dirs diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 79e9b4553b7..d31b957234b 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -80,6 +80,7 @@ if env['platform'] != 'winddk': 'main/pixelstore.c', 'main/points.c', 'main/polygon.c', + 'main/querymatrix.c', 'main/queryobj.c', 'main/rastpos.c', 'main/readpix.c', diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c index 86e59a8e51c..a2f404b616f 100644 --- a/src/mesa/drivers/dri/common/dri_metaops.c +++ b/src/mesa/drivers/dri/common/dri_metaops.c @@ -29,6 +29,7 @@ #include "main/arbprogram.h" #include "main/arrayobj.h" #include "main/bufferobj.h" +#include "main/context.h" #include "main/enable.h" #include "main/matrix.h" #include "main/texstate.h" diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c index b543d4f012c..205f0cebc1c 100644 --- a/src/mesa/drivers/dri/i810/i810render.c +++ b/src/mesa/drivers/dri/i810/i810render.c @@ -37,6 +37,8 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "i810screen.h" diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 71ee753748c..65fd658c047 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -56,7 +56,7 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \ +DRIVER_DEFINES = -I../intel -DI915 \ $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo 
"-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index ec209391ab4..add0adacb56 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -37,6 +37,8 @@ #include "main/mtypes.h" #include "main/enums.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 831981558d8..e381a5c714b 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -106,7 +106,7 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel -I../intel/server +DRIVER_DEFINES = -I../intel INCLUDES += $(INTEL_CFLAGS) DRI_LIB_DEPS += $(INTEL_LIBS) diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index a74bbc25643..d2ac1235e46 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -192,11 +192,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, brw_clip_project_vertex(c, dest_ptr ); } - - - -#define MAX_MRF 16 - void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_indirect vert, GLboolean allocate, diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6b20a2979f8..f7a68cead7c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -604,6 +604,8 @@ #define BRW_ARF_NOTIFICATION_COUNT 0x90 #define BRW_ARF_IP 0xA0 +#define BRW_MRF_COMPR4 (1 << 7) + #define BRW_AMASK 0 #define BRW_IMASK 1 #define BRW_LMASK 2 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 31ff86cf731..ffdddd0a388 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -984,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn, /* 
brw_optimize.c */ void brw_optimize(struct brw_compile *p); +void brw_remove_duplicate_mrf_moves(struct brw_compile *p); +void brw_remove_grf_to_mrf_moves(struct brw_compile *p); #endif diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c index a364b158209..8aa6fb6cc6f 100644 --- a/src/mesa/drivers/dri/i965/brw_optimize.c +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -32,6 +32,594 @@ #include "brw_defines.h" #include "brw_eu.h" +static const struct { + char *name; + int nsrc; + int ndst; + GLboolean is_arith; +} inst_opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 
2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +static INLINE +GLboolean brw_is_arithmetic_inst(const struct brw_instruction 
*inst) +{ + return inst_opcode[inst->header.opcode].is_arith; +} + +static const GLuint inst_stride[7] = { + [0] = 0, + [1] = 1, + [2] = 2, + [3] = 4, + [4] = 8, + [5] = 16, + [6] = 32 +}; + +static const GLuint inst_type_size[8] = { + [BRW_REGISTER_TYPE_UD] = 4, + [BRW_REGISTER_TYPE_D] = 4, + [BRW_REGISTER_TYPE_UW] = 2, + [BRW_REGISTER_TYPE_W] = 2, + [BRW_REGISTER_TYPE_UB] = 1, + [BRW_REGISTER_TYPE_B] = 1, + [BRW_REGISTER_TYPE_F] = 4 +}; + +static INLINE GLboolean +brw_is_grf_written(const struct brw_instruction *inst, + int reg_index, int size, + int gen) +{ + if (inst_opcode[inst->header.opcode].ndst == 0) + return GL_FALSE; + + if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + + if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index * REG_SIZE; + const int reg_end = reg_start + size; + + const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; + const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE + + inst->bits1.da1.dest_subreg_nr; + int length, write_end; + + /* SEND is specific */ + if (inst->header.opcode == BRW_OPCODE_SEND) { + if (gen >= 5) + length = inst->bits3.generic_gen5.response_length*REG_SIZE; + else + length = inst->bits3.generic.response_length*REG_SIZE; + } + else { + length = 1 << inst->header.execution_size; + length *= type_size; + length *= inst->bits1.da1.dest_horiz_stride; + } + + /* If the two intervals intersect, we overwrite the register */ + write_end = write_start + length; + const int left = MAX2(write_start, reg_start); + const int right = MIN2(write_end, reg_end); + + return left < right; +} + +/* Specific path for message register since we need to handle the compr4 case */ +static INLINE GLboolean +brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) +{ + if (inst_opcode[inst->header.opcode].ndst == 0) + return GL_FALSE; + + 
if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) + return GL_TRUE; + + if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index * REG_SIZE; + const int reg_end = reg_start + size; + + const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f; + const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4; + const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; + + /* We use compr4 with a size != 16 elements. Strange, we conservatively + * consider that we are writing the register. + */ + if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) + return GL_TRUE; + + GLboolean is_written = GL_FALSE; + + /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ + if (is_compr4) { + const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; + + /* First 8-way register */ + const int write_start0 = mrf_index*REG_SIZE + + inst->bits1.da1.dest_subreg_nr; + const int write_end0 = write_start0 + length; + + /* Second 8-way register */ + const int write_start1 = (mrf_index+4)*REG_SIZE + + inst->bits1.da1.dest_subreg_nr; + const int write_end1 = write_start1 + length; + + /* If the two intervals intersect, we overwrite the register */ + const int left0 = MAX2(write_start0, reg_start); + const int right0 = MIN2(write_end0, reg_end); + const int left1 = MAX2(write_start1, reg_start); + const int right1 = MIN2(write_end1, reg_end); + + is_written = left0 < right0 || left1 < right1; + } + else { + int length; + length = 1 << inst->header.execution_size; + length *= type_size; + length *= inst->bits1.da1.dest_horiz_stride; + + /* If the two intervals intersect, we write into the register */ + const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE + + inst->bits1.da1.dest_subreg_nr; + const int write_end = write_start + length; + const int left = MAX2(write_start, reg_start); + const 
int right = MIN2(write_end, reg_end);; + + is_written = left < right; + } + + /* SEND may perform an implicit mov to a mrf register */ + if (is_written == GL_FALSE && + inst->header.opcode == BRW_OPCODE_SEND && + inst->bits1.da1.src0_reg_file != 0) { + + const int mrf_start = inst->header.destreg__conditionalmod; + const int write_start = mrf_start * REG_SIZE; + const int write_end = write_start + REG_SIZE; + const int left = MAX2(write_start, reg_start); + const int right = MIN2(write_end, reg_end);; + is_written = left < right; + } + + return is_written; +} + +static INLINE GLboolean +brw_is_mrf_read(const struct brw_instruction *inst, + int reg_index, int size, int gen) +{ + if (inst->header.opcode != BRW_OPCODE_SEND) + return GL_FALSE; + if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) + return GL_TRUE; + + const int reg_start = reg_index*REG_SIZE; + const int reg_end = reg_start + size; + + int length, read_start, read_end; + if (gen >= 5) + length = inst->bits3.generic_gen5.msg_length*REG_SIZE; + else + length = inst->bits3.generic.msg_length*REG_SIZE; + + /* Look if SEND uses an implicit mov. In that case, we read one less register + * (but we write it) + */ + if (inst->bits1.da1.src0_reg_file != 0) + read_start = inst->header.destreg__conditionalmod; + else { + length--; + read_start = inst->header.destreg__conditionalmod + 1; + } + read_start *= REG_SIZE; + read_end = read_start + length; + + const int left = MAX2(read_start, reg_start); + const int right = MIN2(read_end, reg_end); + + return left < right; +} + +static INLINE GLboolean +brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) +{ + int i, j; + if (inst_opcode[inst->header.opcode].nsrc == 0) + return GL_FALSE; + + /* Look at first source. We must take into account register regions to + * monitor carefully the read. 
Note that we are a bit too conservative here + * since we do not take into account the fact that some complete registers + * may be skipped + */ + if (inst_opcode[inst->header.opcode].nsrc >= 1) { + + if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index*REG_SIZE; + const int reg_end = reg_start + size; + + /* See if at least one of this element intersects the interval */ + const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type]; + const int elem_num = 1 << inst->header.execution_size; + const int width = 1 << inst->bits2.da1.src0_width; + const int row_num = elem_num >> inst->bits2.da1.src0_width; + const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride]; + const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride]; + int row_start = inst->bits2.da1.src0_reg_nr*REG_SIZE + + inst->bits2.da1.src0_subreg_nr; + for (j = 0; j < row_num; ++j) { + int write_start = row_start; + for (i = 0; i < width; ++i) { + const int write_end = write_start + type_size; + const int left = write_start > reg_start ? write_start : reg_start; + const int right = write_end < reg_end ? 
write_end : reg_end; + if (left < right) + return GL_TRUE; + write_start += hs; + } + row_start += vs; + } + } + + /* Second src register */ + if (inst_opcode[inst->header.opcode].nsrc >= 2) { + + if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index*REG_SIZE; + const int reg_end = reg_start + size; + + /* See if at least one of this element intersects the interval */ + const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type]; + const int elem_num = 1 << inst->header.execution_size; + const int width = 1 << inst->bits3.da1.src1_width; + const int row_num = elem_num >> inst->bits3.da1.src1_width; + const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride]; + const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride]; + int row_start = inst->bits3.da1.src1_reg_nr*REG_SIZE + + inst->bits3.da1.src1_subreg_nr; + for (j = 0; j < row_num; ++j) { + int write_start = row_start; + for (i = 0; i < width; ++i) { + const int write_end = write_start + type_size; + const int left = write_start > reg_start ? write_start : reg_start; + const int right = write_end < reg_end ? 
write_end : reg_end; + if (left < right) + return GL_TRUE; + write_start += hs; + } + row_start += vs; + } + } + + return GL_FALSE; +} + +static INLINE GLboolean +brw_is_control_done(const struct brw_instruction *mov) { + return + mov->header.dependency_control != 0 || + mov->header.thread_control != 0 || + mov->header.mask_control != 0 || + mov->header.saturate != 0 || + mov->header.debug_control != 0; +} + +static INLINE GLboolean +brw_is_predicated(const struct brw_instruction *mov) { + return mov->header.predicate_control != 0; +} + +static INLINE GLboolean +brw_is_grf_to_mrf_mov(const struct brw_instruction *mov, + int *mrf_index, + int *grf_index, + GLboolean *is_compr4) +{ + if (brw_is_predicated(mov) || + brw_is_control_done(mov) || + mov->header.debug_control != 0) + return GL_FALSE; + + if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT || + mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE || + mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F || + mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 || + mov->bits1.da1.dest_subreg_nr != 0) + return GL_FALSE; + + if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE || + mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F || + mov->bits2.da1.src0_width != BRW_WIDTH_8 || + mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 || + mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 || + mov->bits2.da1.src0_subreg_nr != 0 || + mov->bits2.da1.src0_abs != 0 || + mov->bits2.da1.src0_negate != 0) + return GL_FALSE; + + *grf_index = mov->bits2.da1.src0_reg_nr; + *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f; + *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0; + return GL_TRUE; +} + +static INLINE GLboolean +brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index) +{ + /* remark: no problem to predicate a SEL instruction */ + if ((!brw_is_predicated(inst) || 
inst->header.opcode == BRW_OPCODE_SEL) && + brw_is_control_done(inst) == GL_FALSE && + inst->header.execution_size == 4 && + inst->header.access_mode == BRW_ALIGN_1 && + inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT && + inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE && + inst->bits1.da1.dest_reg_type == BRW_REGISTER_TYPE_F && + inst->bits1.da1.dest_horiz_stride == BRW_HORIZONTAL_STRIDE_1 && + inst->bits1.da1.dest_reg_nr == grf_index && + inst->bits1.da1.dest_subreg_nr == 0 && + brw_is_arithmetic_inst(inst)) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE GLboolean +brw_inst_are_equal(const struct brw_instruction *src0, + const struct brw_instruction *src1) +{ + const GLuint *field0 = (GLuint *) src0; + const GLuint *field1 = (GLuint *) src1; + return field0[0] == field1[0] && + field0[1] == field1[1] && + field0[2] == field1[2] && + field0[3] == field1[3]; +} + +static INLINE void +brw_inst_copy(struct brw_instruction *dst, + const struct brw_instruction *src) +{ + GLuint *field_dst = (GLuint *) dst; + const GLuint *field_src = (GLuint *) src; + field_dst[0] = field_src[0]; + field_dst[1] = field_src[1]; + field_dst[2] = field_src[2]; + field_dst[3] = field_src[3]; +} + +static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst) +{ + int i, nr_insn = 0, to = 0, from = 0; + + for (from = 0; from < p->nr_insn; ++from) { + if (removeInst[from]) + continue; + if(to != from) + brw_inst_copy(p->store + to, p->store + from); + to++; + } + + for (i = 0; i < p->nr_insn; ++i) + if (removeInst[i] == GL_FALSE) + nr_insn++; + p->nr_insn = nr_insn; +} + +/* The gen code emitter generates a lot of duplications in the + * grf-to-mrf moves, for example when texture sampling with the same + * coordinates from multiple textures.. Here, we monitor same mov + * grf-to-mrf instrutions and remove repeated ones where the operands + * and dst ahven't changed in between. 
+ */ +void brw_remove_duplicate_mrf_moves(struct brw_compile *p) +{ + const int gen = p->brw->intel.gen; + int i, j; + + GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); + for (i = 0; i < p->nr_insn; i++) { + if (removeInst[i]) + continue; + + const struct brw_instruction *mov = p->store + i; + int mrf_index, grf_index; + GLboolean is_compr4; + + /* Only consider _straight_ grf-to-mrf moves */ + if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) + continue; + + const int mrf_index0 = mrf_index; + const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; + const int simd16_size = 2 * REG_SIZE; + + for (j = i + 1; j < p->nr_insn; j++) { + const struct brw_instruction *inst = p->store + j; + + if (brw_inst_are_equal(mov, inst)) { + removeInst[j] = GL_TRUE; + continue; + } + + if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || + brw_is_mrf_written(inst, mrf_index0, REG_SIZE) || + brw_is_mrf_written(inst, mrf_index1, REG_SIZE)) + break; + } + } + + brw_remove_inst(p, removeInst); + free(removeInst); +} + +/* Replace moves to MRFs where the value moved is the result of a + * normal arithmetic operation with computation right into the MRF. + */ +void brw_remove_grf_to_mrf_moves(struct brw_compile *p) +{ + int i, j, prev; + struct brw_context *brw = p->brw; + const int gen = brw->intel.gen; + const int simd16_size = 2*REG_SIZE; + + GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); + assert(removeInst); + + for (i = 0; i < p->nr_insn; i++) { + if (removeInst[i]) + continue; + + struct brw_instruction *grf_inst = NULL; + const struct brw_instruction *mov = p->store + i; + int mrf_index, grf_index; + GLboolean is_compr4; + + /* Only consider _straight_ grf-to-mrf moves */ + if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) + continue; + + /* Using comp4 enables a stride of 4 for this instruction */ + const int mrf_index0 = mrf_index; + const int mrf_index1 = is_compr4 ? 
mrf_index+4 : mrf_index+1; + + /* Look where the register has been set */ + prev = i; + GLboolean potential_remove = GL_FALSE; + while (prev--) { + + /* If _one_ instruction writes the grf, we try to remove the mov */ + struct brw_instruction *inst = p->store + prev; + if (brw_is_grf_straight_write(inst, grf_index)) { + potential_remove = GL_TRUE; + grf_inst = inst; + break; + } + + } + + if (potential_remove == GL_FALSE) + continue; + removeInst[i] = GL_TRUE; + + /* Monitor first the section of code between the grf computation and the + * mov. Here we cannot read or write both mrf and grf register + */ + for (j = prev + 1; j < i; ++j) { + struct brw_instruction *inst = p->store + j; + if (removeInst[j]) + continue; + if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || + brw_is_grf_read(inst, grf_index, simd16_size) || + brw_is_mrf_written(inst, mrf_index0, REG_SIZE) || + brw_is_mrf_written(inst, mrf_index1, REG_SIZE) || + brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) || + brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) { + removeInst[i] = GL_FALSE; + break; + } + } + + /* After the mov, we can read or write the mrf. 
If the grf is overwritten, + * we are done + */ + for (j = i + 1; j < p->nr_insn; ++j) { + struct brw_instruction *inst = p->store + j; + if (removeInst[j]) + continue; + + if (brw_is_grf_read(inst, grf_index, simd16_size)) { + removeInst[i] = GL_FALSE; + break; + } + + if (brw_is_grf_straight_write(inst, grf_index)) + break; + } + + /* Note that with the top down traversal, we can safely pacth the mov + * instruction + */ + if (removeInst[i]) { + grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file; + grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr; + } + } + + brw_remove_inst(p, removeInst); + free(removeInst); +} + static GLboolean is_single_channel_dp4(struct brw_instruction *insn) { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 40eece276b7..af08446f2d8 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -46,68 +46,68 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo) } }; -const struct brw_tracked_state brw_blend_constant_color; -const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_check_fallback; -const struct brw_tracked_state brw_clip_prog; -const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_vs_constants; -const struct brw_tracked_state brw_wm_constants; -const struct brw_tracked_state brw_constant_buffer; -const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; -const struct brw_tracked_state brw_gs_prog; -const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_line_stipple; -const struct brw_tracked_state brw_aa_line_parameters; -const struct brw_tracked_state brw_pipelined_state_pointers; -const struct brw_tracked_state brw_binding_table_pointers; -const struct brw_tracked_state brw_depthbuffer; -const struct brw_tracked_state brw_polygon_stipple_offset; -const struct brw_tracked_state 
brw_polygon_stipple; -const struct brw_tracked_state brw_program_parameters; -const struct brw_tracked_state brw_recalculate_urb_fence; -const struct brw_tracked_state brw_sf_prog; -const struct brw_tracked_state brw_sf_unit; -const struct brw_tracked_state brw_sf_vp; -const struct brw_tracked_state brw_state_base_address; -const struct brw_tracked_state brw_urb_fence; -const struct brw_tracked_state brw_vertex_state; -const struct brw_tracked_state brw_vs_surfaces; -const struct brw_tracked_state brw_vs_prog; -const struct brw_tracked_state brw_vs_unit; -const struct brw_tracked_state brw_wm_input_sizes; -const struct brw_tracked_state brw_wm_prog; -const struct brw_tracked_state brw_wm_samplers; -const struct brw_tracked_state brw_wm_constant_surface; -const struct brw_tracked_state brw_wm_surfaces; -const struct brw_tracked_state brw_wm_binding_table; -const struct brw_tracked_state brw_wm_unit; - -const struct brw_tracked_state brw_psp_urb_cbs; - -const struct brw_tracked_state brw_pipe_control; - -const struct brw_tracked_state brw_drawing_rect; -const struct brw_tracked_state brw_indices; -const struct brw_tracked_state brw_vertices; -const struct brw_tracked_state brw_index_buffer; -const struct brw_tracked_state gen6_binding_table_pointers; -const struct brw_tracked_state gen6_blend_state; -const struct brw_tracked_state gen6_cc_state_pointers; -const struct brw_tracked_state gen6_clip_state; -const struct brw_tracked_state gen6_clip_vp; -const struct brw_tracked_state gen6_color_calc_state; -const struct brw_tracked_state gen6_depth_stencil_state; -const struct brw_tracked_state gen6_gs_state; -const struct brw_tracked_state gen6_sampler_state; -const struct brw_tracked_state gen6_scissor_state; -const struct brw_tracked_state gen6_sf_state; -const struct brw_tracked_state gen6_sf_vp; -const struct brw_tracked_state gen6_urb; -const struct brw_tracked_state gen6_viewport_state; -const struct brw_tracked_state gen6_vs_state; -const struct brw_tracked_state 
gen6_wm_state; +extern const struct brw_tracked_state brw_blend_constant_color; +extern const struct brw_tracked_state brw_cc_unit; +extern const struct brw_tracked_state brw_check_fallback; +extern const struct brw_tracked_state brw_clip_prog; +extern const struct brw_tracked_state brw_clip_unit; +extern const struct brw_tracked_state brw_vs_constants; +extern const struct brw_tracked_state brw_wm_constants; +extern const struct brw_tracked_state brw_constant_buffer; +extern const struct brw_tracked_state brw_curbe_offsets; +extern const struct brw_tracked_state brw_invarient_state; +extern const struct brw_tracked_state brw_gs_prog; +extern const struct brw_tracked_state brw_gs_unit; +extern const struct brw_tracked_state brw_line_stipple; +extern const struct brw_tracked_state brw_aa_line_parameters; +extern const struct brw_tracked_state brw_pipelined_state_pointers; +extern const struct brw_tracked_state brw_binding_table_pointers; +extern const struct brw_tracked_state brw_depthbuffer; +extern const struct brw_tracked_state brw_polygon_stipple_offset; +extern const struct brw_tracked_state brw_polygon_stipple; +extern const struct brw_tracked_state brw_program_parameters; +extern const struct brw_tracked_state brw_recalculate_urb_fence; +extern const struct brw_tracked_state brw_sf_prog; +extern const struct brw_tracked_state brw_sf_unit; +extern const struct brw_tracked_state brw_sf_vp; +extern const struct brw_tracked_state brw_state_base_address; +extern const struct brw_tracked_state brw_urb_fence; +extern const struct brw_tracked_state brw_vertex_state; +extern const struct brw_tracked_state brw_vs_surfaces; +extern const struct brw_tracked_state brw_vs_prog; +extern const struct brw_tracked_state brw_vs_unit; +extern const struct brw_tracked_state brw_wm_input_sizes; +extern const struct brw_tracked_state brw_wm_prog; +extern const struct brw_tracked_state brw_wm_samplers; +extern const struct brw_tracked_state brw_wm_constant_surface; +extern const 
struct brw_tracked_state brw_wm_surfaces; +extern const struct brw_tracked_state brw_wm_binding_table; +extern const struct brw_tracked_state brw_wm_unit; + +extern const struct brw_tracked_state brw_psp_urb_cbs; + +extern const struct brw_tracked_state brw_pipe_control; + +extern const struct brw_tracked_state brw_drawing_rect; +extern const struct brw_tracked_state brw_indices; +extern const struct brw_tracked_state brw_vertices; +extern const struct brw_tracked_state brw_index_buffer; +extern const struct brw_tracked_state gen6_binding_table_pointers; +extern const struct brw_tracked_state gen6_blend_state; +extern const struct brw_tracked_state gen6_cc_state_pointers; +extern const struct brw_tracked_state gen6_clip_state; +extern const struct brw_tracked_state gen6_clip_vp; +extern const struct brw_tracked_state gen6_color_calc_state; +extern const struct brw_tracked_state gen6_depth_stencil_state; +extern const struct brw_tracked_state gen6_gs_state; +extern const struct brw_tracked_state gen6_sampler_state; +extern const struct brw_tracked_state gen6_scissor_state; +extern const struct brw_tracked_state gen6_sf_state; +extern const struct brw_tracked_state gen6_sf_vp; +extern const struct brw_tracked_state gen6_urb; +extern const struct brw_tracked_state gen6_viewport_state; +extern const struct brw_tracked_state gen6_vs_state; +extern const struct brw_tracked_state gen6_wm_state; /*********************************************************************** * brw_state.c diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index 1db2a210d45..e878da3850d 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -30,6 +30,8 @@ */ +#include <assert.h> + #include "main/mtypes.h" #include "program/prog_parameter.h" #include "brw_util.h" diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a1bee2e44ab..b6b558e9a69 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -44,6 +44,7 @@ static GLboolean brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg) { int opcode_array[] = { + [OPCODE_MOV] = 1, [OPCODE_ADD] = 2, [OPCODE_CMP] = 3, [OPCODE_DP3] = 2, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 323cfac8fa7..d9fa2e63354 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1283,7 +1283,7 @@ void emit_fb_write(struct brw_wm_compile *c, * + 1 for the second half we get destination + 4. */ brw_MOV(p, - brw_message_reg(nr + channel + (1 << 7)), + brw_message_reg(nr + channel + BRW_MRF_COMPR4), arg0[channel]); } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ @@ -1712,12 +1712,20 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->spill_slot); } + /* Only properly tested on ILK */ + if (p->brw->intel.gen == 5) { + brw_remove_duplicate_mrf_moves(p); + if (c->dispatch_width == 16) + brw_remove_grf_to_mrf_moves(p); + } + if (INTEL_DEBUG & DEBUG_WM) { int i; - printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) + printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i], p->brw->intel.gen); printf("\n"); } } + diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 5f2035d79c9..e19f44035fd 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -29,6 +29,7 @@ #include "main/glheader.h" #include "main/context.h" #include "main/extensions.h" +#include "main/fbobject.h" #include "main/framebuffer.h" #include "main/imports.h" #include "main/points.h" @@ -39,8 +40,6 @@ #include "drivers/common/driverfuncs.h" #include "drivers/common/meta.h" -#include "i830_dri.h" - #include "intel_chipset.h" #include "intel_buffers.h" #include "intel_tex.h" @@ -420,7 +419,7 @@ 
intel_prepare_render(struct intel_context *intel) __DRIdrawable *drawable; drawable = driContext->driDrawablePriv; - if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { if (drawable->lastStamp != drawable->dri2.stamp) intel_update_renderbuffers(driContext, drawable); intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); @@ -428,7 +427,7 @@ intel_prepare_render(struct intel_context *intel) } drawable = driContext->driReadablePriv; - if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { if (drawable->lastStamp != drawable->dri2.stamp) intel_update_renderbuffers(driContext, drawable); driContext->dri2.read_stamp = drawable->dri2.stamp; @@ -613,6 +612,7 @@ intelInitContext(struct intel_context *intel, __DRIscreen *sPriv = driContextPriv->driScreenPriv; struct intel_screen *intelScreen = sPriv->private; int bo_reuse_mode; + __GLcontextModes visual; /* we can't do anything without a connection to the device */ if (intelScreen->bufmgr == NULL) @@ -624,6 +624,11 @@ intelInitContext(struct intel_context *intel, functions->Viewport = intel_viewport; } + if (mesaVis == NULL) { + memset(&visual, 0, sizeof visual); + mesaVis = &visual; + } + if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx, functions, (void *) intel)) { printf("%s: failed to init mesa context\n", __FUNCTION__); @@ -890,14 +895,21 @@ intelMakeCurrent(__DRIcontext * driContextPriv, } if (driContextPriv) { - struct gl_framebuffer *fb = driDrawPriv->driverPrivate; - struct gl_framebuffer *readFb = driReadPriv->driverPrivate; + struct gl_framebuffer *fb, *readFb; + + if (driDrawPriv == NULL && driReadPriv == NULL) { + fb = _mesa_get_incomplete_framebuffer(); + readFb = _mesa_get_incomplete_framebuffer(); + } else { + fb = driDrawPriv->driverPrivate; + readFb = driReadPriv->driverPrivate; + driContextPriv->dri2.draw_stamp = 
driDrawPriv->dri2.stamp - 1; + driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; + } - driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; - driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); _mesa_make_current(&intel->ctx, fb, readFb); - + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer * is NULL at that point. We can't call _mesa_makecurrent() * first, since we need the buffer size for the initial diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c index baf8e130010..de34bbb2aec 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c +++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c @@ -28,7 +28,6 @@ #include "main/extensions.h" #include "intel_extensions.h" -#include "utils.h" static const char *es2_extensions[] = { /* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */ diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 0e2fe893fed..02c0ffce31d 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -45,6 +45,7 @@ #include "main/attrib.h" #include "main/enable.h" #include "main/viewport.h" +#include "main/context.h" #include "swrast/swrast.h" #include "intel_screen.h" diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index fe4de189600..680d18ba299 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -155,6 +155,9 @@ intel_region_alloc_internal(struct intel_context *intel, } region = calloc(sizeof(*region), 1); + if (region == NULL) + return region; + region->cpp = cpp; region->width = width; region->height = height; @@ -189,6 +192,9 @@ intel_region_alloc(struct intel_context *intel, region = intel_region_alloc_internal(intel, cpp, width, height, 
aligned_pitch / cpp, buffer); + if (region == NULL) + return region; + region->tiling = tiling; return region; diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 224b506c05b..6efb2ddc553 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -102,7 +102,7 @@ do_copy_texsubimage(struct intel_context *intel, GLcontext *ctx = &intel->ctx; const struct intel_region *src = get_teximage_source(intel, internalFormat); - if (!intelImage->mt || !src) { + if (!intelImage->mt || !src || !src->buffer) { if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, src, internalFormat); diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 5f813c0efa2..e03b203fb40 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -19,7 +19,6 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type) { struct intel_context *intel = intel_context(ctx); - const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); #if 0 printf("%s intFmt=0x%x format=0x%x type=0x%x\n", @@ -30,39 +29,28 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case 4: case GL_RGBA: case GL_COMPRESSED_RGBA: - if (format == GL_BGRA) { - if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { - return MESA_FORMAT_ARGB8888; - } - else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { - return MESA_FORMAT_ARGB4444; - } - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { - return MESA_FORMAT_ARGB1555; - } - } - return do32bpt ? 
MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; + if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) + return MESA_FORMAT_ARGB4444; + else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) + return MESA_FORMAT_ARGB1555; + else + return MESA_FORMAT_ARGB8888; case 3: case GL_RGB: case GL_COMPRESSED_RGB: - if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - return MESA_FORMAT_RGB565; - } - if (do32bpt) { - if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - } else { + if (type == GL_UNSIGNED_SHORT_5_6_5) return MESA_FORMAT_RGB565; - } + else if (intel->has_xrgb_textures) + return MESA_FORMAT_XRGB8888; + else + return MESA_FORMAT_ARGB8888; case GL_RGBA8: case GL_RGB10_A2: case GL_RGBA12: case GL_RGBA16: - return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; + return MESA_FORMAT_ARGB8888; case GL_RGBA4: case GL_RGBA2: diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h deleted file mode 100644 index def049e7a6b..00000000000 --- a/src/mesa/drivers/dri/intel/server/i830_dri.h +++ /dev/null @@ -1,62 +0,0 @@ -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */ - -#ifndef _I830_DRI_H -#define _I830_DRI_H - -#include "xf86drm.h" - -#define I830_MAX_DRAWABLES 256 - -#define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 9 -#define I830_PATCHLEVEL 0 - -#define I830_REG_SIZE 0x80000 - -typedef struct _I830DRIRec { - drm_handle_t regs; - drmSize regsSize; - - drmSize unused1; /* backbufferSize */ - drm_handle_t unused2; /* backbuffer */ - - drmSize unused3; /* depthbufferSize */ - drm_handle_t unused4; /* depthbuffer */ - - drmSize unused5; /* rotatedSize */ - drm_handle_t unused6; /* rotatedbuffer */ - - drm_handle_t unused7; /* textures */ - int unused8; /* textureSize */ - - drm_handle_t unused9; /* agp_buffers */ - drmSize unused10; /* agp_buf_size */ - - int deviceID; - int width; - int height; - int mem; - int cpp; 
- int bitsPerPixel; - - int unused11[8]; /* was front/back/depth/rotated offset/pitch */ - - int unused12; /* logTextureGranularity */ - int unused13; /* textureOffset */ - - int irq; - int sarea_priv_offset; -} I830DRIRec, *I830DRIPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830ConfigPrivRec, *I830ConfigPrivPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830DRIContextRec, *I830DRIContextPtr; - - -#endif diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h deleted file mode 100644 index 6ea72499c1c..00000000000 --- a/src/mesa/drivers/dri/intel/server/intel.h +++ /dev/null @@ -1,331 +0,0 @@ -#ifndef _INTEL_H_ -#define _INTEL_H_ - -#include "xf86drm.h" /* drm_handle_t, etc */ - -/* Intel */ -#ifndef PCI_CHIP_I810 -#define PCI_CHIP_I810 0x7121 -#define PCI_CHIP_I810_DC100 0x7123 -#define PCI_CHIP_I810_E 0x7125 -#define PCI_CHIP_I815 0x1132 -#define PCI_CHIP_I810_BRIDGE 0x7120 -#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 -#define PCI_CHIP_I810_E_BRIDGE 0x7124 -#define PCI_CHIP_I815_BRIDGE 0x1130 -#endif - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 - -#ifndef PCI_CHIP_I855_GM -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I855_GM_BRIDGE 0x3580 -#endif - -#ifndef PCI_CHIP_I865_G -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I865_G_BRIDGE 0x2570 -#endif - -#ifndef PCI_CHIP_I915_G -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I915_GM -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I915_GM_BRIDGE 0x2590 -#endif - -#ifndef PCI_CHIP_E7221_G -#define PCI_CHIP_E7221_G 0x258A -/* Same as I915_G_BRIDGE */ -#define PCI_CHIP_E7221_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I945_G -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_G_BRIDGE 0x2770 -#endif - -#ifndef PCI_CHIP_I945_GM -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 -#endif - -#define IS_I810(pI810) (pI810->Chipset == 
PCI_CHIP_I810 || \ - pI810->Chipset == PCI_CHIP_I810_DC100 || \ - pI810->Chipset == PCI_CHIP_I810_E) -#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815) -#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M) -#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G) -#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM) -#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME)) -#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME)) -#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G) - -#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G) -#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM) -#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G) -#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM) -#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810)) - -#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810)) - -#define I830_GMCH_CTRL 0x52 - -#define I830_GMCH_MEM_MASK 0x1 -#define I830_GMCH_MEM_64M 0x1 -#define I830_GMCH_MEM_128M 0 - -#define I830_GMCH_GMS_MASK 0x70 -#define I830_GMCH_GMS_DISABLED 0x00 -#define I830_GMCH_GMS_LOCAL 0x10 -#define I830_GMCH_GMS_STOLEN_512 0x20 -#define I830_GMCH_GMS_STOLEN_1024 0x30 -#define I830_GMCH_GMS_STOLEN_8192 0x40 - -#define I855_GMCH_GMS_MASK (0x7 << 4) -#define I855_GMCH_GMS_DISABLED 0x00 -#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) -#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) -#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) -#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) -#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) -#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4) -#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4) - -typedef unsigned char Bool; -#define TRUE 1 -#define FALSE 0 - -#define PIPE_NONE 0<<0 -#define PIPE_CRT 1<<0 
-#define PIPE_TV 1<<1 -#define PIPE_DFP 1<<2 -#define PIPE_LFP 1<<3 -#define PIPE_CRT2 1<<4 -#define PIPE_TV2 1<<5 -#define PIPE_DFP2 1<<6 -#define PIPE_LFP2 1<<7 - -typedef struct _I830MemPool *I830MemPoolPtr; -typedef struct _I830MemRange *I830MemRangePtr; -typedef struct _I830MemRange { - long Start; - long End; - long Size; - unsigned long Physical; - unsigned long Offset; /* Offset of AGP-allocated portion */ - unsigned long Alignment; - drm_handle_t Key; - unsigned long Pitch; // add pitch - I830MemPoolPtr Pool; -} I830MemRange; - -typedef struct _I830MemPool { - I830MemRange Total; - I830MemRange Free; - I830MemRange Fixed; - I830MemRange Allocated; -} I830MemPool; - -typedef struct { - int tail_mask; - I830MemRange mem; - unsigned char *virtual_start; - int head; - int tail; - int space; -} I830RingBuffer; - -typedef struct _I830Rec { - unsigned char *MMIOBase; - unsigned char *FbBase; - int cpp; - uint32_t aper_size; - unsigned int bios_version; - - /* These are set in PreInit and never changed. */ - long FbMapSize; - long TotalVideoRam; - I830MemRange StolenMemory; /* pre-allocated memory */ - long BIOSMemorySize; /* min stolen pool size */ - int BIOSMemSizeLoc; - - /* These change according to what has been allocated. */ - long FreeMemory; - I830MemRange MemoryAperture; - I830MemPool StolenPool; - long allocatedMemory; - - /* Regions allocated either from the above pools, or from agpgart. 
*/ - /* for single and dual head configurations */ - I830MemRange FrontBuffer; - I830MemRange FrontBuffer2; - I830MemRange Scratch; - I830MemRange Scratch2; - - I830RingBuffer *LpRing; - - I830MemRange BackBuffer; - I830MemRange DepthBuffer; - I830MemRange TexMem; - int TexGranularity; - I830MemRange ContextMem; - int drmMinor; - Bool have3DWindows; - - Bool NeedRingBufferLow; - Bool allowPageFlip; - Bool disableTiling; - - int Chipset; - unsigned long LinearAddr; - unsigned long MMIOAddr; - - drmSize registerSize; /**< \brief MMIO register map size */ - drm_handle_t registerHandle; /**< \brief MMIO register map handle */ - // IOADDRESS ioBase; - int irq; /**< \brief IRQ number */ - int GttBound; - - drm_handle_t ring_map; - unsigned int Fence[8]; - -} I830Rec; - -/* - * 12288 is set as the maximum, chosen because it is enough for - * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare. - */ -#define I830_MAXIMUM_VBIOS_MEM 12288 -#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024) -#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024) - -/* Flags for memory allocation function */ -#define FROM_ANYWHERE 0x00000000 -#define FROM_POOL_ONLY 0x00000001 -#define FROM_NEW_ONLY 0x00000002 -#define FROM_MASK 0x0000000f - -#define ALLOCATE_AT_TOP 0x00000010 -#define ALLOCATE_AT_BOTTOM 0x00000020 -#define FORCE_GAPS 0x00000040 - -#define NEED_PHYSICAL_ADDR 0x00000100 -#define ALIGN_BOTH_ENDS 0x00000200 -#define FORCE_LOW 0x00000400 - -#define ALLOC_NO_TILING 0x00001000 -#define ALLOC_INITIAL 0x00002000 - -#define ALLOCATE_DRY_RUN 0x80000000 - -/* Chipset registers for VIDEO BIOS memory RW access */ -#define _855_DRAM_RW_CONTROL 0x58 -#define _845_DRAM_RW_CONTROL 0x90 -#define DRAM_WRITE 0x33330000 - -#define KB(x) ((x) * 1024) -#define MB(x) ((x) * KB(1024)) - -#define GTT_PAGE_SIZE KB(4) -#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y)) -#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y)) -#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE) -#define ROUND_TO_MB(x) 
ROUND_TO((x), MB(1)) -#define PRIMARY_RINGBUFFER_SIZE KB(128) - - -/* Ring buffer registers, p277, overview p19 - */ -#define LP_RING 0x2030 -#define HP_RING 0x2040 - -#define RING_TAIL 0x00 -#define TAIL_ADDR 0x000FFFF8 -#define I830_TAIL_MASK 0x001FFFF8 - -#define RING_HEAD 0x04 -#define HEAD_WRAP_COUNT 0xFFE00000 -#define HEAD_WRAP_ONE 0x00200000 -#define HEAD_ADDR 0x001FFFFC -#define I830_HEAD_MASK 0x001FFFFC - -#define RING_START 0x08 -#define START_ADDR 0x03FFFFF8 -#define I830_RING_START_MASK 0xFFFFF000 - -#define RING_LEN 0x0C -#define RING_NR_PAGES 0x001FF000 -#define I830_RING_NR_PAGES 0x001FF000 -#define RING_REPORT_MASK 0x00000006 -#define RING_REPORT_64K 0x00000002 -#define RING_REPORT_128K 0x00000004 -#define RING_NO_REPORT 0x00000000 -#define RING_VALID_MASK 0x00000001 -#define RING_VALID 0x00000001 -#define RING_INVALID 0x00000000 - - -/* Fence/Tiling ranges [0..7] - */ -#define FENCE 0x2000 -#define FENCE_NR 8 - -#define I915G_FENCE_START_MASK 0x0ff00000 - -#define I830_FENCE_START_MASK 0x07f80000 - -#define FENCE_START_MASK 0x03F80000 -#define FENCE_X_MAJOR 0x00000000 -#define FENCE_Y_MAJOR 0x00001000 -#define FENCE_SIZE_MASK 0x00000700 -#define FENCE_SIZE_512K 0x00000000 -#define FENCE_SIZE_1M 0x00000100 -#define FENCE_SIZE_2M 0x00000200 -#define FENCE_SIZE_4M 0x00000300 -#define FENCE_SIZE_8M 0x00000400 -#define FENCE_SIZE_16M 0x00000500 -#define FENCE_SIZE_32M 0x00000600 -#define FENCE_SIZE_64M 0x00000700 -#define I915G_FENCE_SIZE_1M 0x00000000 -#define I915G_FENCE_SIZE_2M 0x00000100 -#define I915G_FENCE_SIZE_4M 0x00000200 -#define I915G_FENCE_SIZE_8M 0x00000300 -#define I915G_FENCE_SIZE_16M 0x00000400 -#define I915G_FENCE_SIZE_32M 0x00000500 -#define I915G_FENCE_SIZE_64M 0x00000600 -#define I915G_FENCE_SIZE_128M 0x00000700 -#define FENCE_PITCH_1 0x00000000 -#define FENCE_PITCH_2 0x00000010 -#define FENCE_PITCH_4 0x00000020 -#define FENCE_PITCH_8 0x00000030 -#define FENCE_PITCH_16 0x00000040 -#define FENCE_PITCH_32 0x00000050 -#define 
FENCE_PITCH_64 0x00000060 -#define FENCE_VALID 0x00000001 - -#include <mmio.h> - -# define MMIO_IN8(base, offset) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) -# define MMIO_IN32(base, offset) \ - read_MMIO_LE32(base, offset) -# define MMIO_OUT8(base, offset, val) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) -# define MMIO_OUT32(base, offset, val) \ - *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) - - - /* Memory mapped register access macros */ -#define INREG8(addr) MMIO_IN8(MMIO, addr) -#define INREG(addr) MMIO_IN32(MMIO, addr) -#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val) -#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val) - -#define DSPABASE 0x70184 - -#endif diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h index 1ffda1932f1..9145ee6e6cf 100644 --- a/src/mesa/drivers/dri/mach64/mach64_ioctl.h +++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h @@ -32,6 +32,9 @@ #ifndef __MACH64_IOCTL_H__ #define __MACH64_IOCTL_H__ +#include <stdio.h> +#include <stdlib.h> + #include "mach64_dri.h" #include "mach64_reg.h" #include "mach64_lock.h" diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c index 8b8fc485d31..cc0cea618d1 100644 --- a/src/mesa/drivers/dri/mga/mgarender.c +++ b/src/mesa/drivers/dri/mga/mgarender.c @@ -44,6 +44,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "mgacontext.h" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 8be7edb150b..bd1273beea7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -220,7 +220,7 @@ get_tex_format(struct gl_texture_image *ti) case MESA_FORMAT_RGB565: return GL_RGB5; default: - assert(0); + return GL_NONE; } } @@ -231,7 +231,6 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb, struct gl_renderbuffer *rb = att->Renderbuffer; struct gl_texture_image *ti = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; - int ret; /* Allocate a renderbuffer object for the texture if we * haven't already done so. */ @@ -244,9 +243,7 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb, } /* Update the renderbuffer fields from the texture. */ - ret = set_renderbuffer_format(rb, get_tex_format(ti)); - assert(ret); - + set_renderbuffer_format(rb, get_tex_format(ti)); rb->Width = ti->Width; rb->Height = ti->Height; nouveau_surface_ref(&to_nouveau_teximage(ti)->surface, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index dbf9a5cc613..442f4e899ee 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -38,6 +38,7 @@ #include "main/mipmap.h" #include "main/texfetch.h" #include "main/teximage.h" +#include "drivers/common/meta.h" static struct gl_texture_object * nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target) @@ -182,10 +183,10 @@ teximage_fits(struct gl_texture_object *t, int level) struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level]; struct gl_texture_image *ti = t->Image[0][level]; - return ti && (t->Target == GL_TEXTURE_RECTANGLE || - (s->bo && s->width == ti->Width && - s->height == ti->Height 
&& - s->format == ti->TexFormat)); + return ti && to_nouveau_teximage(ti)->surface.bo && + (t->Target == GL_TEXTURE_RECTANGLE || + (s->bo && s->format == ti->TexFormat && + s->width == ti->Width && s->height == ti->Height)); } static GLboolean @@ -589,6 +590,53 @@ nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t) } } +static void +store_mipmap(GLcontext *ctx, GLenum target, int first, int last, + struct gl_texture_object *t) +{ + struct gl_pixelstore_attrib packing = { + .BufferObj = ctx->Shared->NullBufferObj, + .Alignment = 1 + }; + GLenum format = t->Image[0][first]->TexFormat; + unsigned base_format, type, comps; + int i; + + base_format = _mesa_get_format_base_format(format); + _mesa_format_to_type_and_comps(format, &type, &comps); + + for (i = first; i <= last; i++) { + struct gl_texture_image *ti = t->Image[0][i]; + void *data = ti->Data; + + nouveau_teximage(ctx, 3, target, i, ti->InternalFormat, + ti->Width, ti->Height, ti->Depth, + ti->Border, base_format, type, data, + &packing, t, ti); + + _mesa_free_texmemory(data); + } +} + +static void +nouveau_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *t) +{ + if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) { + struct gl_texture_image *base = t->Image[0][t->BaseLevel]; + + nouveau_teximage_map(ctx, base); + _mesa_generate_mipmap(ctx, target, t); + nouveau_teximage_unmap(ctx, base); + + store_mipmap(ctx, target, t->BaseLevel + 1, + get_last_level(t), t); + + } else { + _mesa_meta_GenerateMipmap(ctx, target, t); + } +} + void nouveau_texture_functions_init(struct dd_function_table *functions) { @@ -607,4 +655,5 @@ nouveau_texture_functions_init(struct dd_function_table *functions) functions->BindTexture = nouveau_bind_texture; functions->MapTexture = nouveau_texture_map; functions->UnmapTexture = nouveau_texture_unmap; + functions->GenerateMipmap = nouveau_generate_mipmap; } diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c 
b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c index 21da4f7af16..95691cad047 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c @@ -72,7 +72,7 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit) fb->_ColorDrawBuffers[0])->surface; rt_format |= get_rt_format(s->format); - zeta_pitch = rt_pitch = s->pitch; + rt_pitch = s->pitch; nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET, s->bo, 0, bo_flags); @@ -88,6 +88,9 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit) nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET, s->bo, 0, bo_flags); + } else { + rt_format |= get_rt_format(MESA_FORMAT_Z24_S8); + zeta_pitch = rt_pitch; } BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2); diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c index e46118e4fce..2d45513bb4c 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c @@ -194,7 +194,8 @@ nv20_emit_tex_obj(GLcontext *ctx, int emit) | nvgl_wrap_mode(t->WrapS) << 0; tx_filter = nvgl_filter_mode(t->MagFilter) << 24 - | nvgl_filter_mode(t->MinFilter) << 16; + | nvgl_filter_mode(t->MinFilter) << 16 + | 2 << 12; tx_enable = NV20TCL_TX_ENABLE_ENABLE | log2i(t->MaxAnisotropy) << 4; diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 262fe3cddee..dbf4ad477db 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); + radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { /* need to disable perspective-correct texturing for point sprites */ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 
d43e14581e9..4ae0f304918 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { /* need to disable perspective-correct texturing for point sprites */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index a326ee4c4fa..d2fa816894c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "before compilation"); if (c->Base.is_r500){ - r500_transform_unroll_loops(&c->Base, &loop_state); - debug_program_log(c, "after r500 transform loops"); + rc_unroll_loops(&c->Base, R500_PFS_MAX_INST); + debug_program_log(c, "after unroll loops"); } else{ - rc_transform_unroll_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, -1); debug_program_log(c, "after transform loops"); - + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index d347b4df9cd..666c9c2a7a9 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -32,6 +32,11 @@ #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +struct loop { + int BgnLoop; + +}; + /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. 
@@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp, inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[R300_VS_MAX_TEMPS]; + memset(writemasks, 0, sizeof(writemasks)); + struct rc_instruction * inst; + unsigned int i; + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < R300_VS_MAX_TEMPS; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used fo the predicate + * stack counter. 
*/ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == R300_VS_MAX_TEMPS) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *rci; + struct loop * loops; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; case RC_OPCODE_DP4: ei_vector2(compiler->code, 
VE_DOT_PRODUCT, vpi, inst); break; case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; @@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l = &loops[current_loop_depth - 1]; + unsigned int act_addr = l->BgnLoop - 1; + unsigned int last_addr = (compiler->code->length / 4) - 1; + unsigned int ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + 
return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + default: rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); return; } + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. 
*/ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + compiler->code->length += 4; if (compiler->Base.Error) @@ -406,6 +618,7 @@ struct temporary_allocation { static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; @@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->U.I.SrcReg[i].Index].LastRead = inst; + ta[inst->U.I.SrcReg[i].Index].LastRead = + end_loop ? 
end_loop : inst; } } @@ -633,30 +871,24 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { struct emulate_loop_state loop_state; - + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); debug_program_log(compiler, "before compilation"); - /* XXX Ideally this should be done only for r3xx, but since - * we don't have branching support for r5xx, we use the emulation - * on all chipsets. */ - rc_transform_unroll_loops(&compiler->Base, &loop_state); - - debug_program_log(compiler, "after transform loops"); - - if (compiler->Base.is_r500){ - rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); - } else { - rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); - } - debug_program_log(compiler, "after emulate loops"); + if (compiler->Base.is_r500) + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + else + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); - rc_emulate_branches(&compiler->Base); + debug_program_log(compiler, "after emulate loops"); - debug_program_log(compiler, "after emulate branches"); + if (!compiler->Base.is_r500) { + rc_emulate_branches(&compiler->Base); + debug_program_log(compiler, "after emulate branches"); + } if (compiler->Base.is_r500) { struct radeon_program_transformation transformations[] = { @@ -718,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler->code); + r300_vertex_program_dump(compiler); } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 5800f1a78e1..e6009338e2e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN 
CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "radeon_compiler.h" #include "radeon_code.h" +#include "../r300_reg.h" #include <stdio.h> @@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op) { fprintf(stderr, " dst: %d%s op: ", (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } if (op & 0x80) { if (op & 0x1) { fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); @@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src) r300_vs_swiz_debug[(src >> 22) & 0x7]); } -void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c) { + struct r300_vertex_program_code * vs = c->code; unsigned instrcount = vs->length / 4; unsigned i; @@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs) r300_vs_src_dump(vs->body.d[offset+1+src]); } } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index e6b5522c5b9..80a120497e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -30,7 +30,6 @@ #include <stdio.h> #include "../r300_reg.h" -#include "radeon_emulate_loops.h" /** * Rewrite IF instructions to use the ALU result special register. 
@@ -60,31 +59,6 @@ int r500_transform_IF( return 1; } -/** - * Rewrite loops to make them easier to emit. This is not a local - * transformation, because it modifies and reorders an entire block of code. - */ -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state *s) -{ - int i; - - rc_transform_unroll_loops(c, s); - - for( i = s->LoopCount - 1; i >= 0; i-- ){ - struct rc_instruction * inst_continue; - if(!s->Loops[i].EndLoop){ - continue; - } - /* Insert a continue instruction at the end of the loop. This - * is required in order to emit loops correctly. */ - inst_continue = rc_insert_new_instruction(c, - s->Loops[i].EndIf->Prev); - inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE; - } - -} - static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0d005a794ff..34173351f83 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -49,6 +49,4 @@ extern int r500_transform_IF( struct rc_instruction * inst, void* data); -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state * s); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 0bd8f0a239f..9b60e30f586 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -64,7 +64,16 @@ struct branch_info { }; struct loop_info { - int LoopStart; + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; }; struct emit_state { @@ -368,6 +377,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst unsigned int newip = ++s->Code->inst_end; + /* Currently all 
loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; switch(inst->U.I.Opcode){ @@ -378,32 +393,77 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); loop = &s->Loops[s->CurrentLoopDepth++]; - - /* We don't emit an instruction for BGNLOOP, so we need to - * decrement the instruction counter, but first we need to - * set LoopStart to the current value of inst_end, which - * will end up being the first real instruction in the loop.*/ - loop->LoopStart = s->Code->inst_end--; + memset(loop, 0, sizeof(struct loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; break; - case RC_OPCODE_BRK: - /* Don't emit an instruction for BRK */ - s->Code->inst_end--; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; break; - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP | - R500_FC_JUMP_FUNC(0xff); - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart); + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | 
R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; break; case RC_OPCODE_ENDLOOP: - /* Don't emit an instruction for ENDLOOP */ - s->Code->inst_end--; + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } s->CurrentLoopDepth--; break; - + } case RC_OPCODE_IF: if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); @@ -442,24 +502,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst } branch = &s->Branches[s->CurrentBranchDepth - 1]; - - if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){ - branch->Endif = --s->Code->inst_end; - s->Code->inst[branch->Endif].inst2 |= - R500_FC_B_OP0_DECR; - } - else{ - branch->Endif = newip; - - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | 
R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ @@ -544,11 +596,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - /* Use FULL flow control mode if branches are nested deep enough. - * We don not need to enable FULL flow control mode for loops, becasue - * we aren't using the hardware loop instructions. - */ - if (s.MaxBranchDepth >= 4) { + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index d03689763bc..896246d2035 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -221,6 +221,9 @@ struct r500_fragment_program_code { int max_temp_idx; uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; }; struct rX00_fragment_program_code { @@ -240,6 +243,12 @@ struct rX00_fragment_program_code { #define R500_VS_MAX_ALU 1024 #define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) #define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. 
*/ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -260,9 +269,18 @@ struct r300_vertex_program_code { uint32_t InputsRead; uint32_t OutputsWritten; -}; -void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; #endif /* RADEON_CODE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 1c8ba864a41..935dc9b0a80 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig } } + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. 
+ */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f15905d79d4..7c42eb3ae57 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); struct r300_fragment_program_compiler { struct radeon_compiler Base; @@ -110,8 +111,12 @@ struct r300_vertex_program_compiler { void * UserData; void 
(*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; }; void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index fbb4235c223..faf531b412e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -43,6 +43,12 @@ struct instruction_state { unsigned char SrcReg[3]; }; +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + struct branchinfo { unsigned int HaveElse:1; @@ -59,6 +65,10 @@ struct deadcode_state { struct branchinfo * BranchStack; unsigned int BranchStackSize; unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; }; @@ -78,6 +88,22 @@ static void or_updatemasks( dst->Address = a->Address | b->Address; } +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + static void push_branch(struct deadcode_state * s) { memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, @@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } } } + push_loop(&s); 
break; } - case RC_OPCODE_CONTINUE: case RC_OPCODE_BRK: + push_break(&s); + break; case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONT: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 131e9e7436d..32d4b45dd6d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -39,7 +39,6 @@ #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) struct const_value { - struct radeon_compiler * C; struct rc_src_register * Src; float Value; @@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src, c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; } -static unsigned int loop_calc_iterations(struct emulate_loop_state *s, - struct loop_info * loop, unsigned int max_instructions) +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop, unsigned int prog_inst_limit) { - unsigned int total_i = rc_recompute_ips(s->C); + unsigned int total_i = rc_recompute_ips(c); unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* +1 because the program already has one iteration of the loop. 
*/ - return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i)); + return 1 + ((prog_inst_limit - total_i) / loop_i); } -static void loop_unroll(struct emulate_loop_state * s, - struct loop_info *loop, unsigned int iterations) +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) { unsigned int i; struct rc_instruction * ptr; @@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s, rc_remove_instruction(loop->EndLoop); for( i = 1; i < iterations; i++){ for(ptr = first; ptr != last->Next; ptr = ptr->Next){ - struct rc_instruction *new = rc_alloc_instruction(s->C); + struct rc_instruction *new = rc_alloc_instruction(c); memcpy(new, ptr, sizeof(struct rc_instruction)); rc_insert_instruction(append_to, new); append_to = new; @@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst, if(value->Src->File != file || value->Src->Index != index || !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ - return; + return; } switch(inst->U.I.Opcode){ case RC_OPCODE_MOV: @@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, if(file != RC_FILE_TEMPORARY || count_inst->Index != index || (1 << GET_SWZ(count_inst->Swz,0) != mask)){ - return; + return; } /* Find the index of the counter register. */ opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -185,13 +184,16 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, count_inst->Unknown = 1; return; } - } -static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop) +/** + * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. 
+ */ +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int prog_inst_limit) { - int end_loops = 1; + int end_loops; int iterations; struct count_inst count_inst; float limit_value; @@ -201,12 +203,12 @@ static int transform_const_loop(struct emulate_loop_state * s, struct rc_instruction * inst; /* Find the counter and the upper limit */ - - if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ limit = &loop->Cond->U.I.SrcReg[0]; counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ limit = &loop->Cond->U.I.SrcReg[1]; counter = &loop->Cond->U.I.SrcReg[0]; } @@ -214,13 +216,13 @@ static int transform_const_loop(struct emulate_loop_state * s, DBG("No constant limit.\n"); return 0; } - + /* Find the initial value of the counter */ counter_value.Src = counter; counter_value.Value = 0.0f; counter_value.HasValue = 0; - counter_value.C = s->C; - for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; inst = inst->Next){ rc_for_all_writes_mask(inst, update_const_value, &counter_value); } @@ -230,11 +232,12 @@ static int transform_const_loop(struct emulate_loop_state * s, } DBG("Initial counter value is %f\n", counter_value.Value); /* Determine how the counter is modified each loop */ - count_inst.C = s->C; + count_inst.C = c; count_inst.Index = counter->Index; count_inst.Swz = counter->Swizzle; count_inst.Amount = 0.0f; count_inst.Unknown = 0; + end_loops = 1; for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ switch(inst->U.I.Opcode){ /* XXX In the future we might want to try to unroll nested @@ -246,6 +249,16 @@ static int transform_const_loop(struct emulate_loop_state * s, loop->EndLoop = inst; end_loops--; break; + case 
RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loops. */ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; /* XXX Check if the counter is modified within an if statement. */ case RC_OPCODE_IF: @@ -266,17 +279,20 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Calculate the number of iterations of this loop. Keeping this * simple, since we only support increment and decrement loops. */ - limit_value = get_constant_value(s->C, limit, 0); + limit_value = get_constant_value(c, limit, 0); DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGT: - case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: iterations = (int) ceilf((limit_value - counter_value.Value) / count_inst.Amount); break; - case RC_OPCODE_SLE: - case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: iterations = (int) floorf((limit_value - counter_value.Value) / count_inst.Amount) + 1; break; @@ -284,77 +300,85 @@ static int transform_const_loop(struct emulate_loop_state * s, return 0; } + if (prog_inst_limit > 0 + && iterations > loop_max_possible_iterations(c, loop, + prog_inst_limit)) { + return 0; + } + DBG("Loop will have %d iterations.\n", iterations); - + /* Prepare loop for unrolling */ rc_remove_instruction(loop->Cond); rc_remove_instruction(loop->If); rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); - - loop_unroll(s, loop, iterations); + + unroll_loop(c, loop, iterations); loop->EndLoop = NULL; return 1; } -/** - * This function prepares a loop to be unrolled by converting it into an if - * statement. 
Here is an outline of the conversion process: - * BGNLOOP; -> BGNLOOP; - * <Additional conditional code> -> <Additional conditional code> - * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; - * IF temp[0]; -> IF temp[0]; - * BRK; -> - * ENDIF; -> <Loop Body> - * <Loop Body> -> ENDIF; - * ENDLOOP; -> ENDLOOP - * +/** + * @param c + * @param loop * @param inst A pointer to a BGNLOOP instruction. - * @return If the loop can be unrolled, a pointer to the first instruction of - * the unrolled loop. - * Otherwise, A pointer to the ENDLOOP instruction. - * Null if there is an error. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. */ -static struct rc_instruction * transform_loop(struct emulate_loop_state * s, +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, struct rc_instruction * inst) { - struct loop_info *loop; struct rc_instruction * ptr; - memory_pool_array_reserve(&s->C->Pool, struct loop_info, - s->Loops, s->LoopCount, s->LoopReserved, 1); - - loop = &s->Loops[s->LoopCount++]; - memset(loop, 0, sizeof(struct loop_info)); if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ - rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); - return NULL; + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; } + + memset(loop, 0, sizeof(struct loop_info)); + loop->BeginLoop = inst; - for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + switch(ptr->U.I.Opcode){ case RC_OPCODE_BGNLOOP: - /* Nested loop */ - ptr = transform_loop(s, ptr); - if(!ptr){ - return NULL; + { + /* Nested loop, skip ahead to the end. 
*/ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; } break; + } case RC_OPCODE_BRK: - loop->Brk = ptr; - if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ - rc_error(s->C, - "%s: expected ENDIF\n",__FUNCTION__); - return NULL; - } - loop->EndIf = ptr->Next; - if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ - rc_error(s->C, - "%s: expected IF\n", __FUNCTION__); - return NULL; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; } + loop->Brk = ptr; loop->If = ptr->Prev; + loop->EndIf = ptr->Next; switch(loop->If->Prev->U.I.Opcode){ case RC_OPCODE_SLT: case RC_OPCODE_SGE: @@ -364,18 +388,58 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, case RC_OPCODE_SNE: break; default: - rc_error(s->C, "%s expected conditional\n", + rc_error(c, "%s: expected conditional", __FUNCTION__); - return NULL; + return 0; } loop->Cond = loop->If->Prev; - ptr = loop->EndIf; break; + case RC_OPCODE_ENDLOOP: loop->EndLoop = ptr; break; } } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. 
+ * @return 1 for success, 0 for failure + */ +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) + return 0; + + if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){ + return 1; + } + /* Reverse the conditional instruction */ switch(loop->Cond->U.I.Opcode){ case RC_OPCODE_SGE: @@ -398,43 +462,51 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, break; default: rc_error(s->C, "loop->Cond is not a conditional.\n"); - return NULL; - } - - /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop)){ - return loop->BeginLoop->Next; + return 0; } - /* Prepare the loop to be unrolled */ + /* Prepare the loop to be emulated */ rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); - return loop->EndLoop; + return 1; } -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, int prog_inst_limit) { struct rc_instruction * ptr; - + memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; - ptr = s->C->Program.Instructions.Next; - while(ptr != &s->C->Program.Instructions) { + s->prog_inst_limit = prog_inst_limit; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr); - if(!ptr){ + if (!transform_loop(s, ptr)) return; + } + } +} + +void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != 
&c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop, prog_inst_limit); } } - ptr = ptr->Next; } } -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions) +void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit) { int i; /* Iterate backwards of the list of loops so that loops that nested @@ -444,8 +516,8 @@ void rc_emulate_loops(struct emulate_loop_state *s, if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_calc_iterations(s, &s->Loops[i], - max_instructions); - loop_unroll(s, &s->Loops[i], iterations); + unsigned int iterations = loop_max_possible_iterations( + s->C, &s->Loops[i], prog_inst_limit); + unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 7748813c4eb..bba1f68e308 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -21,12 +21,14 @@ struct emulate_loop_state { struct loop_info * Loops; unsigned int LoopCount; unsigned int LoopReserved; + int prog_inst_limit; }; -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, int prog_inst_limit); -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions); +void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); + +void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 04f234f11d8..2ea830be7f9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ 
b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0, }, { - .Opcode = RC_OPCODE_CONTINUE, - .Name = "CONTINUE", + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", .IsFlowControl = 1, .NumSrcRegs = 0 }, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 8b9fa07dde2..6e18d6eb3f1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -187,7 +187,7 @@ typedef enum { RC_OPCODE_ENDLOOP, - RC_OPCODE_CONTINUE, + RC_OPCODE_CONT, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index eca06515367..7a3f35950a6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo inst = inst->Next) { /* XXX In the future we might be able to make the optimizer * smart enough to handle loops. 
*/ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP + || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ return; } rc_for_all_reads_mask(inst, peephole_scan_read, &s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 8a912da4613..ce72cd97ab2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -65,6 +65,11 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; + /** + * If an instruction is inside of a loop, end_loop will be the + * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + */ + int end_loop; }; static void print_live_intervals(struct live_intervals * src) @@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst, else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else { - if (inst->IP > reg->Live.End) - reg->Live.End = inst->IP; - } + } else if (s->end_loop) + reg->Live.End = s->end_loop; + else if (inst->IP > reg->Live.End) + reg->Live.End = inst->IP; } static void compute_live_intervals(struct regalloc_state * s) @@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { + + /* For all instructions inside of a loop, the ENDLOOP + * instruction is used as the end of the live interval. 
*/ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + int loops = 1; + struct rc_instruction * tmp; + for(tmp = inst->Next; + tmp != &s->C->Program.Instructions; + tmp = tmp->Next) { + if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loops++; + break; + } else if (tmp->U.I.Opcode + == RC_OPCODE_ENDLOOP) { + if(!--loops) { + s->end_loop = tmp->IP; + break; + } + } + } + } + + if (inst->IP == s->end_loop) + s->end_loop = 0; + rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 3cc28972934..857aae55145 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c) for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - if (inst->U.I.Opcode != RC_OPCODE_KILP - || inst->Prev->U.I.Opcode != RC_OPCODE_IF - || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + if (inst->U.I.Opcode != RC_OPCODE_KILP) continue; - } + inst->U.I.Opcode = RC_OPCODE_KIL; - inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0])); - /* Remove IF */ - rc_remove_instruction(inst->Prev); - /* Remove ENDIF */ - rc_remove_instruction(inst->Next); + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index e4b302bbad9..3d2f8928fa6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -461,7 +461,7 @@ 
static void r300InitGLExtensions(GLcontext *ctx) if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_R420) _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex"); if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f25264b6f2d..f7705b0f6fe 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index bb8f91491f5..cf89ab7ec3d 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + radeon_prepare_render(&rmesa->radeon); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 4ba6740e3d9..94588698265 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -152,8 +152,8 @@ int32_t r300TranslateTexFormat(gl_format mesaFormat) case MESA_FORMAT_Z32: return R300_EASY_TX_FORMAT(X, X, X, X, X32); /* EXT_texture_sRGB */ - case MESA_FORMAT_SRGBA8: - return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SARGB8: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; case MESA_FORMAT_SLA8: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA; case MESA_FORMAT_SL8: diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index 172f85eb264..27acff9c166 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -72,7 +72,7 @@ unsigned r600_check_blit(gl_format mesa_format) case MESA_FORMAT_Z24_S8: case 
MESA_FORMAT_Z16: case MESA_FORMAT_Z32: - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: case MESA_FORMAT_SLA8: case MESA_FORMAT_SL8: break; @@ -320,9 +320,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: format = COLOR_8_8_8_8; - comp_swap = SWAP_STD_REV; + comp_swap = SWAP_ALT; SETbit(cb_color0_info, SOURCE_FORMAT_bit); SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask); break; @@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + COMMIT_BATCH(); } @@ -1043,17 +1050,17 @@ set_tex_resource(context_t * context, SETfield(sq_tex_resource4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: SETfield(sq_tex_resource1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(sq_tex_resource4, SQ_SEL_W, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(sq_tex_resource4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(sq_tex_resource4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); break; @@ -1477,7 +1484,6 @@ set_default_state(context_t *context) (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); - R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); @@ -1526,6 +1532,7 @@ set_default_state(context_t *context) R600_OUT_BATCH(0); R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0); + R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0); END_BATCH(); COMMIT_BATCH(); @@ -1607,7 +1614,7 @@ unsigned r600_blit(GLcontext *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 308, __FUNCTION__); /* load shaders */ load_shaders(context->radeon.glCtx); @@ -1632,7 +1639,7 @@ unsigned r600_blit(GLcontext *ctx, set_tex_sampler(context); /* dst */ - /* 27 */ + /* 31 */ set_render_target(context, dst_bo, dst_mesaformat, dst_pitch, dst_width, dst_height, dst_offset); /* scissors */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 84d9d423124..389b0412baa 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ 
b/src/mesa/drivers/dri/r600/r600_context.c @@ -72,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R600_ENABLE_GLSL_TEST 1 #define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program @@ -140,6 +142,7 @@ static const struct dri_extension card_extensions[] = { {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, {"GL_SGIS_generate_mipmap", NULL}, {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, {NULL, NULL} /* *INDENT-ON* */ }; @@ -157,6 +160,7 @@ static const struct dri_extension mm_extensions[] = { static const struct dri_extension gl_20_extension[] = { #ifdef R600_ENABLE_GLSL_TEST {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, #else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, #endif /* R600_ENABLE_GLSL_TEST */ diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index 41419f84601..512a52ede3e 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -431,7 +431,7 @@ unsigned r600IsFormatRenderable(gl_format mesa_format) case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z16: case MESA_FORMAT_Z32: - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: case MESA_FORMAT_SLA8: case MESA_FORMAT_SL8: return 1; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 1600033b9bd..ba3690b70ed 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -605,17 +605,17 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa } break; /* EXT_texture_sRGB */ - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, 
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); break; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 99a33df4fcb..9c954cbf70c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) case 2: format = FMT_8_8; break; case 3: - format = FMT_8_8_8; break; + /* for some (small/unaligned) strides using 4 comps works + * better, probably same as GL_SHORT below + * test piglit/draw-vertices */ + format = FMT_8_8_8_8; break; case 4: format = FMT_8_8_8_8; break; default: @@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { + /* + * r600 - trunc to -PI..PI range + * r700 - normalize by dividing by 2PI + * see fdo bug 27901 + */ + int tmp; checkop1(pAsm); tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + 
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; - next_ins(pAsm); + pAsm->C[1].f = 0.5f; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if (pAsm->bR6xx) + { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } + else + { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; @@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) checkop1(pAsm); tmp = gethelpr(pAsm); - /* tmp.x = src /2*PI 
*/ - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; + pAsm->C[1].f = 0.5F; - next_ins(pAsm); + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if(pAsm->bR6xx) { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } else { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } // COS dst.x, a.x pAsm->D.dst.opcode = SQ_OP2_INST_COS; @@ -6473,7 +6600,7 @@ 
GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, * results are undefined anyway */ if(export_count == 0) { - Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE); } if(pR700AsmCode->cf_last_export_ptr != NULL) diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index cefda3ac4ba..bf8063391a2 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -265,17 +265,6 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) if (context->radeon.tcl.aos_count == 0) return; - BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); - END_BATCH(); - COMMIT_BATCH(); - for(i=0; i<VERT_ATTRIB_MAX; i++) { if(vp->mesa_program->Base.InputsRead & (1 << i)) { @@ -523,9 +512,9 @@ static void r700SetRenderTarget(context_t *context, int id) CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: format = COLOR_8_8_8_8; - comp_swap = SWAP_STD_REV; + comp_swap = SWAP_ALT; number_type = NUMBER_SRGB; SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); break; @@ -617,18 +606,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a r700SetDepthTarget(context); - BEGIN_BATCH_NO_AUTOSTATE(8 + 2); + BEGIN_BATCH_NO_AUTOSTATE(7 + 2); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); - 
R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1); R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All); - R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1); + R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All, + rrb->bo, + r700->DB_DEPTH_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { @@ -687,27 +683,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_TILE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_FRAG.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), 
r700->render_target[id].CB_COLOR0_INFO.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); + COMMIT_BATCH(); } @@ -1465,9 +1469,6 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); int count = context->radeon.tcl.aos_count * 18; - if (count) - count += 6; - radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1567,7 +1568,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(sq, always, 34, r700SendSQConfig); ALLOC_STATE(db, always, 17, r700SendDBState); ALLOC_STATE(stencil, always, 4, r700SendStencilState); - ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); + ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState); ALLOC_STATE(sc, always, 15, r700SendSCState); ALLOC_STATE(scissor, always, 22, r700SendScissorState); ALLOC_STATE(aa, always, 12, r700SendAAState); @@ -1578,7 +1579,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); - ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState); + ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); ALLOC_STATE(sx, always, 9, r700SendSXState); @@ -1590,7 +1591,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(ps, always, 24, r700SendPSState); ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); 
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); - ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); + ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState); ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 09c48565b68..d1008f28b9b 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; @@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) context->radeon.front_buffer_dirty = GL_TRUE; } + radeon_prepare_render(radeon); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1929b7cc129..c5771f9fd0b 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -244,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim) return num_verts - verts_off; } -static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, + int prim, GLint basevertex) { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); @@ -282,6 +283,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim total_emit = 3 /* 
VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 5 + 2; /* DRAW_INDEX */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -294,6 +296,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); R600_OUT_BATCH(context->ind_buf.bo_offset); @@ -364,6 +371,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 3; /* DRAW */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -376,6 +384,11 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet if(start == 0) { @@ -433,16 +446,16 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, dwords = PRE_EMIT_STATE_BUFSZ; if (ib) - dwords += nr_prims * 14; + dwords += nr_prims * 18; else { for (i = 0; i < nr_prims; ++i) { if (prim[i].start == 0) - dwords += 10; + dwords += 14; else if (prim[i].count > 0xffff) - dwords += prim[i].count + 10; + dwords += prim[i].count + 14; else - dwords += ((prim[i].count + 1) / 2) + 10; + dwords += ((prim[i].count + 1) / 2) + 14; } } @@ -625,11 +638,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct 
gl_client_array *input stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; - if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input[i]->Type) != 4 || + || getTypeSize(input[i]->Type) != 4 #endif - stride < 4) + ) { r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } @@ -637,19 +650,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) - { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); - r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); - context->stream_desc[index].is_named_bo = GL_FALSE; - } - else - { - context->stream_desc[index].stride = input[i]->StrideB; - context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; - context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; - context->stream_desc[index].is_named_bo = GL_TRUE; - } + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; } else { @@ -932,7 +936,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, - prim[i].mode); + prim[i].mode, + prim[i].basevertex); else r700RunRenderPrimitiveImmediate(ctx, prim[i].start, @@ -977,18 +982,24 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really 
needed. */ - if (!index_bounds_valid) { - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - } - if (min_index) { + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + /* do we want to rebase, minimizes the + * amount of data to upload? */ + if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); return; + } } - /* Make an attempt at drawing */ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 137f3007ced..6a2a09eaf1a 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; - if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input->Type) != 4 || + || getTypeSize(input->Type) != 4 #endif - stride < 4) + ) { pStreamDesc->type = GL_FLOAT; diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index b7ee9a134bf..7d54fabebbc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -414,9 +414,9 @@ enum { CHIP_FAMILY_R350, CHIP_FAMILY_RV350, CHIP_FAMILY_RV380, + CHIP_FAMILY_RS400, CHIP_FAMILY_R420, CHIP_FAMILY_RV410, - CHIP_FAMILY_RS400, CHIP_FAMILY_RS600, CHIP_FAMILY_RS690, CHIP_FAMILY_RS740, diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 13f1f0611b8..c1a660af3d0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ 
b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -708,7 +708,6 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); radeon->front_cliprects = GL_TRUE; - radeon->front_buffer_dirty = GL_TRUE; } else { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); radeon->front_cliprects = GL_FALSE; @@ -1132,17 +1131,13 @@ flush_front: if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) && (screen->dri2.loader->flushFrontBuffer != NULL)) { __DRIdrawable * drawable = radeon_get_drawable(radeon); - (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); - /* Only clear the dirty bit if front-buffer rendering is no longer - * enabled. This is done so that the dirty bit can only be set in - * glDrawBuffer. Otherwise the dirty bit would have to be set at - * each of N places that do rendering. This has worse performances, - * but it is much easier to get correct. + /* We set the dirty bit in radeon_prepare_render() if we're + * front buffer rendering once we get there. */ - if (!radeon->is_front_buffer_rendering) { - radeon->front_buffer_dirty = GL_FALSE; - } + radeon->front_buffer_dirty = GL_FALSE; + + (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 5a7d52c4d2f..92663bf66d7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -493,6 +493,50 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) return _mesa_get_format_bytes(rb->base.Format) * 8; } +/* + * Check if drawable has been invalidated by dri2InvalidateDrawable(). + * Update renderbuffers if so. 
This prevents a client from accessing + * a backbuffer that has a swap pending but not yet completed. + * + * See intel_prepare_render for equivalent code in intel driver. + * + */ +void radeon_prepare_render(radeonContextPtr radeon) +{ + __DRIcontext *driContext = radeon->dri.context; + __DRIdrawable *drawable; + __DRIscreen *screen; + + screen = driContext->driScreenPriv; + if (!screen->dri2.loader) + return; + + drawable = driContext->driDrawablePriv; + if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + + /* Intel driver does the equivalent of this, no clue if it is needed: + * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base); + */ + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = driContext->driReadablePriv; + if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } + + /* If we're currently rendering to the front buffer, the rendering + * that will happen next will probably dirty the front buffer. So + * mark it as dirty here. + */ + if (radeon->is_front_buffer_rendering) + radeon->front_buffer_dirty = GL_TRUE; +} + void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, GLboolean front_only) @@ -514,6 +558,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, screen = context->driScreenPriv; radeon = (radeonContextPtr) context->driverPrivate; + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. 
*/ + drawable->lastStamp = drawable->dri2.stamp; + if (screen->dri2.loader && (screen->dri2.loader->base.version > 2) && (screen->dri2.loader->getBuffersWithFormat != NULL)) { @@ -650,6 +699,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, rb->base.Height = drawable->h; rb->has_surface = 0; + /* r6xx+ tiling */ + rb->tile_config = radeon->radeonScreen->tile_config; + rb->group_bytes = radeon->radeonScreen->group_bytes; + rb->num_channels = radeon->radeonScreen->num_channels; + rb->num_banks = radeon->radeonScreen->num_banks; + rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op; + if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) { if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "(reusing depth buffer as stencil)\n"); @@ -678,7 +734,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, bo->flags |= RADEON_BO_FLAGS_MACRO_TILE; if (tiling_flags & RADEON_TILING_MICRO) bo->flags |= RADEON_BO_FLAGS_MICRO_TILE; - + } if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 5156c5d0d0a..f06e5fdf244 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -93,6 +93,13 @@ struct radeon_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ __DRIdrawable *dPriv; + + /* r6xx+ tiling */ + GLuint tile_config; + GLint group_bytes; + GLint num_channels; + GLint num_banks; + GLint r7xx_bank_op; }; struct radeon_framebuffer @@ -614,5 +621,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv); extern void radeonDestroyContext(__DRIcontext * driContextPriv); +void radeon_prepare_render(radeonContextPtr radeon); #endif diff --git 
a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index c877e6c1765..c6e5f110ea3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -133,7 +133,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree height = _mesa_next_pow_two_32(lvl->height); lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits); - lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits); + lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits); assert(lvl->size > 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c index dadb8002c7d..fb741173ca8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c +++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c @@ -179,6 +179,9 @@ radeonReadPixels(GLcontext * ctx, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, GLvoid * pixels) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels)) return; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 82107cc6aeb..fa97a19302c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -213,6 +213,10 @@ static const GLuint __driNConfigOptions = 17; static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo ); +#ifndef RADEON_INFO_TILE_CONFIG +#define RADEON_INFO_TILE_CONFIG 0x6 +#endif + static int radeonGetParam(__DRIscreen *sPriv, int param, void *value) { @@ -232,6 +236,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value) case RADEON_PARAM_NUM_Z_PIPES: info.request = RADEON_INFO_NUM_Z_PIPES; break; + case 
RADEON_INFO_TILE_CONFIG: + info.request = RADEON_INFO_TILE_CONFIG; + break; default: return -EINVAL; } @@ -376,6 +383,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = { }; #endif +static void +radeonDRI2Flush(__DRIdrawable *drawable) +{ + radeonContextPtr rmesa; + + rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate; + radeonFlush(rmesa->glCtx); +} + +static const struct __DRI2flushExtensionRec radeonFlushExtension = { + { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, + radeonDRI2Flush, + dri2InvalidateDrawable, +}; + static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->device_id = device_id; @@ -1305,6 +1327,56 @@ radeonCreateScreen2(__DRIscreen *sPriv) else screen->chip_flags |= RADEON_CLASS_R600; + /* r6xx+ tiling */ + if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) { + ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); + if (ret) + fprintf(stderr, "failed to get tiling info\n"); + else { + screen->tile_config = temp; + screen->r7xx_bank_op = 0; + switch((screen->tile_config & 0xe) >> 1) { + case 0: + screen->num_channels = 1; + break; + case 1: + screen->num_channels = 2; + break; + case 2: + screen->num_channels = 4; + break; + case 3: + screen->num_channels = 8; + break; + default: + fprintf(stderr, "bad channels\n"); + break; + } + switch((screen->tile_config & 0x30) >> 4) { + case 0: + screen->num_banks = 4; + break; + case 1: + screen->num_banks = 8; + break; + default: + fprintf(stderr, "bad banks\n"); + break; + } + switch((screen->tile_config & 0xc0) >> 6) { + case 0: + screen->group_bytes = 256; + break; + case 1: + screen->group_bytes = 512; + break; + default: + fprintf(stderr, "bad group_bytes\n"); + break; + } + } + } + if (IS_R300_CLASS(screen)) { ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); if (ret) { @@ -1379,6 +1451,8 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->extensions[i++] = &r600TexBufferExtension.base; #endif + 
screen->extensions[i++] = &radeonFlushExtension.base; + screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 0d7e335fa3a..2b33201a538 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -112,6 +112,13 @@ typedef struct radeon_screen { int kernel_mm; drm_radeon_sarea_t *sarea; /* Private SAREA data */ struct radeon_bo_manager *bom; + + /* r6xx+ tiling */ + GLuint tile_config; + GLint group_bytes; + GLint num_channels; + GLint num_banks; + GLint r7xx_bank_op; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 1adb6096033..9dfe2dd2433 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, * two main types: * - 1D (akin to macro-linear/micro-tiled on older asics) * - 2D (akin to macro-tiled/micro-tiled on older asics) - * only 1D tiling is implemented below */ #if defined(RADEON_R600) static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, @@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, return offset; } +static inline GLint r600_log2(GLint n) +{ + GLint log2 = 0; + + while (n >>= 1) + ++log2; + return log2; +} + +static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) +{ + GLint group_bytes = rrb->group_bytes; + GLint num_channels = rrb->num_channels; + GLint num_banks = rrb->num_banks; + GLint r7xx_bank_op = rrb->r7xx_bank_op; + /* */ + GLint group_bits = r600_log2(group_bytes); + GLint channel_bits = r600_log2(num_channels); + GLint bank_bits = r600_log2(num_banks); + 
GLint element_bytes = rrb->cpp; + GLint num_samples = 1; + GLint tile_width = 8; + GLint tile_height = 8; + GLint tile_thickness = 1; + GLint macro_tile_width = num_banks; + GLint macro_tile_height = num_channels; + GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width; + GLint height = rrb->base.Height / tile_height; + GLint z = 0; + GLint sample_number = 0; + /* */ + GLint tile_bytes; + GLint macro_tile_bytes; + GLint macro_tiles_per_row; + GLint macro_tiles_per_slice; + GLint slice_offset; + GLint macro_tile_row_index; + GLint macro_tile_column_index; + GLint macro_tile_offset; + GLint pixel_number = 0; + GLint element_offset; + GLint bank = 0; + GLint channel = 0; + GLint total_offset; + GLint group_mask = (1 << group_bits) - 1; + GLint offset_low; + GLint offset_high; + GLint offset = 0; + + switch (num_channels) { + case 2: + default: + // channel[0] = x[3] ^ y[3] + channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0; + break; + case 4: + // channel[0] = x[4] ^ y[3] + channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0; + // channel[1] = x[3] ^ y[4] + channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1; + break; + case 8: + // channel[0] = x[5] ^ y[3] + channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0; + // channel[1] = x[4] ^ x[5] ^ y[4] + channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1; + // channel[2] = x[3] ^ y[5] + channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2; + break; + } + + switch (num_banks) { + case 4: + // bank[0] = x[3] ^ y[4 + log2(num_channels)] + bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0; + if (r7xx_bank_op) + // bank[1] = x[4] ^ y[3 + log2(num_channels)] ^ x[5] + bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1; + else + // bank[1] = x[4] ^ y[3 + log2(num_channels)] + bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1; + break; + case 8: + // bank[0] = x[3] ^ y[5 + log2(num_channels)] + bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0; + // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + 
log2(num_channels)] + bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1; + if (r7xx_bank_op) + // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6] + bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2; + else + // bank[2] = x[5] ^ y[3 + log2(num_channels)] + bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2; + break; + } + + tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; + macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes; + macro_tiles_per_row = pitch_elements / macro_tile_width; + macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height); + slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes; + macro_tile_row_index = (y / tile_height) / macro_tile_height; + macro_tile_column_index = (x / tile_width) / macro_tile_width; + macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes; + + if (is_depth) { + GLint pixel_offset = 0; + + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] + pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + switch (element_bytes) { + case 2: + pixel_offset = pixel_number * element_bytes * num_samples; + break; + case 4: + /* stencil and depth data are stored separately within a tile. + * stencil is stored in a contiguous tile before the depth tile. + * stencil element is 1 byte, depth element is 3 bytes. + * stencil tile is 64 bytes. 
+ */ + if (is_stencil) + pixel_offset = pixel_number * 1 * num_samples; + else + pixel_offset = (pixel_number * 3 * num_samples) + 64; + break; + } + element_offset = pixel_offset + (sample_number * element_bytes); + } else { + GLint sample_offset; + + switch (element_bytes) { + case 1: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 2: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 4: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] + pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + } + sample_offset = sample_number * (tile_bytes / num_samples); + element_offset = sample_offset + (pixel_number * element_bytes); + } + total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits); + total_offset += element_offset; + + offset_low = total_offset & group_mask; + offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits); + offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high; + + return offset; +} + /* depth buffers */ static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { GLubyte *ptr = rrb->bo->ptr; - GLint offset = 
r600_1d_tile_helper(rrb, x, y, 1, 0); + GLint offset; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 1, 0); + else + offset = r600_1d_tile_helper(rrb, x, y, 1, 0); return &ptr[offset]; } @@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { GLubyte *ptr = rrb->bo->ptr; - GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); + GLint offset; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 1, 1); + else + offset = r600_1d_tile_helper(rrb, x, y, 1, 1); return &ptr[offset]; } @@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, if (rrb->has_surface || !(rrb->bo->flags & mask)) { offset = x * rrb->cpp + y * rrb->pitch; } else { - offset = r600_1d_tile_helper(rrb, x, y, 0, 0); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 0, 0); + else + offset = r600_1d_tile_helper(rrb, x, y, 0, 0); } return &ptr[offset]; } diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index f2fcb46688a..29defe73a70 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -40,7 +40,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/macros.h" #include "main/simple_list.h" +#include "math/m_xform.h" + #include "swrast_setup/swrast_setup.h" + #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" @@ -408,6 +411,8 @@ static GLboolean radeon_run_render( GLcontext *ctx, !radeon_dma_validate_render( ctx, VB )) return GL_TRUE; + radeon_prepare_render(&rmesa->radeon); + tnl->Driver.Render.Start( ctx ); for (i = 0 ; i < VB->PrimitiveCount ; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index ea796e1a45f..5e1718f9dfc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx, GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index 29fd31ac23f..4cb0bb60c85 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -153,6 +153,9 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, _mesa_select_tex_image(ctx, texObj, target, level); int srcx, srcy, dstx, dsty; + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (border) goto fail; @@ -202,6 +205,9 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (!do_copy_texsubimage(ctx, target, level, radeon_tex_obj(texObj), (radeon_texture_image *)texImage, xoffset, yoffset, x, y, width, height)) { diff --git 
a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index d2b190e42e0..8c6a50d2f0d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -551,7 +551,7 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx, case GL_SRGB8_ALPHA8: case GL_COMPRESSED_SRGB: case GL_COMPRESSED_SRGB_ALPHA: - return MESA_FORMAT_SRGBA8; + return MESA_FORMAT_SARGB8; case GL_SLUMINANCE: case GL_SLUMINANCE8: diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c index c369bb124c2..2d9e80e29c4 100644 --- a/src/mesa/drivers/dri/savage/savagerender.c +++ b/src/mesa/drivers/dri/savage/savagerender.c @@ -33,6 +33,8 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "savagecontext.h" diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c index 896c43db1b0..4351f119555 100644 --- a/src/mesa/drivers/dri/unichrome/via_render.c +++ b/src/mesa/drivers/dri/unichrome/via_render.c @@ -33,6 +33,8 @@ #include "main/macros.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "via_context.h" diff --git a/src/mesa/main/arbprogram.h b/src/mesa/main/arbprogram.h index 787ffd62f4b..e2e535e911e 100644 --- a/src/mesa/main/arbprogram.h +++ b/src/mesa/main/arbprogram.h @@ -27,7 +27,6 @@ #define ARBPROGRAM_H -#include "compiler.h" #include "glheader.h" diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h index 8999edc724f..fdf7e2bca46 100644 --- a/src/mesa/main/arrayobj.h +++ b/src/mesa/main/arrayobj.h @@ -28,7 +28,7 @@ #ifndef ARRAYOBJ_H #define ARRAYOBJ_H -#include "context.h" +#include "mtypes.h" /** * \file arrayobj.h diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 002448fedb8..753949be503 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -41,6 +41,7 @@ #include "hint.h" 
#include "light.h" #include "lines.h" +#include "macros.h" #include "matrix.h" #include "multisample.h" #include "points.h" diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h index 912529cfdf9..f234d06c6cc 100644 --- a/src/mesa/main/bufferobj.h +++ b/src/mesa/main/bufferobj.h @@ -29,7 +29,7 @@ #define BUFFEROBJ_H -#include "context.h" +#include "mtypes.h" /* diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index e76ab5527b0..49d86b3b1f1 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -35,6 +35,7 @@ #include "context.h" #include "colormac.h" #include "enums.h" +#include "macros.h" #include "state.h" diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h index 4c78eeda488..6657370c4b6 100644 --- a/src/mesa/main/clear.h +++ b/src/mesa/main/clear.h @@ -27,7 +27,7 @@ #define CLEAR_H -#include "main/mtypes.h" +#include "glheader.h" extern void GLAPIENTRY diff --git a/src/mesa/main/clip.h b/src/mesa/main/clip.h index d53afb45bdf..ac472d66e08 100644 --- a/src/mesa/main/clip.h +++ b/src/mesa/main/clip.h @@ -31,7 +31,7 @@ #ifndef CLIP_H #define CLIP_H -#include "mtypes.h" +#include "glheader.h" extern void GLAPIENTRY _mesa_ClipPlane( GLenum plane, const GLdouble *equation ); diff --git a/src/mesa/main/colormac.h b/src/mesa/main/colormac.h index 905f4e22837..245fb658bb3 100644 --- a/src/mesa/main/colormac.h +++ b/src/mesa/main/colormac.h @@ -33,9 +33,9 @@ #define COLORMAC_H -#include "imports.h" #include "config.h" #include "macros.h" +#include "mtypes.h" /** \def BYTE_TO_CHAN diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 32f7d969d8d..0f2d1a8f8da 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -177,7 +177,7 @@ /** * Per-program constants (power of two) * - * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assmebly shader + * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assembly shader * and GLSL shader names for the same thing. 
They should \b always have the * same value. Each refers to the number of vec4 values supplied as * per-program parameters. diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index a369532e99c..b01fed1781e 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -462,7 +462,7 @@ _mesa_init_current(GLcontext *ctx) /** - * Init vertex/fragment program limits. + * Init vertex/fragment/geometry program limits. * Important: drivers should override these with actual limits. */ static void @@ -477,16 +477,18 @@ init_program_limits(GLenum type, struct gl_program_constants *prog) prog->MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS; prog->MaxUniformComponents = 4 * MAX_UNIFORMS; - if (type == GL_VERTEX_PROGRAM_ARB) { + switch (type) { + case GL_VERTEX_PROGRAM_ARB: prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; - } - else if (type == GL_FRAGMENT_PROGRAM_ARB) { + break; + case GL_FRAGMENT_PROGRAM_ARB: prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; - } else { + break; + case MESA_GEOMETRY_PROGRAM: prog->MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; @@ -497,6 +499,9 @@ init_program_limits(GLenum type, struct gl_program_constants *prog) prog->MaxGeometryUniformComponents = MAX_GEOMETRY_UNIFORM_COMPONENTS; prog->MaxGeometryOutputVertices = MAX_GEOMETRY_OUTPUT_VERTICES; prog->MaxGeometryTotalOutputComponents = MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS; + break; + default: + assert(0 && "Bad program type in init_program_limits()"); } /* Set the native limits to zero. 
This implies that there is no native diff --git a/src/mesa/main/convolve.c b/src/mesa/main/convolve.c index 15e8dffc230..f63bddc44d5 100644 --- a/src/mesa/main/convolve.c +++ b/src/mesa/main/convolve.c @@ -37,6 +37,7 @@ #include "convolve.h" #include "context.h" #include "image.h" +#include "macros.h" #include "mtypes.h" #include "state.h" #include "main/dispatch.h" diff --git a/src/mesa/main/debug.h b/src/mesa/main/debug.h index 0449cb1798a..b517cc8259f 100644 --- a/src/mesa/main/debug.h +++ b/src/mesa/main/debug.h @@ -36,6 +36,9 @@ #ifndef _DEBUG_H #define _DEBUG_H +#include "glheader.h" +#include "mtypes.h" + #if _HAVE_FULL_GL extern void _mesa_print_tri_caps( const char *name, GLuint flags ); diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h index 3dde081f5a5..afbac77f0e2 100644 --- a/src/mesa/main/depthstencil.h +++ b/src/mesa/main/depthstencil.h @@ -26,6 +26,7 @@ #ifndef DEPTHSTENCIL_H #define DEPTHSTENCIL_H +#include "mtypes.h" extern struct gl_renderbuffer * _mesa_new_z24_renderbuffer_wrapper(GLcontext *ctx, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8c86b392c7b..9a84e5a79cf 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -97,6 +97,11 @@ _mesa_init_fbobjects(GLcontext *ctx) DummyRenderbuffer.Delete = delete_dummy_renderbuffer; } +struct gl_framebuffer * +_mesa_get_incomplete_framebuffer(void) +{ + return &DummyFramebuffer; +} /** * Helper routine for getting a gl_renderbuffer. 
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index ff946033a4d..9850ee9aa23 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -26,10 +26,14 @@ #ifndef FBOBJECT_H #define FBOBJECT_H +#include "mtypes.h" extern void _mesa_init_fbobjects(GLcontext *ctx); +extern struct gl_framebuffer * +_mesa_get_incomplete_framebuffer(void); + extern struct gl_renderbuffer * _mesa_lookup_renderbuffer(GLcontext *ctx, GLuint id); diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c index 269ff3f8b99..9f26c012d66 100644 --- a/src/mesa/main/fog.c +++ b/src/mesa/main/fog.c @@ -27,6 +27,7 @@ #include "colormac.h" #include "context.h" #include "fog.h" +#include "macros.h" #include "mtypes.h" diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 49463fcc3c2..90449cc04f0 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -26,7 +26,6 @@ #include "imports.h" #include "formats.h" -#include "config.h" /** diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index aa14185628f..ad176caaa0f 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -33,7 +33,7 @@ #define FORMATS_H -#include "main/mtypes.h" +#include <GL/gl.h> diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 56558cfcc1e..e0aac26f62b 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -75,7 +75,6 @@ compute_depth_max(struct gl_framebuffer *fb) fb->_MRD = (GLfloat)1.0 / fb->_DepthMaxF; } - /** * Create and initialize a gl_framebuffer object. 
* This is intended for creating _window_system_ framebuffers, not generic diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index 1b6e3b1f0cb..2e9844282f8 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -26,6 +26,7 @@ #ifndef FRAMEBUFFER_H #define FRAMEBUFFER_H +#include "mtypes.h" extern struct gl_framebuffer * _mesa_create_framebuffer(const GLvisual *visual); diff --git a/src/mesa/main/get.h b/src/mesa/main/get.h index 320492b4ce2..99a004b71dd 100644 --- a/src/mesa/main/get.h +++ b/src/mesa/main/get.h @@ -32,7 +32,7 @@ #define GET_H -#include "mtypes.h" +#include "glheader.h" extern void GLAPIENTRY diff --git a/src/mesa/main/histogram.c b/src/mesa/main/histogram.c index 3a65bb19260..4e482bcd54b 100644 --- a/src/mesa/main/histogram.c +++ b/src/mesa/main/histogram.c @@ -29,6 +29,7 @@ #include "context.h" #include "image.h" #include "histogram.h" +#include "macros.h" #include "main/dispatch.h" diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 63c28342f26..86aa6d0d702 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -32,7 +32,6 @@ #include "glheader.h" #include "colormac.h" -#include "context.h" #include "enums.h" #include "image.h" #include "imports.h" diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 25080db40c4..46e5c932d0f 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -756,7 +756,7 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#ifdef _GNU_SOURCE +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); diff --git a/src/mesa/main/mm.c b/src/mesa/main/mm.c index 3ef38e94be9..25a0293703c 100644 --- a/src/mesa/main/mm.c +++ b/src/mesa/main/mm.c @@ -22,6 +22,11 @@ * */ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +#include "compiler.h" #include "mm.h" diff --git a/src/mesa/main/mm.h 
b/src/mesa/main/mm.h index df340808ac9..228721ca2a5 100644 --- a/src/mesa/main/mm.h +++ b/src/mesa/main/mm.h @@ -32,9 +32,6 @@ #define MM_H -#include "imports.h" - - struct mem_block { struct mem_block *next, *prev; struct mem_block *next_free, *prev_free; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index cbb9eb84f33..8d92892ad7d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -36,7 +36,6 @@ #include "main/glheader.h" #include "main/config.h" -#include "main/compiler.h" #include "main/mfeatures.h" #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ @@ -1808,6 +1807,11 @@ struct gl_program /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */ gl_texture_index SamplerTargets[MAX_SAMPLERS]; + /** Bitmask of which register files are read/written with indirect + * addressing. Mask of (1 << PROGRAM_x) bits. + */ + GLbitfield IndirectRegisterFiles; + /** Logical counts */ /*@{*/ GLuint NumInstructions; @@ -2477,29 +2481,29 @@ struct gl_framebuffer /** - * Limits for vertex and fragment programs. + * Limits for vertex and fragment programs/shaders. 
*/ struct gl_program_constants { /* logical limits */ GLuint MaxInstructions; - GLuint MaxAluInstructions; /* fragment programs only, for now */ - GLuint MaxTexInstructions; /* fragment programs only, for now */ - GLuint MaxTexIndirections; /* fragment programs only, for now */ + GLuint MaxAluInstructions; + GLuint MaxTexInstructions; + GLuint MaxTexIndirections; GLuint MaxAttribs; GLuint MaxTemps; - GLuint MaxAddressRegs; /* vertex program only, for now */ + GLuint MaxAddressRegs; GLuint MaxParameters; GLuint MaxLocalParams; GLuint MaxEnvParams; /* native/hardware limits */ GLuint MaxNativeInstructions; - GLuint MaxNativeAluInstructions; /* fragment programs only, for now */ - GLuint MaxNativeTexInstructions; /* fragment programs only, for now */ - GLuint MaxNativeTexIndirections; /* fragment programs only, for now */ + GLuint MaxNativeAluInstructions; + GLuint MaxNativeTexInstructions; + GLuint MaxNativeTexIndirections; GLuint MaxNativeAttribs; GLuint MaxNativeTemps; - GLuint MaxNativeAddressRegs; /* vertex program only, for now */ + GLuint MaxNativeAddressRegs; GLuint MaxNativeParameters; /* For shaders */ GLuint MaxUniformComponents; diff --git a/src/mesa/main/multisample.h b/src/mesa/main/multisample.h index 4305900cc49..998488ef420 100644 --- a/src/mesa/main/multisample.h +++ b/src/mesa/main/multisample.h @@ -26,6 +26,7 @@ #ifndef MULTISAMPLE_H #define MULTISAMPLE_H +#include "mtypes.h" extern void GLAPIENTRY _mesa_SampleCoverageARB(GLclampf value, GLboolean invert); diff --git a/src/mesa/main/nvprogram.h b/src/mesa/main/nvprogram.h index 8ee59661bd0..260a25ba9e9 100644 --- a/src/mesa/main/nvprogram.h +++ b/src/mesa/main/nvprogram.h @@ -29,6 +29,8 @@ #ifndef NVPROGRAM_H #define NVPROGRAM_H +#include "glheader.h" +#include "mtypes.h" extern void GLAPIENTRY _mesa_ExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params); diff --git a/src/mesa/main/pixelstore.h b/src/mesa/main/pixelstore.h index ee963f9ba3c..47bff4276d1 100644 --- 
a/src/mesa/main/pixelstore.h +++ b/src/mesa/main/pixelstore.h @@ -33,6 +33,7 @@ #include "glheader.h" +#include "mtypes.h" extern void GLAPIENTRY diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 6f62415ba8c..32aaa79f7fb 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -36,9 +36,9 @@ #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16)) #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0)) -#if defined(WIN32) || defined(_WIN32_WCE) +#if defined(_MSC_VER) /* Oddly, the fpclassify() function doesn't exist in such a form - * on Windows. This is an implementation using slightly different + * on MSVC. This is an implementation using slightly different * lower-level Windows functions. */ #include <float.h> @@ -72,7 +72,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ - (defined(__sun) && defined(__C99FEATURES__)) + (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) /* fpclassify is available. 
*/ diff --git a/src/mesa/main/remap.h b/src/mesa/main/remap.h index 7afdee36f5b..a2a55f615d5 100644 --- a/src/mesa/main/remap.h +++ b/src/mesa/main/remap.h @@ -28,6 +28,7 @@ #define REMAP_H +#include "main/compiler.h" #include "main/mfeatures.h" struct gl_function_pool_remap { diff --git a/src/mesa/main/renderbuffer.h b/src/mesa/main/renderbuffer.h index 7c205e141c1..bc92b269821 100644 --- a/src/mesa/main/renderbuffer.h +++ b/src/mesa/main/renderbuffer.h @@ -26,6 +26,11 @@ #ifndef RENDERBUFFER_H #define RENDERBUFFER_H +#include "glheader.h" +#include "mtypes.h" + +struct gl_framebuffer; +struct gl_renderbuffer; extern void _mesa_init_renderbuffer(struct gl_renderbuffer *rb, GLuint name); diff --git a/src/mesa/main/restart.h b/src/mesa/main/restart.h index 931cd701281..25f58f24c35 100644 --- a/src/mesa/main/restart.h +++ b/src/mesa/main/restart.h @@ -28,6 +28,7 @@ #ifndef RESTART_H #define RESTART_H +#include "glheader.h" extern void GLAPIENTRY _mesa_PrimitiveRestart(void); diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index f9d10f3bbea..cbe004518a0 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -32,7 +32,6 @@ #include "imports.h" #include "mtypes.h" #include "hash.h" -#include "arrayobj.h" #if FEATURE_ATI_fragment_shader #include "atifragshader.h" #endif diff --git a/src/mesa/main/shared.h b/src/mesa/main/shared.h index ef164a14590..5166a0ce51f 100644 --- a/src/mesa/main/shared.h +++ b/src/mesa/main/shared.h @@ -25,6 +25,7 @@ #ifndef SHARED_H #define SHARED_H +#include "mtypes.h" struct gl_shared_state * _mesa_alloc_shared_state(GLcontext *ctx); diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h index f23fa281e20..c53511995b1 100644 --- a/src/mesa/main/syncobj.h +++ b/src/mesa/main/syncobj.h @@ -31,7 +31,10 @@ #ifndef SYNCOBJ_H #define SYNCOBJ_H -#include "context.h" +#include "glheader.h" +#include "mtypes.h" + +struct dd_function_table; extern void _mesa_init_sync_object_functions(struct dd_function_table *driver); 
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index f4b1119eb17..e911524cbc5 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -33,7 +33,6 @@ #include "glheader.h" #include "imports.h" #include "colormac.h" -#include "context.h" #include "formats.h" #include "texcompress.h" diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c index 04acf05e528..c8b45bd3a55 100644 --- a/src/mesa/main/texcompress_fxt1.c +++ b/src/mesa/main/texcompress_fxt1.c @@ -32,9 +32,9 @@ #include "glheader.h" #include "imports.h" #include "colormac.h" -#include "context.h" #include "convolve.h" #include "image.h" +#include "macros.h" #include "mipmap.h" #include "texcompress.h" #include "texcompress_fxt1.h" diff --git a/src/mesa/main/texcompress_fxt1.h b/src/mesa/main/texcompress_fxt1.h index d63ca71e212..38048b26ccb 100644 --- a/src/mesa/main/texcompress_fxt1.h +++ b/src/mesa/main/texcompress_fxt1.h @@ -25,9 +25,11 @@ #ifndef TEXCOMPRESS_FXT1_H #define TEXCOMPRESS_FXT1_H -#include "main/mtypes.h" +#include "glheader.h" #include "texstore.h" +struct gl_texture_image; + #if FEATURE_texture_fxt1 extern GLboolean diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c index 85c394b051f..c70792cab61 100644 --- a/src/mesa/main/texcompress_s3tc.c +++ b/src/mesa/main/texcompress_s3tc.c @@ -36,10 +36,10 @@ #include "glheader.h" #include "imports.h" #include "colormac.h" -#include "context.h" #include "convolve.h" #include "dlopen.h" #include "image.h" +#include "macros.h" #include "texcompress.h" #include "texcompress_s3tc.h" #include "texstore.h" diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index fe002082cca..c03bc71cd7a 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -34,7 +34,7 @@ #include "colormac.h" -#include "context.h" +#include "macros.h" #include "texcompress.h" #include "texcompress_fxt1.h" #include "texcompress_s3tc.h" diff --git 
a/src/mesa/main/texgen.h b/src/mesa/main/texgen.h index 397d89e630f..2224a937611 100644 --- a/src/mesa/main/texgen.h +++ b/src/mesa/main/texgen.h @@ -27,7 +27,10 @@ #define TEXGEN_H -#include "main/mtypes.h" +#include "compiler.h" +#include "glheader.h" + +struct _glapi_table; #if FEATURE_texgen diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h index 088d27c7e17..866ab704945 100644 --- a/src/mesa/main/texgetimage.h +++ b/src/mesa/main/texgetimage.h @@ -27,6 +27,7 @@ #ifndef TEXGETIMAGE_H #define TEXGETIMAGE_H +#include "mtypes.h" extern void _mesa_get_teximage(GLcontext *ctx, GLenum target, GLint level, diff --git a/src/mesa/main/texrender.c b/src/mesa/main/texrender.c index d29af5a5b2f..c68105b3951 100644 --- a/src/mesa/main/texrender.c +++ b/src/mesa/main/texrender.c @@ -1,6 +1,7 @@ #include "context.h" #include "colormac.h" +#include "macros.h" #include "texfetch.h" #include "texrender.h" #include "renderbuffer.h" diff --git a/src/mesa/main/texrender.h b/src/mesa/main/texrender.h index 7c3fb0871bd..1e87d594a28 100644 --- a/src/mesa/main/texrender.h +++ b/src/mesa/main/texrender.h @@ -1,6 +1,7 @@ #ifndef TEXRENDER_H #define TEXRENDER_H +#include "mtypes.h" extern void _mesa_render_texture(GLcontext *ctx, diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h index 17ac68000c5..912cb677985 100644 --- a/src/mesa/main/texstate.h +++ b/src/mesa/main/texstate.h @@ -32,6 +32,7 @@ #define TEXSTATE_H +#include "compiler.h" #include "mtypes.h" diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 0f21395af39..2989fdb72ed 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -55,7 +55,6 @@ #include "glheader.h" #include "bufferobj.h" #include "colormac.h" -#include "context.h" #include "convolve.h" #include "image.h" #include "macros.h" diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 29f77cb35a0..ef98fe16bb1 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -25,6 
+25,10 @@ #ifndef UNIFORMS_H #define UNIFORMS_H +#include "glheader.h" + +struct gl_program; +struct _glapi_table; extern void GLAPIENTRY _mesa_Uniform1fARB(GLint, GLfloat); diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h index f08fef27978..ec054a7c597 100644 --- a/src/mesa/main/viewport.h +++ b/src/mesa/main/viewport.h @@ -27,6 +27,8 @@ #ifndef VIEWPORT_H #define VIEWPORT_H +#include "glheader.h" +#include "mtypes.h" extern void GLAPIENTRY _mesa_Viewport(GLint x, GLint y, GLsizei width, GLsizei height); diff --git a/src/mesa/main/vtxfmt.h b/src/mesa/main/vtxfmt.h index fb6c23abe98..aad38b87c35 100644 --- a/src/mesa/main/vtxfmt.h +++ b/src/mesa/main/vtxfmt.h @@ -33,6 +33,9 @@ #ifndef _VTXFMT_H_ #define _VTXFMT_H_ +#include "compiler.h" +#include "mtypes.h" + #if FEATURE_beginend extern void _mesa_init_exec_vtxfmt( GLcontext *ctx ); diff --git a/src/mesa/math/m_matrix.h b/src/mesa/math/m_matrix.h index 3bc5de6cd4d..a69afb8589a 100644 --- a/src/mesa/math/m_matrix.h +++ b/src/mesa/math/m_matrix.h @@ -32,6 +32,8 @@ #define _M_MATRIX_H +#include "main/glheader.h" + /** * \name Symbolic names to some of the entries in the matrix diff --git a/src/mesa/math/m_translate.c b/src/mesa/math/m_translate.c index b12b07957cb..51daf7bfd37 100644 --- a/src/mesa/math/m_translate.c +++ b/src/mesa/math/m_translate.c @@ -29,8 +29,8 @@ #include "main/glheader.h" +#include "main/macros.h" #include "main/mtypes.h" /* GLchan hack */ -#include "main/colormac.h" #include "m_translate.h" diff --git a/src/mesa/math/m_translate.h b/src/mesa/math/m_translate.h index c677682d506..58041031163 100644 --- a/src/mesa/math/m_translate.h +++ b/src/mesa/math/m_translate.h @@ -26,7 +26,8 @@ #ifndef _M_TRANSLATE_H_ #define _M_TRANSLATE_H_ -#include "main/config.h" +#include "main/compiler.h" +#include "main/glheader.h" #include "main/mtypes.h" /* hack for GLchan */ diff --git a/src/mesa/math/m_xform.h b/src/mesa/math/m_xform.h index 33421ad1c0a..14ac956a7bc 100644 --- 
a/src/mesa/math/m_xform.h +++ b/src/mesa/math/m_xform.h @@ -27,10 +27,10 @@ #define _M_XFORM_H +#include "main/compiler.h" #include "main/glheader.h" -#include "main/config.h" -#include "math/m_vector.h" #include "math/m_matrix.h" +#include "math/m_vector.h" #ifdef USE_X86_ASM #define _XFORMAPI _ASMAPI diff --git a/src/mesa/program/arbprogparse.c b/src/mesa/program/arbprogparse.c index 6373529e4e8..f834aaf5686 100644 --- a/src/mesa/program/arbprogparse.c +++ b/src/mesa/program/arbprogparse.c @@ -109,6 +109,7 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, program->Base.NumNativeTexIndirections = prog.NumTexIndirections; program->Base.InputsRead = prog.InputsRead; program->Base.OutputsWritten = prog.OutputsWritten; + program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles; for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) { program->Base.TexturesUsed[i] = prog.TexturesUsed[i]; if (prog.TexturesUsed[i]) @@ -199,6 +200,7 @@ _mesa_parse_arb_vertex_program(GLcontext *ctx, GLenum target, program->Base.NumNativeAddressRegs = prog.NumNativeAddressRegs; program->Base.InputsRead = prog.InputsRead; program->Base.OutputsWritten = prog.OutputsWritten; + program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles; program->IsPositionInvariant = (state.option.PositionInvariant) ? 
GL_TRUE : GL_FALSE; diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h index 7b302f5dbee..e750906f961 100644 --- a/src/mesa/program/hash_table.h +++ b/src/mesa/program/hash_table.h @@ -31,8 +31,6 @@ #ifndef HASH_TABLE_H #define HASH_TABLE_H -#include <string.h> - struct hash_table; typedef unsigned (*hash_func_t)(const void *key); diff --git a/src/mesa/program/nvfragparse.h b/src/mesa/program/nvfragparse.h index 544ab80c56c..e28a6c49349 100644 --- a/src/mesa/program/nvfragparse.h +++ b/src/mesa/program/nvfragparse.h @@ -30,6 +30,7 @@ #ifndef NVFRAGPARSE_H #define NVFRAGPARSE_H +#include "main/mtypes.h" extern void _mesa_parse_nv_fragment_program(GLcontext *ctx, GLenum target, diff --git a/src/mesa/program/nvvertparse.c b/src/mesa/program/nvvertparse.c index e2afcfd4ce6..1ac83d0e59d 100644 --- a/src/mesa/program/nvvertparse.c +++ b/src/mesa/program/nvvertparse.c @@ -64,6 +64,7 @@ struct parse_state { GLbitfield inputsRead; GLbitfield outputsWritten; GLboolean anyProgRegsWritten; + GLboolean indirectRegisterFiles; GLuint numInst; /* number of instructions parsed */ }; @@ -410,6 +411,7 @@ Parse_ParamReg(struct parse_state *parseState, struct prog_src_register *srcReg) srcReg->RelAddr = GL_TRUE; srcReg->File = PROGRAM_ENV_PARAM; + parseState->indirectRegisterFiles |= (1 << srcReg->File); /* Look for +/-N offset */ if (!Peek_Token(parseState, token)) RETURN_ERROR; @@ -1308,6 +1310,7 @@ _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum dstTarget, parseState.inputsRead = 0; parseState.outputsWritten = 0; parseState.anyProgRegsWritten = GL_FALSE; + parseState.indirectRegisterFiles = 0x0; /* Reset error state */ _mesa_set_program_error(ctx, -1, NULL); @@ -1408,6 +1411,8 @@ _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum dstTarget, program->Base.Parameters = _mesa_new_parameter_list (); program->Base.NumParameters = 0; + program->Base.IndirectRegisterFiles = parseState.indirectRegisterFiles; + state_tokens[0] = STATE_VERTEX_PROGRAM; 
state_tokens[1] = STATE_ENV; /* Add refs to all of the potential params, in order. If we want to not diff --git a/src/mesa/program/nvvertparse.h b/src/mesa/program/nvvertparse.h index 9919e22388d..91ef79e6c3c 100644 --- a/src/mesa/program/nvvertparse.h +++ b/src/mesa/program/nvvertparse.h @@ -29,6 +29,7 @@ #ifndef NVVERTPARSE_H #define NVVERTPARSE_H +#include "main/mtypes.h" extern void _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum target, diff --git a/src/mesa/program/prog_cache.h b/src/mesa/program/prog_cache.h index 4e1ccac03ff..bfe8f99d445 100644 --- a/src/mesa/program/prog_cache.h +++ b/src/mesa/program/prog_cache.h @@ -30,6 +30,9 @@ #define PROG_CACHE_H +#include "main/mtypes.h" + + /** Opaque type */ struct gl_program_cache; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index f85c6513f31..1670c91b6ad 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -37,7 +37,7 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" +#include "main/macros.h" #include "prog_execute.h" #include "prog_instruction.h" #include "prog_parameter.h" @@ -81,6 +81,22 @@ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; /** + * Return TRUE for +0 and other positive values, FALSE otherwise. + * Used for RCC opcode. + */ +static INLINE GLboolean +positive(float x) +{ + fi_type fi; + fi.f = x; + if (fi.i & 0x80000000) + return GL_FALSE; + return GL_TRUE; +} + + + +/** * Return a pointer to the 4-element float vector specified by the given * source register. 
*/ @@ -1340,6 +1356,44 @@ _mesa_execute_program(GLcontext * ctx, store_vector4(inst, machine, result); } break; + case OPCODE_RCC: /* clamped riciprocal */ + { + const float largest = 1.884467e+19, smallest = 5.42101e-20; + GLfloat a[4], r, result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + if (DEBUG_PROG) { + if (a[0] == 0) + printf("RCC(0)\n"); + else if (IS_INF_OR_NAN(a[0])) + printf("RCC(inf)\n"); + } + if (a[0] == 1.0F) { + r = 1.0F; + } + else { + r = 1.0F / a[0]; + } + if (positive(r)) { + if (r > largest) { + r = largest; + } + else if (r < smallest) { + r = smallest; + } + } + else { + if (r < -largest) { + r = -largest; + } + else if (r > -smallest) { + r = -smallest; + } + } + result[0] = result[1] = result[2] = result[3] = r; + store_vector4(inst, machine, result); + } + break; + case OPCODE_RCP: { GLfloat a[4], result[4]; diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h index adefc5439de..f59b65176ff 100644 --- a/src/mesa/program/prog_execute.h +++ b/src/mesa/program/prog_execute.h @@ -26,6 +26,7 @@ #define PROG_EXECUTE_H #include "main/config.h" +#include "main/mtypes.h" typedef void (*FetchTexelLodFunc)(GLcontext *ctx, const GLfloat texcoord[4], diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index dacbc33704b..098b366ab56 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -38,7 +38,7 @@ #define PROG_INSTRUCTION_H -#include "main/mfeatures.h" +#include "main/glheader.h" /** @@ -149,20 +149,20 @@ typedef enum prog_opcode { OPCODE_ADD, /* X X X X X */ OPCODE_AND, /* */ OPCODE_ARA, /* 2 */ - OPCODE_ARL, /* X X */ + OPCODE_ARL, /* X X X */ OPCODE_ARL_NV, /* 2 */ OPCODE_ARR, /* 2 */ OPCODE_BGNLOOP, /* opt */ OPCODE_BGNSUB, /* opt */ OPCODE_BRA, /* 2 X */ OPCODE_BRK, /* 2 opt */ - OPCODE_CAL, /* 2 2 */ - OPCODE_CMP, /* X */ + OPCODE_CAL, /* 2 2 X */ + OPCODE_CMP, /* X X */ OPCODE_CONT, /* opt */ OPCODE_COS, /* X 2 X X */ OPCODE_DDX, /* X 
X */ OPCODE_DDY, /* X X */ - OPCODE_DP2, /* 2 */ + OPCODE_DP2, /* 2 X */ OPCODE_DP2A, /* 2 */ OPCODE_DP3, /* X X X X X */ OPCODE_DP4, /* X X X X X */ @@ -185,7 +185,7 @@ typedef enum prog_opcode { OPCODE_LG2, /* X X 2 X X */ OPCODE_LIT, /* X X X X */ OPCODE_LOG, /* X X X */ - OPCODE_LRP, /* X X */ + OPCODE_LRP, /* X X X */ OPCODE_MAD, /* X X X X X */ OPCODE_MAX, /* X X X X X */ OPCODE_MIN, /* X X X X X */ @@ -196,8 +196,8 @@ typedef enum prog_opcode { OPCODE_NOISE3, /* X */ OPCODE_NOISE4, /* X */ OPCODE_NOT, /* */ - OPCODE_NRM3, /* */ - OPCODE_NRM4, /* */ + OPCODE_NRM3, /* X */ + OPCODE_NRM4, /* X */ OPCODE_OR, /* */ OPCODE_PK2H, /* X */ OPCODE_PK2US, /* X */ @@ -209,7 +209,7 @@ typedef enum prog_opcode { OPCODE_PUSHA, /* 3 */ OPCODE_RCC, /* 1.1 */ OPCODE_RCP, /* X X X X X */ - OPCODE_RET, /* 2 2 */ + OPCODE_RET, /* 2 2 X */ OPCODE_RFL, /* X X */ OPCODE_RSQ, /* X X X X X */ OPCODE_SCS, /* X */ diff --git a/src/mesa/program/prog_noise.h b/src/mesa/program/prog_noise.h index c4779479f9b..dd7986efcdb 100644 --- a/src/mesa/program/prog_noise.h +++ b/src/mesa/program/prog_noise.h @@ -25,6 +25,8 @@ #ifndef PROG_NOISE #define PROG_NOISE +#include "main/glheader.h" + extern GLfloat _mesa_noise1(GLfloat); extern GLfloat _mesa_noise2(GLfloat, GLfloat); extern GLfloat _mesa_noise3(GLfloat, GLfloat, GLfloat); diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 2941a17da3f..c78187c983d 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -38,40 +38,117 @@ static GLboolean dbg = GL_FALSE; -/* Returns the mask of channels read from the given srcreg in this instruction. 
+#define NO_MASK 0xf + +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction, We also provide + * one optional masks which may mask other components in the dst + * register */ static GLuint -get_src_arg_mask(const struct prog_instruction *inst, int arg) +get_src_arg_mask(const struct prog_instruction *inst, + GLuint arg, GLuint dst_mask) { - int writemask = inst->DstReg.WriteMask; + GLuint read_mask, channel_mask; + GLuint comp; - if (inst->CondUpdate) - writemask = WRITEMASK_XYZW; + ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode)); - switch (inst->Opcode) { - case OPCODE_MOV: - case OPCODE_ABS: - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_SUB: - return writemask; - case OPCODE_RCP: - case OPCODE_SIN: - case OPCODE_COS: - case OPCODE_RSQ: - case OPCODE_POW: - case OPCODE_EX2: - return WRITEMASK_X; - case OPCODE_DP2: - return WRITEMASK_XY; - case OPCODE_DP3: - case OPCODE_XPD: - return WRITEMASK_XYZ; - default: - return WRITEMASK_XYZW; + /* Form the dst register, find the written channels */ + if (inst->CondUpdate) { + channel_mask = WRITEMASK_XYZW; + } + else { + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MAD: + case OPCODE_MUL: + case OPCODE_SUB: + channel_mask = inst->DstReg.WriteMask & dst_mask; + break; + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + case OPCODE_LOG: + channel_mask = WRITEMASK_X; + break; + case OPCODE_DP2: + channel_mask = WRITEMASK_XY; + break; + case OPCODE_DP3: + case OPCODE_XPD: + channel_mask = WRITEMASK_XYZ; + break; + default: + channel_mask = WRITEMASK_XYZW; + break; + } } + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + read_mask = 0x0; + for (comp = 0; comp < 4; ++comp) { + const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); 
+ ASSERT(coord < 4); + if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + + +/** + * For a MOV instruction, compute a write mask when src register also has + * a mask + */ +static GLuint +get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) +{ + const GLuint mask = mov->DstReg.WriteMask; + GLuint comp; + GLuint updated_mask = 0x0; + + ASSERT(mov->Opcode == OPCODE_MOV); + + for (comp = 0; comp < 4; ++comp) { + GLuint src_comp; + if ((mask & (1 << comp)) == 0) + continue; + src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); + if ((src_mask & (1 << src_comp)) == 0) + continue; + updated_mask |= 1 << comp; + } + + return updated_mask; +} + + +/** + * Ensure that the swizzle is regular. That is, all of the swizzle + * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. + */ +static GLboolean +is_swizzle_regular(GLuint swz) +{ + return GET_SWZ(swz,0) <= SWIZZLE_W && + GET_SWZ(swz,1) <= SWIZZLE_W && + GET_SWZ(swz,2) <= SWIZZLE_W && + GET_SWZ(swz,3) <= SWIZZLE_W; } + /** * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. * \return number of instructions removed @@ -148,82 +225,13 @@ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) /** - * Consolidate temporary registers to use low numbers. For example, if the - * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. 
- */ -static void -_mesa_consolidate_registers(struct gl_program *prog) -{ - GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLint tempMap[MAX_PROGRAM_TEMPS]; - GLuint tempMax = 0, i; - - if (dbg) { - printf("Optimize: Begin register consolidation\n"); - } - - memset(tempUsed, 0, sizeof(tempUsed)); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - tempMap[i] = -1; - } - - /* set tempUsed[i] if temporary [i] is referenced */ - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - const GLuint index = inst->SrcReg[j].Index; - ASSERT(index < MAX_PROGRAM_TEMPS); - tempUsed[index] = GL_TRUE; - tempMax = MAX2(tempMax, index); - break; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - ASSERT(index < MAX_PROGRAM_TEMPS); - tempUsed[index] = GL_TRUE; - tempMax = MAX2(tempMax, index); - } - } - - /* allocate a new index for each temp that's used */ - { - GLuint freeTemp = 0; - for (i = 0; i <= tempMax; i++) { - if (tempUsed[i]) { - tempMap[i] = freeTemp++; - /*printf("replace %u with %u\n", i, tempMap[i]);*/ - } - } - if (freeTemp == tempMax + 1) { - /* no consolidation possible */ - return; - } - if (dbg) { - printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1); - } - } - - replace_regs(prog, PROGRAM_TEMPORARY, tempMap); - - if (dbg) { - printf("Optimize: End register consolidation\n"); - } -} - - -/** * Remove dead instructions from the given program. * This is very primitive for now. Basically look for temp registers * that are written to but never read. Remove any instructions that * write to such registers. Be careful with condition code setters. 
*/ -static void -_mesa_remove_dead_code(struct gl_program *prog) +static GLboolean +_mesa_remove_dead_code_global(struct gl_program *prog) { GLboolean tempRead[MAX_PROGRAM_TEMPS][4]; GLboolean *removeInst; /* per-instruction removal flag */ @@ -251,7 +259,7 @@ _mesa_remove_dead_code(struct gl_program *prog) const GLuint index = inst->SrcReg[j].Index; GLuint read_mask; ASSERT(index < MAX_PROGRAM_TEMPS); - read_mask = get_src_arg_mask(inst, j); + read_mask = get_src_arg_mask(inst, j, NO_MASK); if (inst->SrcReg[j].RelAddr) { if (dbg) @@ -260,25 +268,12 @@ _mesa_remove_dead_code(struct gl_program *prog) } for (comp = 0; comp < 4; comp++) { - GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7; - - if ((read_mask & (1 << comp)) == 0) + const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); + ASSERT(swz < 4); + if ((read_mask & (1 << swz)) == 0) continue; - - switch (swz) { - case SWIZZLE_X: - tempRead[index][0] = GL_TRUE; - break; - case SWIZZLE_Y: - tempRead[index][1] = GL_TRUE; - break; - case SWIZZLE_Z: - tempRead[index][2] = GL_TRUE; - break; - case SWIZZLE_W: - tempRead[index][3] = GL_TRUE; - break; - } + if (swz <= SWIZZLE_W) + tempRead[index][swz] = GL_TRUE; } } } @@ -348,10 +343,11 @@ _mesa_remove_dead_code(struct gl_program *prog) done: free(removeInst); + return rem != 0; } -enum temp_use +enum inst_use { READ, WRITE, @@ -359,13 +355,19 @@ enum temp_use END }; + /** - * Scan forward in program from 'start' for the next occurance of TEMP[index]. + * Scan forward in program from 'start' for the next occurances of TEMP[index]. + * We look if an instruction reads the component given by the masks and if they + * are overwritten. * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator * that we can't look further. 
*/ -static enum temp_use -find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) +static enum inst_use +find_next_use(const struct gl_program *prog, + GLuint start, + GLuint index, + GLuint mask) { GLuint i; @@ -373,30 +375,50 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) const struct prog_instruction *inst = prog->Instructions + i; switch (inst->Opcode) { case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_IF: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: case OPCODE_ENDSUB: + case OPCODE_RET: return FLOW; + case OPCODE_END: + return END; default: { const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY && - inst->SrcReg[j].Index == index) + if (inst->SrcReg[j].RelAddr || + (inst->SrcReg[j].File == PROGRAM_TEMPORARY && + inst->SrcReg[j].Index == index && + (get_src_arg_mask(inst,j,NO_MASK) & mask))) return READ; } - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == index) - return WRITE; + if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && + inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == index) { + mask &= ~inst->DstReg.WriteMask; + if (mask == 0) + return WRITE; + } } } } - return END; } -static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) + +/** + * Is the given instruction opcode a flow-control opcode? + * XXX maybe move this into prog_instruction.[ch] + */ +static GLboolean +_mesa_is_flow_control_opcode(enum prog_opcode opcode) { switch (opcode) { case OPCODE_BGNLOOP: @@ -417,6 +439,37 @@ static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) } } + +/** + * Test if the given instruction is a simple MOV (no conditional updating, + * not relative addressing, no negation/abs, etc). 
+ */ +static GLboolean +can_downward_mov_be_modifed(const struct prog_instruction *mov) +{ + return + mov->Opcode == OPCODE_MOV && + mov->CondUpdate == GL_FALSE && + mov->SrcReg[0].RelAddr == 0 && + mov->SrcReg[0].Negate == 0 && + mov->SrcReg[0].Abs == 0 && + mov->SrcReg[0].HasIndex2 == 0 && + mov->SrcReg[0].RelAddr2 == 0 && + mov->DstReg.RelAddr == 0 && + mov->DstReg.CondMask == COND_TR && + mov->SaturateMode == SATURATE_OFF; +} + + +static GLboolean +can_upward_mov_be_modifed(const struct prog_instruction *mov) +{ + return + can_downward_mov_be_modifed(mov) && + mov->DstReg.File == PROGRAM_TEMPORARY; +} + + /** * Try to remove use of extraneous MOV instructions, to free them up for dead * code removal. @@ -444,14 +497,15 @@ _mesa_remove_extra_move_use(struct gl_program *prog) for (i = 0; i + 1 < prog->NumInstructions; i++) { const struct prog_instruction *mov = prog->Instructions + i; + GLuint dst_mask, src_mask; + if (can_upward_mov_be_modifed(mov) == GL_FALSE) + continue; - if (mov->Opcode != OPCODE_MOV || - mov->DstReg.File != PROGRAM_TEMPORARY || - mov->DstReg.RelAddr || - mov->DstReg.CondMask != COND_TR || - mov->SaturateMode != SATURATE_OFF || - mov->SrcReg[0].RelAddr) - continue; + /* Scanning the code, we maintain the components which are still active in + * these two masks + */ + dst_mask = mov->DstReg.WriteMask; + src_mask = get_src_arg_mask(mov, 0, NO_MASK); /* Walk through remaining instructions until the or src reg gets * rewritten or we get into some flow-control, eliminating the use of @@ -459,61 +513,60 @@ _mesa_remove_extra_move_use(struct gl_program *prog) */ for (j = i + 1; j < prog->NumInstructions; j++) { struct prog_instruction *inst2 = prog->Instructions + j; - GLuint arg; + GLuint arg; if (_mesa_is_flow_control_opcode(inst2->Opcode)) break; /* First rewrite this instruction's args if appropriate. 
*/ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { - int comp; - int read_mask = get_src_arg_mask(inst2, arg); + GLuint comp, read_mask; if (inst2->SrcReg[arg].File != mov->DstReg.File || inst2->SrcReg[arg].Index != mov->DstReg.Index || inst2->SrcReg[arg].RelAddr || inst2->SrcReg[arg].Abs) continue; + read_mask = get_src_arg_mask(inst2, arg, NO_MASK); - /* Check that all the sources for this arg of inst2 come from inst1 - * or constants. - */ - for (comp = 0; comp < 4; comp++) { - int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - /* If the MOV didn't write that channel, can't use it. */ - if ((read_mask & (1 << comp)) && - src_swz <= SWIZZLE_W && - (mov->DstReg.WriteMask & (1 << src_swz)) == 0) - break; - } - if (comp != 4) - continue; - - /* Adjust the swizzles of inst2 to point at MOV's source */ - for (comp = 0; comp < 4; comp++) { - int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - if (inst2_swz <= SWIZZLE_W) { - GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); - inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); - inst2->SrcReg[arg].Swizzle |= s << (3 * comp); - inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> - inst2_swz) & 0x1) << comp); - } - } - inst2->SrcReg[arg].File = mov->SrcReg[0].File; - inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; + /* Adjust the swizzles of inst2 to point at MOV's source if ALL the + * components read still come from the mov instructions + */ + if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && + (read_mask & dst_mask) == read_mask) { + for (comp = 0; comp < 4; comp++) { + const GLuint inst2_swz = + GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); + inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); + inst2->SrcReg[arg].Swizzle |= s << (3 * comp); + inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> + inst2_swz) & 0x1) << comp); + } + inst2->SrcReg[arg].File = mov->SrcReg[0].File; + inst2->SrcReg[arg].Index 
= mov->SrcReg[0].Index; + } } - /* If this instruction overwrote part of the move, our time is up. */ - if ((inst2->DstReg.File == mov->DstReg.File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->DstReg.Index)) || - (inst2->DstReg.File == mov->SrcReg[0].File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->SrcReg[0].Index))) - break; + /* The source of MOV is written. This potentially deactivates some + * components from the src and dst of the MOV instruction + */ + if (inst2->DstReg.File == mov->DstReg.File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->DstReg.Index)) { + dst_mask &= ~inst2->DstReg.WriteMask; + src_mask = get_src_arg_mask(mov, 0, dst_mask); + } + + /* Idem when the destination of mov is written */ + if (inst2->DstReg.File == mov->SrcReg[0].File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->SrcReg[0].Index)) { + src_mask &= ~inst2->DstReg.WriteMask; + dst_mask &= get_dst_mask_for_mov(mov, src_mask); + } + if (dst_mask == 0) + break; } } @@ -523,14 +576,151 @@ _mesa_remove_extra_move_use(struct gl_program *prog) } } + +/** + * Complements dead_code_global. Try to remove code in block of code by + * carefully monitoring the swizzles. 
Both functions should be merged into one + * with a proper control flow graph + */ +static GLboolean +_mesa_remove_dead_code_local(struct gl_program *prog) +{ + GLboolean *removeInst; + GLuint i, arg, rem = 0; + + removeInst = (GLboolean *) + calloc(1, prog->NumInstructions * sizeof(GLboolean)); + + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + const GLuint index = inst->DstReg.Index; + const GLuint mask = inst->DstReg.WriteMask; + enum inst_use use; + + /* We must deactivate the pass as soon as some indirection is used */ + if (inst->DstReg.RelAddr) + goto done; + for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) + if (inst->SrcReg[arg].RelAddr) + goto done; + + if (_mesa_is_flow_control_opcode(inst->Opcode) || + _mesa_num_inst_dst_regs(inst->Opcode) == 0 || + inst->DstReg.File != PROGRAM_TEMPORARY || + inst->DstReg.RelAddr) + continue; + + use = find_next_use(prog, i+1, index, mask); + if (use == WRITE || use == END) + removeInst[i] = GL_TRUE; + } + + rem = remove_instructions(prog, removeInst); + +done: + free(removeInst); + return rem != 0; +} + + +/** + * Try to inject the destination of mov as the destination of inst and recompute + * the swizzles operators for the sources of inst if required. Return GL_TRUE + * of the substitution was possible, GL_FALSE otherwise + */ +static GLboolean +_mesa_merge_mov_into_inst(struct prog_instruction *inst, + const struct prog_instruction *mov) +{ + /* Indirection table which associates destination and source components for + * the mov instruction + */ + const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); + + /* Some components are not written by inst. We cannot remove the mov */ + if (mask != (inst->DstReg.WriteMask & mask)) + return GL_FALSE; + + /* Depending on the instruction, we may need to recompute the swizzles. + * Also, some other instructions (like TEX) are not linear. 
We will only + * consider completely active sources and destinations + */ + switch (inst->Opcode) { + + /* Carstesian instructions: we compute the swizzle */ + case OPCODE_MOV: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MAD: + case OPCODE_MUL: + case OPCODE_SUB: + { + GLuint dst_to_src_comp[4] = {0,0,0,0}; + GLuint dst_comp, arg; + for (dst_comp = 0; dst_comp < 4; ++dst_comp) { + if (mov->DstReg.WriteMask & (1 << dst_comp)) { + const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); + ASSERT(src_comp < 4); + dst_to_src_comp[dst_comp] = src_comp; + } + } + + /* Patch each source of the instruction */ + for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { + const GLuint arg_swz = inst->SrcReg[arg].Swizzle; + inst->SrcReg[arg].Swizzle = 0; + + /* Reset each active component of the swizzle */ + for (dst_comp = 0; dst_comp < 4; ++dst_comp) { + GLuint src_comp, arg_comp; + if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) + continue; + src_comp = dst_to_src_comp[dst_comp]; + ASSERT(src_comp < 4); + arg_comp = GET_SWZ(arg_swz, src_comp); + ASSERT(arg_comp < 4); + inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); + } + } + inst->DstReg = mov->DstReg; + return GL_TRUE; + } + + /* Dot products and scalar instructions: we only change the destination */ + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + case OPCODE_LOG: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + inst->DstReg = mov->DstReg; + return GL_TRUE; + + /* All other instructions require fully active components with no swizzle */ + default: + if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || + inst->DstReg.WriteMask != WRITEMASK_XYZW) + return GL_FALSE; + inst->DstReg = mov->DstReg; + return GL_TRUE; + } +} + + /** * Try to remove extraneous MOV instructions from the given program. 
*/ -static void +static GLboolean _mesa_remove_extra_moves(struct gl_program *prog) { GLboolean *removeInst; /* per-instruction removal flag */ - GLuint i, rem, loopNesting = 0, subroutineNesting = 0; + GLuint i, rem = 0, nesting = 0; if (dbg) { printf("Optimize: Begin remove extra moves\n"); @@ -549,29 +739,24 @@ _mesa_remove_extra_moves(struct gl_program *prog) */ for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; + const struct prog_instruction *mov = prog->Instructions + i; - switch (inst->Opcode) { + switch (mov->Opcode) { case OPCODE_BGNLOOP: - loopNesting++; - break; - case OPCODE_ENDLOOP: - loopNesting--; - break; case OPCODE_BGNSUB: - subroutineNesting++; + case OPCODE_IF: + nesting++; break; + case OPCODE_ENDLOOP: case OPCODE_ENDSUB: - subroutineNesting--; + case OPCODE_ENDIF: + nesting--; break; case OPCODE_MOV: - if (i > 0 && - loopNesting == 0 && - subroutineNesting == 0 && - inst->SrcReg[0].File == PROGRAM_TEMPORARY && - inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) { + if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) { + /* see if this MOV can be removed */ - const GLuint tempIndex = inst->SrcReg[0].Index; + const GLuint id = mov->SrcReg[0].Index; struct prog_instruction *prevInst; GLuint prevI; @@ -582,11 +767,13 @@ _mesa_remove_extra_moves(struct gl_program *prog) prevInst = prog->Instructions + prevI; if (prevInst->DstReg.File == PROGRAM_TEMPORARY && - prevInst->DstReg.Index == tempIndex && - prevInst->DstReg.WriteMask == WRITEMASK_XYZW) { + prevInst->DstReg.Index == id && + prevInst->DstReg.RelAddr == 0 && + prevInst->DstReg.CondSrc == 0 && + prevInst->DstReg.CondMask == COND_TR) { - enum temp_use next_use = - find_next_temp_use(prog, i + 1, tempIndex); + const GLuint dst_mask = prevInst->DstReg.WriteMask; + enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); if (next_use == WRITE || next_use == END) { /* OK, we can safely remove this MOV instruction. 
@@ -596,18 +783,13 @@ _mesa_remove_extra_moves(struct gl_program *prog) * Into: * prevI: FOO z, x, y; */ - - /* patch up prev inst */ - prevInst->DstReg.File = inst->DstReg.File; - prevInst->DstReg.Index = inst->DstReg.Index; - - /* flag this instruction for removal */ - removeInst[i] = GL_TRUE; - - if (dbg) { - printf("Remove MOV at %u\n", i); - printf("new prev inst %u: ", prevI); - _mesa_print_instruction(prevInst); + if (_mesa_merge_mov_into_inst(prevInst, mov)) { + removeInst[i] = GL_TRUE; + if (dbg) { + printf("Remove MOV at %u\n", i); + printf("new prev inst %u: ", prevI); + _mesa_print_instruction(prevInst); + } } } } @@ -627,6 +809,8 @@ _mesa_remove_extra_moves(struct gl_program *prog) printf("Optimize: End remove extra moves. %u instructions removed\n", rem); /*_mesa_print_program(prog);*/ } + + return rem != 0; } @@ -713,6 +897,7 @@ compare_start(const void *a, const void *b) return 0; } + /** sort the interval list according to interval starts */ static void sort_interval_list_by_start(struct interval_list *list) @@ -1013,6 +1198,17 @@ _mesa_reallocate_registers(struct gl_program *prog) } +#if 0 +static void +print_it(GLcontext *ctx, struct gl_program *program, const char *txt) { + fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions); + _mesa_print_program(program); + _mesa_print_program_parameters(ctx, program); + fprintf(stderr, "\n\n"); +} +#endif + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. 
@@ -1020,16 +1216,19 @@ _mesa_reallocate_registers(struct gl_program *prog) void _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) { - _mesa_remove_extra_move_use(program); - - if (1) - _mesa_remove_dead_code(program); - - if (0) /* not tested much yet */ - _mesa_remove_extra_moves(program); - - if (0) - _mesa_consolidate_registers(program); - else + GLboolean any_change; + + /* Stop when no modifications were output */ + do { + any_change = GL_FALSE; + _mesa_remove_extra_move_use(program); + if (_mesa_remove_dead_code_global(program)) + any_change = GL_TRUE; + if (_mesa_remove_extra_moves(program)) + any_change = GL_TRUE; + if (_mesa_remove_dead_code_local(program)) + any_change = GL_TRUE; _mesa_reallocate_registers(program); + } while (any_change); } + diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 43894a27237..06cd9cb2c20 100644 --- a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -27,6 +27,7 @@ #include "main/config.h" +#include "main/mtypes.h" struct gl_program; diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index a8885738321..d7dc97edbfb 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -28,6 +28,7 @@ * \author Ian Romanick <[email protected]> */ +#include "main/compiler.h" #include "main/mtypes.h" #include "prog_parameter.h" #include "prog_parameter_layout.h" diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 6ab199aa02b..6056c459e4c 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -924,6 +924,8 @@ _mesa_fprint_program_parameters(FILE *f, fprintf(f, "NumParameters=%d\n", prog->NumParameters); fprintf(f, "NumAttributes=%d\n", prog->NumAttributes); fprintf(f, "NumAddressRegs=%d\n", prog->NumAddressRegs); + fprintf(f, "IndirectRegisterFiles: 0x%x (0b%s)\n", + prog->IndirectRegisterFiles, 
binary(prog->IndirectRegisterFiles)); fprintf(f, "SamplersUsed: 0x%x (0b%s)\n", prog->SamplersUsed, binary(prog->SamplersUsed)); fprintf(f, "Samplers=[ "); diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h index 9ab74560169..4667373f379 100644 --- a/src/mesa/program/prog_print.h +++ b/src/mesa/program/prog_print.h @@ -26,6 +26,16 @@ #ifndef PROG_PRINT_H #define PROG_PRINT_H +#include <stdio.h> + +#include "main/glheader.h" +#include "main/mtypes.h" + +struct gl_program; +struct gl_program_parameter_list; +struct gl_shader; +struct prog_instruction; + /** * The output style to use when printing programs. diff --git a/src/mesa/program/prog_uniform.h b/src/mesa/program/prog_uniform.h index a671d30bfe8..7988d534a7d 100644 --- a/src/mesa/program/prog_uniform.h +++ b/src/mesa/program/prog_uniform.h @@ -31,8 +31,7 @@ #ifndef PROG_UNIFORM_H #define PROG_UNIFORM_H -#include "main/mtypes.h" -#include "prog_statevars.h" +#include "main/glheader.h" /** diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index cf46095ce84..3b6d6827446 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -55,13 +55,21 @@ _mesa_init_program(GLcontext *ctx) /* * If this assertion fails, we need to increase the field - * size for register indexes. + * size for register indexes (see INST_INDEX_BITS). 
*/ ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4 <= (1 << INST_INDEX_BITS)); ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4 <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.VertexProgram.MaxTemps <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.VertexProgram.MaxLocalParams <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.FragmentProgram.MaxTemps <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.FragmentProgram.MaxLocalParams <= (1 << INST_INDEX_BITS)); + + ASSERT(ctx->Const.VertexProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS); + ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS); + /* If this fails, increase prog_instruction::TexSrcUnit size */ ASSERT(MAX_TEXTURE_UNITS < (1 << 5)); @@ -512,6 +520,7 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog) if (prog->Attributes) clone->Attributes = _mesa_clone_parameter_list(prog->Attributes); memcpy(clone->LocalParams, prog->LocalParams, sizeof(clone->LocalParams)); + clone->IndirectRegisterFiles = prog->IndirectRegisterFiles; clone->NumInstructions = prog->NumInstructions; clone->NumTemporaries = prog->NumTemporaries; clone->NumParameters = prog->NumParameters; diff --git a/src/mesa/program/program_parse.tab.c b/src/mesa/program/program_parse.tab.c index 6421d1f58aa..31a609600b7 100644 --- a/src/mesa/program/program_parse.tab.c +++ b/src/mesa/program/program_parse.tab.c @@ -798,29 +798,29 @@ static const yytype_uint16 yyrline[] = 415, 459, 464, 474, 518, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 538, 550, 558, 575, 582, 601, 612, 632, 657, 664, 697, 704, 719, - 774, 817, 826, 847, 857, 861, 890, 909, 909, 911, - 918, 930, 931, 932, 935, 949, 963, 983, 994, 1006, - 1008, 1009, 1010, 1011, 1014, 1014, 1014, 1014, 1015, 1018, - 1022, 1027, 1034, 1041, 1048, 1071, 1094, 1095, 1096, 1097, - 1098, 1099, 1102, 1121, 1125, 1131, 1135, 1139, 1143, 1152, - 1161, 1165, 1170, 1176, 1187, 1187, 1188, 1190, 1194, 1198, - 1202, 1208, 1208, 1210, 1228, 
1254, 1257, 1268, 1274, 1280, - 1281, 1288, 1294, 1300, 1308, 1314, 1320, 1328, 1334, 1340, - 1348, 1349, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, - 1360, 1361, 1362, 1365, 1374, 1378, 1382, 1388, 1397, 1401, - 1405, 1414, 1418, 1424, 1430, 1437, 1442, 1450, 1460, 1462, - 1470, 1476, 1480, 1484, 1490, 1501, 1510, 1514, 1519, 1523, - 1527, 1531, 1537, 1544, 1548, 1554, 1562, 1573, 1580, 1584, - 1590, 1600, 1611, 1615, 1633, 1642, 1645, 1651, 1655, 1659, - 1665, 1676, 1681, 1686, 1691, 1696, 1701, 1709, 1712, 1717, - 1730, 1738, 1749, 1757, 1757, 1759, 1759, 1761, 1771, 1776, - 1783, 1793, 1802, 1807, 1814, 1824, 1834, 1846, 1846, 1847, - 1847, 1849, 1859, 1867, 1877, 1885, 1893, 1902, 1913, 1917, - 1923, 1924, 1925, 1928, 1928, 1931, 1966, 1970, 1970, 1973, - 1980, 1989, 2003, 2012, 2021, 2025, 2034, 2043, 2054, 2061, - 2066, 2075, 2087, 2090, 2099, 2110, 2111, 2112, 2115, 2116, - 2117, 2120, 2121, 2124, 2125, 2128, 2129, 2132, 2143, 2154, - 2165, 2191, 2192 + 774, 817, 826, 848, 858, 862, 891, 910, 910, 912, + 919, 931, 932, 933, 936, 950, 964, 984, 995, 1007, + 1009, 1010, 1011, 1012, 1015, 1015, 1015, 1015, 1016, 1019, + 1023, 1028, 1035, 1042, 1049, 1072, 1095, 1096, 1097, 1098, + 1099, 1100, 1103, 1122, 1126, 1132, 1136, 1140, 1144, 1153, + 1162, 1166, 1171, 1177, 1188, 1188, 1189, 1191, 1195, 1199, + 1203, 1209, 1209, 1211, 1229, 1255, 1258, 1269, 1275, 1281, + 1282, 1289, 1295, 1301, 1309, 1315, 1321, 1329, 1335, 1341, + 1349, 1350, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, + 1361, 1362, 1363, 1366, 1375, 1379, 1383, 1389, 1398, 1402, + 1406, 1415, 1419, 1425, 1431, 1438, 1443, 1451, 1461, 1463, + 1471, 1477, 1481, 1485, 1491, 1502, 1511, 1515, 1520, 1524, + 1528, 1532, 1538, 1545, 1549, 1555, 1563, 1574, 1581, 1585, + 1591, 1601, 1612, 1616, 1634, 1643, 1646, 1652, 1656, 1660, + 1666, 1677, 1682, 1687, 1692, 1697, 1702, 1710, 1713, 1718, + 1731, 1739, 1750, 1758, 1758, 1760, 1760, 1762, 1772, 1777, + 1784, 1794, 1803, 1808, 1815, 1825, 1835, 
1847, 1847, 1848, + 1848, 1850, 1860, 1868, 1878, 1886, 1894, 1903, 1914, 1918, + 1924, 1925, 1926, 1929, 1929, 1932, 1967, 1971, 1971, 1974, + 1981, 1990, 2004, 2013, 2022, 2026, 2035, 2044, 2055, 2062, + 2067, 2076, 2088, 2091, 2100, 2111, 2112, 2113, 2116, 2117, + 2118, 2121, 2122, 2125, 2126, 2129, 2130, 2133, 2144, 2155, + 2166, 2192, 2193 }; #endif @@ -2844,6 +2844,7 @@ yyreduce: (yyval.src_reg).Base.File = (yyvsp[(1) - (4)].sym)->param_binding_type; if ((yyvsp[(3) - (4)].src_reg).Base.RelAddr) { + state->prog->IndirectRegisterFiles |= (1 << (yyval.src_reg).Base.File); (yyvsp[(1) - (4)].sym)->param_accessed_indirectly = 1; (yyval.src_reg).Base.RelAddr = 1; @@ -2858,7 +2859,7 @@ yyreduce: case 63: /* Line 1455 of yacc.c */ -#line 848 "program_parse.y" +#line 849 "program_parse.y" { gl_register_file file = ((yyvsp[(1) - (1)].temp_sym).name != NULL) ? (yyvsp[(1) - (1)].temp_sym).param_binding_type @@ -2871,7 +2872,7 @@ yyreduce: case 64: /* Line 1455 of yacc.c */ -#line 858 "program_parse.y" +#line 859 "program_parse.y" { set_dst_reg(& (yyval.dst_reg), PROGRAM_OUTPUT, (yyvsp[(1) - (1)].result)); ;} @@ -2880,7 +2881,7 @@ yyreduce: case 65: /* Line 1455 of yacc.c */ -#line 862 "program_parse.y" +#line 863 "program_parse.y" { struct asm_symbol *const s = (struct asm_symbol *) _mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string)); @@ -2912,7 +2913,7 @@ yyreduce: case 66: /* Line 1455 of yacc.c */ -#line 891 "program_parse.y" +#line 892 "program_parse.y" { struct asm_symbol *const s = (struct asm_symbol *) _mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string)); @@ -2934,7 +2935,7 @@ yyreduce: case 69: /* Line 1455 of yacc.c */ -#line 912 "program_parse.y" +#line 913 "program_parse.y" { init_src_reg(& (yyval.src_reg)); (yyval.src_reg).Base.Index = (yyvsp[(1) - (1)].integer); @@ -2944,7 +2945,7 @@ yyreduce: case 70: /* Line 1455 of yacc.c */ -#line 919 "program_parse.y" +#line 920 "program_parse.y" { /* FINISHME: Add support for 
multiple address registers. */ @@ -2959,30 +2960,30 @@ yyreduce: case 71: /* Line 1455 of yacc.c */ -#line 930 "program_parse.y" +#line 931 "program_parse.y" { (yyval.integer) = 0; ;} break; case 72: /* Line 1455 of yacc.c */ -#line 931 "program_parse.y" +#line 932 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (2)].integer); ;} break; case 73: /* Line 1455 of yacc.c */ -#line 932 "program_parse.y" +#line 933 "program_parse.y" { (yyval.integer) = -(yyvsp[(2) - (2)].integer); ;} break; case 74: /* Line 1455 of yacc.c */ -#line 936 "program_parse.y" +#line 937 "program_parse.y" { - if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 63)) { + if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 4095)) { char s[100]; _mesa_snprintf(s, sizeof(s), "relative address offset too large (%d)", (yyvsp[(1) - (1)].integer)); @@ -2997,9 +2998,9 @@ yyreduce: case 75: /* Line 1455 of yacc.c */ -#line 950 "program_parse.y" +#line 951 "program_parse.y" { - if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 64)) { + if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 4096)) { char s[100]; _mesa_snprintf(s, sizeof(s), "relative address offset too large (%d)", (yyvsp[(1) - (1)].integer)); @@ -3014,7 +3015,7 @@ yyreduce: case 76: /* Line 1455 of yacc.c */ -#line 964 "program_parse.y" +#line 965 "program_parse.y" { struct asm_symbol *const s = (struct asm_symbol *) _mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string)); @@ -3037,7 +3038,7 @@ yyreduce: case 77: /* Line 1455 of yacc.c */ -#line 984 "program_parse.y" +#line 985 "program_parse.y" { if ((yyvsp[(1) - (1)].swiz_mask).mask != WRITEMASK_X) { yyerror(& (yylsp[(1) - (1)]), state, "invalid address component selector"); @@ -3051,7 +3052,7 @@ yyreduce: case 78: /* Line 1455 of yacc.c */ -#line 995 "program_parse.y" +#line 996 "program_parse.y" { if ((yyvsp[(1) - (1)].swiz_mask).mask != WRITEMASK_X) { yyerror(& (yylsp[(1) - (1)]), state, @@ 
-3066,21 +3067,21 @@ yyreduce: case 83: /* Line 1455 of yacc.c */ -#line 1011 "program_parse.y" +#line 1012 "program_parse.y" { (yyval.swiz_mask).swizzle = SWIZZLE_NOOP; (yyval.swiz_mask).mask = WRITEMASK_XYZW; ;} break; case 88: /* Line 1455 of yacc.c */ -#line 1015 "program_parse.y" +#line 1016 "program_parse.y" { (yyval.swiz_mask).swizzle = SWIZZLE_NOOP; (yyval.swiz_mask).mask = WRITEMASK_XYZW; ;} break; case 89: /* Line 1455 of yacc.c */ -#line 1019 "program_parse.y" +#line 1020 "program_parse.y" { (yyval.dst_reg) = (yyvsp[(2) - (3)].dst_reg); ;} @@ -3089,7 +3090,7 @@ yyreduce: case 90: /* Line 1455 of yacc.c */ -#line 1023 "program_parse.y" +#line 1024 "program_parse.y" { (yyval.dst_reg) = (yyvsp[(2) - (3)].dst_reg); ;} @@ -3098,7 +3099,7 @@ yyreduce: case 91: /* Line 1455 of yacc.c */ -#line 1027 "program_parse.y" +#line 1028 "program_parse.y" { (yyval.dst_reg).CondMask = COND_TR; (yyval.dst_reg).CondSwizzle = SWIZZLE_NOOP; @@ -3109,7 +3110,7 @@ yyreduce: case 92: /* Line 1455 of yacc.c */ -#line 1035 "program_parse.y" +#line 1036 "program_parse.y" { (yyval.dst_reg) = (yyvsp[(1) - (2)].dst_reg); (yyval.dst_reg).CondSwizzle = (yyvsp[(2) - (2)].swiz_mask).swizzle; @@ -3119,7 +3120,7 @@ yyreduce: case 93: /* Line 1455 of yacc.c */ -#line 1042 "program_parse.y" +#line 1043 "program_parse.y" { (yyval.dst_reg) = (yyvsp[(1) - (2)].dst_reg); (yyval.dst_reg).CondSwizzle = (yyvsp[(2) - (2)].swiz_mask).swizzle; @@ -3129,7 +3130,7 @@ yyreduce: case 94: /* Line 1455 of yacc.c */ -#line 1049 "program_parse.y" +#line 1050 "program_parse.y" { const int cond = _mesa_parse_cc((yyvsp[(1) - (1)].string)); if ((cond == 0) || ((yyvsp[(1) - (1)].string)[2] != '\0')) { @@ -3155,7 +3156,7 @@ yyreduce: case 95: /* Line 1455 of yacc.c */ -#line 1072 "program_parse.y" +#line 1073 "program_parse.y" { const int cond = _mesa_parse_cc((yyvsp[(1) - (1)].string)); if ((cond == 0) || ((yyvsp[(1) - (1)].string)[2] != '\0')) { @@ -3181,7 +3182,7 @@ yyreduce: case 102: /* Line 1455 of yacc.c */ 
-#line 1103 "program_parse.y" +#line 1104 "program_parse.y" { struct asm_symbol *const s = declare_variable(state, (yyvsp[(2) - (4)].string), at_attrib, & (yylsp[(2) - (4)])); @@ -3203,7 +3204,7 @@ yyreduce: case 103: /* Line 1455 of yacc.c */ -#line 1122 "program_parse.y" +#line 1123 "program_parse.y" { (yyval.attrib) = (yyvsp[(2) - (2)].attrib); ;} @@ -3212,7 +3213,7 @@ yyreduce: case 104: /* Line 1455 of yacc.c */ -#line 1126 "program_parse.y" +#line 1127 "program_parse.y" { (yyval.attrib) = (yyvsp[(2) - (2)].attrib); ;} @@ -3221,7 +3222,7 @@ yyreduce: case 105: /* Line 1455 of yacc.c */ -#line 1132 "program_parse.y" +#line 1133 "program_parse.y" { (yyval.attrib) = VERT_ATTRIB_POS; ;} @@ -3230,7 +3231,7 @@ yyreduce: case 106: /* Line 1455 of yacc.c */ -#line 1136 "program_parse.y" +#line 1137 "program_parse.y" { (yyval.attrib) = VERT_ATTRIB_WEIGHT; ;} @@ -3239,7 +3240,7 @@ yyreduce: case 107: /* Line 1455 of yacc.c */ -#line 1140 "program_parse.y" +#line 1141 "program_parse.y" { (yyval.attrib) = VERT_ATTRIB_NORMAL; ;} @@ -3248,7 +3249,7 @@ yyreduce: case 108: /* Line 1455 of yacc.c */ -#line 1144 "program_parse.y" +#line 1145 "program_parse.y" { if (!state->ctx->Extensions.EXT_secondary_color) { yyerror(& (yylsp[(2) - (2)]), state, "GL_EXT_secondary_color not supported"); @@ -3262,7 +3263,7 @@ yyreduce: case 109: /* Line 1455 of yacc.c */ -#line 1153 "program_parse.y" +#line 1154 "program_parse.y" { if (!state->ctx->Extensions.EXT_fog_coord) { yyerror(& (yylsp[(1) - (1)]), state, "GL_EXT_fog_coord not supported"); @@ -3276,7 +3277,7 @@ yyreduce: case 110: /* Line 1455 of yacc.c */ -#line 1162 "program_parse.y" +#line 1163 "program_parse.y" { (yyval.attrib) = VERT_ATTRIB_TEX0 + (yyvsp[(2) - (2)].integer); ;} @@ -3285,7 +3286,7 @@ yyreduce: case 111: /* Line 1455 of yacc.c */ -#line 1166 "program_parse.y" +#line 1167 "program_parse.y" { yyerror(& (yylsp[(1) - (4)]), state, "GL_ARB_matrix_palette not supported"); YYERROR; @@ -3295,7 +3296,7 @@ yyreduce: case 112: 
/* Line 1455 of yacc.c */ -#line 1171 "program_parse.y" +#line 1172 "program_parse.y" { (yyval.attrib) = VERT_ATTRIB_GENERIC0 + (yyvsp[(3) - (4)].integer); ;} @@ -3304,7 +3305,7 @@ yyreduce: case 113: /* Line 1455 of yacc.c */ -#line 1177 "program_parse.y" +#line 1178 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxAttribs) { yyerror(& (yylsp[(1) - (1)]), state, "invalid vertex attribute reference"); @@ -3318,7 +3319,7 @@ yyreduce: case 117: /* Line 1455 of yacc.c */ -#line 1191 "program_parse.y" +#line 1192 "program_parse.y" { (yyval.attrib) = FRAG_ATTRIB_WPOS; ;} @@ -3327,7 +3328,7 @@ yyreduce: case 118: /* Line 1455 of yacc.c */ -#line 1195 "program_parse.y" +#line 1196 "program_parse.y" { (yyval.attrib) = FRAG_ATTRIB_COL0 + (yyvsp[(2) - (2)].integer); ;} @@ -3336,7 +3337,7 @@ yyreduce: case 119: /* Line 1455 of yacc.c */ -#line 1199 "program_parse.y" +#line 1200 "program_parse.y" { (yyval.attrib) = FRAG_ATTRIB_FOGC; ;} @@ -3345,7 +3346,7 @@ yyreduce: case 120: /* Line 1455 of yacc.c */ -#line 1203 "program_parse.y" +#line 1204 "program_parse.y" { (yyval.attrib) = FRAG_ATTRIB_TEX0 + (yyvsp[(2) - (2)].integer); ;} @@ -3354,7 +3355,7 @@ yyreduce: case 123: /* Line 1455 of yacc.c */ -#line 1211 "program_parse.y" +#line 1212 "program_parse.y" { struct asm_symbol *const s = declare_variable(state, (yyvsp[(2) - (3)].string), at_param, & (yylsp[(2) - (3)])); @@ -3375,7 +3376,7 @@ yyreduce: case 124: /* Line 1455 of yacc.c */ -#line 1229 "program_parse.y" +#line 1230 "program_parse.y" { if (((yyvsp[(4) - (6)].integer) != 0) && ((unsigned) (yyvsp[(4) - (6)].integer) != (yyvsp[(6) - (6)].temp_sym).param_binding_length)) { free((yyvsp[(2) - (6)].string)); @@ -3403,7 +3404,7 @@ yyreduce: case 125: /* Line 1455 of yacc.c */ -#line 1254 "program_parse.y" +#line 1255 "program_parse.y" { (yyval.integer) = 0; ;} @@ -3412,7 +3413,7 @@ yyreduce: case 126: /* Line 1455 of yacc.c */ -#line 1258 "program_parse.y" +#line 1259 "program_parse.y" { if 
(((yyvsp[(1) - (1)].integer) < 1) || ((unsigned) (yyvsp[(1) - (1)].integer) > state->limits->MaxParameters)) { yyerror(& (yylsp[(1) - (1)]), state, "invalid parameter array size"); @@ -3426,7 +3427,7 @@ yyreduce: case 127: /* Line 1455 of yacc.c */ -#line 1269 "program_parse.y" +#line 1270 "program_parse.y" { (yyval.temp_sym) = (yyvsp[(2) - (2)].temp_sym); ;} @@ -3435,7 +3436,7 @@ yyreduce: case 128: /* Line 1455 of yacc.c */ -#line 1275 "program_parse.y" +#line 1276 "program_parse.y" { (yyval.temp_sym) = (yyvsp[(3) - (4)].temp_sym); ;} @@ -3444,7 +3445,7 @@ yyreduce: case 130: /* Line 1455 of yacc.c */ -#line 1282 "program_parse.y" +#line 1283 "program_parse.y" { (yyvsp[(1) - (3)].temp_sym).param_binding_length += (yyvsp[(3) - (3)].temp_sym).param_binding_length; (yyval.temp_sym) = (yyvsp[(1) - (3)].temp_sym); @@ -3454,7 +3455,7 @@ yyreduce: case 131: /* Line 1455 of yacc.c */ -#line 1289 "program_parse.y" +#line 1290 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3465,7 +3466,7 @@ yyreduce: case 132: /* Line 1455 of yacc.c */ -#line 1295 "program_parse.y" +#line 1296 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3476,7 +3477,7 @@ yyreduce: case 133: /* Line 1455 of yacc.c */ -#line 1301 "program_parse.y" +#line 1302 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3487,7 +3488,7 @@ yyreduce: case 134: /* Line 1455 of yacc.c */ -#line 1309 "program_parse.y" +#line 1310 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3498,7 +3499,7 @@ yyreduce: case 135: /* Line 1455 of yacc.c */ -#line 1315 "program_parse.y" +#line 1316 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3509,7 +3510,7 @@ 
yyreduce: case 136: /* Line 1455 of yacc.c */ -#line 1321 "program_parse.y" +#line 1322 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3520,7 +3521,7 @@ yyreduce: case 137: /* Line 1455 of yacc.c */ -#line 1329 "program_parse.y" +#line 1330 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3531,7 +3532,7 @@ yyreduce: case 138: /* Line 1455 of yacc.c */ -#line 1335 "program_parse.y" +#line 1336 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3542,7 +3543,7 @@ yyreduce: case 139: /* Line 1455 of yacc.c */ -#line 1341 "program_parse.y" +#line 1342 "program_parse.y" { memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym))); (yyval.temp_sym).param_binding_begin = ~0; @@ -3553,98 +3554,98 @@ yyreduce: case 140: /* Line 1455 of yacc.c */ -#line 1348 "program_parse.y" +#line 1349 "program_parse.y" { memcpy((yyval.state), (yyvsp[(1) - (1)].state), sizeof((yyval.state))); ;} break; case 141: /* Line 1455 of yacc.c */ -#line 1349 "program_parse.y" +#line 1350 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 142: /* Line 1455 of yacc.c */ -#line 1352 "program_parse.y" +#line 1353 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 143: /* Line 1455 of yacc.c */ -#line 1353 "program_parse.y" +#line 1354 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 144: /* Line 1455 of yacc.c */ -#line 1354 "program_parse.y" +#line 1355 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 145: /* Line 1455 of yacc.c */ -#line 1355 "program_parse.y" +#line 1356 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), 
sizeof((yyval.state))); ;} break; case 146: /* Line 1455 of yacc.c */ -#line 1356 "program_parse.y" +#line 1357 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 147: /* Line 1455 of yacc.c */ -#line 1357 "program_parse.y" +#line 1358 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 148: /* Line 1455 of yacc.c */ -#line 1358 "program_parse.y" +#line 1359 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 149: /* Line 1455 of yacc.c */ -#line 1359 "program_parse.y" +#line 1360 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 150: /* Line 1455 of yacc.c */ -#line 1360 "program_parse.y" +#line 1361 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 151: /* Line 1455 of yacc.c */ -#line 1361 "program_parse.y" +#line 1362 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 152: /* Line 1455 of yacc.c */ -#line 1362 "program_parse.y" +#line 1363 "program_parse.y" { memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;} break; case 153: /* Line 1455 of yacc.c */ -#line 1366 "program_parse.y" +#line 1367 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_MATERIAL; @@ -3656,7 +3657,7 @@ yyreduce: case 154: /* Line 1455 of yacc.c */ -#line 1375 "program_parse.y" +#line 1376 "program_parse.y" { (yyval.integer) = (yyvsp[(1) - (1)].integer); ;} @@ -3665,7 +3666,7 @@ yyreduce: case 155: /* Line 1455 of yacc.c */ -#line 1379 "program_parse.y" +#line 1380 "program_parse.y" { (yyval.integer) = STATE_EMISSION; ;} @@ -3674,7 +3675,7 @@ yyreduce: case 156: /* Line 1455 of yacc.c */ -#line 1383 "program_parse.y" +#line 1384 "program_parse.y" { (yyval.integer) = STATE_SHININESS; ;} @@ 
-3683,7 +3684,7 @@ yyreduce: case 157: /* Line 1455 of yacc.c */ -#line 1389 "program_parse.y" +#line 1390 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_LIGHT; @@ -3695,7 +3696,7 @@ yyreduce: case 158: /* Line 1455 of yacc.c */ -#line 1398 "program_parse.y" +#line 1399 "program_parse.y" { (yyval.integer) = (yyvsp[(1) - (1)].integer); ;} @@ -3704,7 +3705,7 @@ yyreduce: case 159: /* Line 1455 of yacc.c */ -#line 1402 "program_parse.y" +#line 1403 "program_parse.y" { (yyval.integer) = STATE_POSITION; ;} @@ -3713,7 +3714,7 @@ yyreduce: case 160: /* Line 1455 of yacc.c */ -#line 1406 "program_parse.y" +#line 1407 "program_parse.y" { if (!state->ctx->Extensions.EXT_point_parameters) { yyerror(& (yylsp[(1) - (1)]), state, "GL_ARB_point_parameters not supported"); @@ -3727,7 +3728,7 @@ yyreduce: case 161: /* Line 1455 of yacc.c */ -#line 1415 "program_parse.y" +#line 1416 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (2)].integer); ;} @@ -3736,7 +3737,7 @@ yyreduce: case 162: /* Line 1455 of yacc.c */ -#line 1419 "program_parse.y" +#line 1420 "program_parse.y" { (yyval.integer) = STATE_HALF_VECTOR; ;} @@ -3745,7 +3746,7 @@ yyreduce: case 163: /* Line 1455 of yacc.c */ -#line 1425 "program_parse.y" +#line 1426 "program_parse.y" { (yyval.integer) = STATE_SPOT_DIRECTION; ;} @@ -3754,7 +3755,7 @@ yyreduce: case 164: /* Line 1455 of yacc.c */ -#line 1431 "program_parse.y" +#line 1432 "program_parse.y" { (yyval.state)[0] = (yyvsp[(2) - (2)].state)[0]; (yyval.state)[1] = (yyvsp[(2) - (2)].state)[1]; @@ -3764,7 +3765,7 @@ yyreduce: case 165: /* Line 1455 of yacc.c */ -#line 1438 "program_parse.y" +#line 1439 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_LIGHTMODEL_AMBIENT; @@ -3774,7 +3775,7 @@ yyreduce: case 166: /* Line 1455 of yacc.c */ -#line 1443 "program_parse.y" +#line 1444 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = 
STATE_LIGHTMODEL_SCENECOLOR; @@ -3785,7 +3786,7 @@ yyreduce: case 167: /* Line 1455 of yacc.c */ -#line 1451 "program_parse.y" +#line 1452 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_LIGHTPROD; @@ -3798,7 +3799,7 @@ yyreduce: case 169: /* Line 1455 of yacc.c */ -#line 1463 "program_parse.y" +#line 1464 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = (yyvsp[(3) - (3)].integer); @@ -3809,7 +3810,7 @@ yyreduce: case 170: /* Line 1455 of yacc.c */ -#line 1471 "program_parse.y" +#line 1472 "program_parse.y" { (yyval.integer) = STATE_TEXENV_COLOR; ;} @@ -3818,7 +3819,7 @@ yyreduce: case 171: /* Line 1455 of yacc.c */ -#line 1477 "program_parse.y" +#line 1478 "program_parse.y" { (yyval.integer) = STATE_AMBIENT; ;} @@ -3827,7 +3828,7 @@ yyreduce: case 172: /* Line 1455 of yacc.c */ -#line 1481 "program_parse.y" +#line 1482 "program_parse.y" { (yyval.integer) = STATE_DIFFUSE; ;} @@ -3836,7 +3837,7 @@ yyreduce: case 173: /* Line 1455 of yacc.c */ -#line 1485 "program_parse.y" +#line 1486 "program_parse.y" { (yyval.integer) = STATE_SPECULAR; ;} @@ -3845,7 +3846,7 @@ yyreduce: case 174: /* Line 1455 of yacc.c */ -#line 1491 "program_parse.y" +#line 1492 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxLights) { yyerror(& (yylsp[(1) - (1)]), state, "invalid light selector"); @@ -3859,7 +3860,7 @@ yyreduce: case 175: /* Line 1455 of yacc.c */ -#line 1502 "program_parse.y" +#line 1503 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_TEXGEN; @@ -3871,7 +3872,7 @@ yyreduce: case 176: /* Line 1455 of yacc.c */ -#line 1511 "program_parse.y" +#line 1512 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_EYE_S; ;} @@ -3880,7 +3881,7 @@ yyreduce: case 177: /* Line 1455 of yacc.c */ -#line 1515 "program_parse.y" +#line 1516 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_OBJECT_S; ;} @@ -3889,7 +3890,7 @@ yyreduce: case 178: 
/* Line 1455 of yacc.c */ -#line 1520 "program_parse.y" +#line 1521 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_EYE_S - STATE_TEXGEN_EYE_S; ;} @@ -3898,7 +3899,7 @@ yyreduce: case 179: /* Line 1455 of yacc.c */ -#line 1524 "program_parse.y" +#line 1525 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_EYE_T - STATE_TEXGEN_EYE_S; ;} @@ -3907,7 +3908,7 @@ yyreduce: case 180: /* Line 1455 of yacc.c */ -#line 1528 "program_parse.y" +#line 1529 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_EYE_R - STATE_TEXGEN_EYE_S; ;} @@ -3916,7 +3917,7 @@ yyreduce: case 181: /* Line 1455 of yacc.c */ -#line 1532 "program_parse.y" +#line 1533 "program_parse.y" { (yyval.integer) = STATE_TEXGEN_EYE_Q - STATE_TEXGEN_EYE_S; ;} @@ -3925,7 +3926,7 @@ yyreduce: case 182: /* Line 1455 of yacc.c */ -#line 1538 "program_parse.y" +#line 1539 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = (yyvsp[(2) - (2)].integer); @@ -3935,7 +3936,7 @@ yyreduce: case 183: /* Line 1455 of yacc.c */ -#line 1545 "program_parse.y" +#line 1546 "program_parse.y" { (yyval.integer) = STATE_FOG_COLOR; ;} @@ -3944,7 +3945,7 @@ yyreduce: case 184: /* Line 1455 of yacc.c */ -#line 1549 "program_parse.y" +#line 1550 "program_parse.y" { (yyval.integer) = STATE_FOG_PARAMS; ;} @@ -3953,7 +3954,7 @@ yyreduce: case 185: /* Line 1455 of yacc.c */ -#line 1555 "program_parse.y" +#line 1556 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_CLIPPLANE; @@ -3964,7 +3965,7 @@ yyreduce: case 186: /* Line 1455 of yacc.c */ -#line 1563 "program_parse.y" +#line 1564 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxClipPlanes) { yyerror(& (yylsp[(1) - (1)]), state, "invalid clip plane selector"); @@ -3978,7 +3979,7 @@ yyreduce: case 187: /* Line 1455 of yacc.c */ -#line 1574 "program_parse.y" +#line 1575 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = (yyvsp[(2) - (2)].integer); 
@@ -3988,7 +3989,7 @@ yyreduce: case 188: /* Line 1455 of yacc.c */ -#line 1581 "program_parse.y" +#line 1582 "program_parse.y" { (yyval.integer) = STATE_POINT_SIZE; ;} @@ -3997,7 +3998,7 @@ yyreduce: case 189: /* Line 1455 of yacc.c */ -#line 1585 "program_parse.y" +#line 1586 "program_parse.y" { (yyval.integer) = STATE_POINT_ATTENUATION; ;} @@ -4006,7 +4007,7 @@ yyreduce: case 190: /* Line 1455 of yacc.c */ -#line 1591 "program_parse.y" +#line 1592 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - (5)].state)[0]; (yyval.state)[1] = (yyvsp[(1) - (5)].state)[1]; @@ -4019,7 +4020,7 @@ yyreduce: case 191: /* Line 1455 of yacc.c */ -#line 1601 "program_parse.y" +#line 1602 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - (2)].state)[0]; (yyval.state)[1] = (yyvsp[(1) - (2)].state)[1]; @@ -4032,7 +4033,7 @@ yyreduce: case 192: /* Line 1455 of yacc.c */ -#line 1611 "program_parse.y" +#line 1612 "program_parse.y" { (yyval.state)[2] = 0; (yyval.state)[3] = 3; @@ -4042,7 +4043,7 @@ yyreduce: case 193: /* Line 1455 of yacc.c */ -#line 1616 "program_parse.y" +#line 1617 "program_parse.y" { /* It seems logical that the matrix row range specifier would have * to specify a range or more than one row (i.e., $5 > $3). 
@@ -4063,7 +4064,7 @@ yyreduce: case 194: /* Line 1455 of yacc.c */ -#line 1634 "program_parse.y" +#line 1635 "program_parse.y" { (yyval.state)[0] = (yyvsp[(2) - (3)].state)[0]; (yyval.state)[1] = (yyvsp[(2) - (3)].state)[1]; @@ -4074,7 +4075,7 @@ yyreduce: case 195: /* Line 1455 of yacc.c */ -#line 1642 "program_parse.y" +#line 1643 "program_parse.y" { (yyval.integer) = 0; ;} @@ -4083,7 +4084,7 @@ yyreduce: case 196: /* Line 1455 of yacc.c */ -#line 1646 "program_parse.y" +#line 1647 "program_parse.y" { (yyval.integer) = (yyvsp[(1) - (1)].integer); ;} @@ -4092,7 +4093,7 @@ yyreduce: case 197: /* Line 1455 of yacc.c */ -#line 1652 "program_parse.y" +#line 1653 "program_parse.y" { (yyval.integer) = STATE_MATRIX_INVERSE; ;} @@ -4101,7 +4102,7 @@ yyreduce: case 198: /* Line 1455 of yacc.c */ -#line 1656 "program_parse.y" +#line 1657 "program_parse.y" { (yyval.integer) = STATE_MATRIX_TRANSPOSE; ;} @@ -4110,7 +4111,7 @@ yyreduce: case 199: /* Line 1455 of yacc.c */ -#line 1660 "program_parse.y" +#line 1661 "program_parse.y" { (yyval.integer) = STATE_MATRIX_INVTRANS; ;} @@ -4119,7 +4120,7 @@ yyreduce: case 200: /* Line 1455 of yacc.c */ -#line 1666 "program_parse.y" +#line 1667 "program_parse.y" { if ((yyvsp[(1) - (1)].integer) > 3) { yyerror(& (yylsp[(1) - (1)]), state, "invalid matrix row reference"); @@ -4133,7 +4134,7 @@ yyreduce: case 201: /* Line 1455 of yacc.c */ -#line 1677 "program_parse.y" +#line 1678 "program_parse.y" { (yyval.state)[0] = STATE_MODELVIEW_MATRIX; (yyval.state)[1] = (yyvsp[(2) - (2)].integer); @@ -4143,7 +4144,7 @@ yyreduce: case 202: /* Line 1455 of yacc.c */ -#line 1682 "program_parse.y" +#line 1683 "program_parse.y" { (yyval.state)[0] = STATE_PROJECTION_MATRIX; (yyval.state)[1] = 0; @@ -4153,7 +4154,7 @@ yyreduce: case 203: /* Line 1455 of yacc.c */ -#line 1687 "program_parse.y" +#line 1688 "program_parse.y" { (yyval.state)[0] = STATE_MVP_MATRIX; (yyval.state)[1] = 0; @@ -4163,7 +4164,7 @@ yyreduce: case 204: /* Line 1455 of yacc.c */ -#line 
1692 "program_parse.y" +#line 1693 "program_parse.y" { (yyval.state)[0] = STATE_TEXTURE_MATRIX; (yyval.state)[1] = (yyvsp[(2) - (2)].integer); @@ -4173,7 +4174,7 @@ yyreduce: case 205: /* Line 1455 of yacc.c */ -#line 1697 "program_parse.y" +#line 1698 "program_parse.y" { yyerror(& (yylsp[(1) - (4)]), state, "GL_ARB_matrix_palette not supported"); YYERROR; @@ -4183,7 +4184,7 @@ yyreduce: case 206: /* Line 1455 of yacc.c */ -#line 1702 "program_parse.y" +#line 1703 "program_parse.y" { (yyval.state)[0] = STATE_PROGRAM_MATRIX; (yyval.state)[1] = (yyvsp[(3) - (4)].integer); @@ -4193,7 +4194,7 @@ yyreduce: case 207: /* Line 1455 of yacc.c */ -#line 1709 "program_parse.y" +#line 1710 "program_parse.y" { (yyval.integer) = 0; ;} @@ -4202,7 +4203,7 @@ yyreduce: case 208: /* Line 1455 of yacc.c */ -#line 1713 "program_parse.y" +#line 1714 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (3)].integer); ;} @@ -4211,7 +4212,7 @@ yyreduce: case 209: /* Line 1455 of yacc.c */ -#line 1718 "program_parse.y" +#line 1719 "program_parse.y" { /* Since GL_ARB_vertex_blend isn't supported, only modelview matrix * zero is valid. @@ -4228,7 +4229,7 @@ yyreduce: case 210: /* Line 1455 of yacc.c */ -#line 1731 "program_parse.y" +#line 1732 "program_parse.y" { /* Since GL_ARB_matrix_palette isn't supported, just let any value * through here. The error will be generated later. 
@@ -4240,7 +4241,7 @@ yyreduce: case 211: /* Line 1455 of yacc.c */ -#line 1739 "program_parse.y" +#line 1740 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxProgramMatrices) { yyerror(& (yylsp[(1) - (1)]), state, "invalid program matrix selector"); @@ -4254,7 +4255,7 @@ yyreduce: case 212: /* Line 1455 of yacc.c */ -#line 1750 "program_parse.y" +#line 1751 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = STATE_DEPTH_RANGE; @@ -4264,7 +4265,7 @@ yyreduce: case 217: /* Line 1455 of yacc.c */ -#line 1762 "program_parse.y" +#line 1763 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = state->state_param_enum; @@ -4277,7 +4278,7 @@ yyreduce: case 218: /* Line 1455 of yacc.c */ -#line 1772 "program_parse.y" +#line 1773 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - (1)].integer); (yyval.state)[1] = (yyvsp[(1) - (1)].integer); @@ -4287,7 +4288,7 @@ yyreduce: case 219: /* Line 1455 of yacc.c */ -#line 1777 "program_parse.y" +#line 1778 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - (3)].integer); (yyval.state)[1] = (yyvsp[(3) - (3)].integer); @@ -4297,7 +4298,7 @@ yyreduce: case 220: /* Line 1455 of yacc.c */ -#line 1784 "program_parse.y" +#line 1785 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = state->state_param_enum; @@ -4310,7 +4311,7 @@ yyreduce: case 221: /* Line 1455 of yacc.c */ -#line 1794 "program_parse.y" +#line 1795 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = state->state_param_enum; @@ -4323,7 +4324,7 @@ yyreduce: case 222: /* Line 1455 of yacc.c */ -#line 1803 "program_parse.y" +#line 1804 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - (1)].integer); (yyval.state)[1] = (yyvsp[(1) - (1)].integer); @@ -4333,7 +4334,7 @@ yyreduce: case 223: /* Line 1455 of yacc.c */ -#line 1808 "program_parse.y" +#line 1809 "program_parse.y" { (yyval.state)[0] = (yyvsp[(1) - 
(3)].integer); (yyval.state)[1] = (yyvsp[(3) - (3)].integer); @@ -4343,7 +4344,7 @@ yyreduce: case 224: /* Line 1455 of yacc.c */ -#line 1815 "program_parse.y" +#line 1816 "program_parse.y" { memset((yyval.state), 0, sizeof((yyval.state))); (yyval.state)[0] = state->state_param_enum; @@ -4356,7 +4357,7 @@ yyreduce: case 225: /* Line 1455 of yacc.c */ -#line 1825 "program_parse.y" +#line 1826 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxEnvParams) { yyerror(& (yylsp[(1) - (1)]), state, "invalid environment parameter reference"); @@ -4369,7 +4370,7 @@ yyreduce: case 226: /* Line 1455 of yacc.c */ -#line 1835 "program_parse.y" +#line 1836 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxLocalParams) { yyerror(& (yylsp[(1) - (1)]), state, "invalid local parameter reference"); @@ -4382,7 +4383,7 @@ yyreduce: case 231: /* Line 1455 of yacc.c */ -#line 1850 "program_parse.y" +#line 1851 "program_parse.y" { (yyval.vector).count = 4; (yyval.vector).data[0] = (yyvsp[(1) - (1)].real); @@ -4395,7 +4396,7 @@ yyreduce: case 232: /* Line 1455 of yacc.c */ -#line 1860 "program_parse.y" +#line 1861 "program_parse.y" { (yyval.vector).count = 1; (yyval.vector).data[0] = (yyvsp[(1) - (1)].real); @@ -4408,7 +4409,7 @@ yyreduce: case 233: /* Line 1455 of yacc.c */ -#line 1868 "program_parse.y" +#line 1869 "program_parse.y" { (yyval.vector).count = 1; (yyval.vector).data[0] = (float) (yyvsp[(1) - (1)].integer); @@ -4421,7 +4422,7 @@ yyreduce: case 234: /* Line 1455 of yacc.c */ -#line 1878 "program_parse.y" +#line 1879 "program_parse.y" { (yyval.vector).count = 4; (yyval.vector).data[0] = (yyvsp[(2) - (3)].real); @@ -4434,7 +4435,7 @@ yyreduce: case 235: /* Line 1455 of yacc.c */ -#line 1886 "program_parse.y" +#line 1887 "program_parse.y" { (yyval.vector).count = 4; (yyval.vector).data[0] = (yyvsp[(2) - (5)].real); @@ -4447,7 +4448,7 @@ yyreduce: case 236: /* Line 1455 of yacc.c */ -#line 1895 "program_parse.y" 
+#line 1896 "program_parse.y" { (yyval.vector).count = 4; (yyval.vector).data[0] = (yyvsp[(2) - (7)].real); @@ -4460,7 +4461,7 @@ yyreduce: case 237: /* Line 1455 of yacc.c */ -#line 1904 "program_parse.y" +#line 1905 "program_parse.y" { (yyval.vector).count = 4; (yyval.vector).data[0] = (yyvsp[(2) - (9)].real); @@ -4473,7 +4474,7 @@ yyreduce: case 238: /* Line 1455 of yacc.c */ -#line 1914 "program_parse.y" +#line 1915 "program_parse.y" { (yyval.real) = ((yyvsp[(1) - (2)].negate)) ? -(yyvsp[(2) - (2)].real) : (yyvsp[(2) - (2)].real); ;} @@ -4482,7 +4483,7 @@ yyreduce: case 239: /* Line 1455 of yacc.c */ -#line 1918 "program_parse.y" +#line 1919 "program_parse.y" { (yyval.real) = (float)(((yyvsp[(1) - (2)].negate)) ? -(yyvsp[(2) - (2)].integer) : (yyvsp[(2) - (2)].integer)); ;} @@ -4491,35 +4492,35 @@ yyreduce: case 240: /* Line 1455 of yacc.c */ -#line 1923 "program_parse.y" +#line 1924 "program_parse.y" { (yyval.negate) = FALSE; ;} break; case 241: /* Line 1455 of yacc.c */ -#line 1924 "program_parse.y" +#line 1925 "program_parse.y" { (yyval.negate) = TRUE; ;} break; case 242: /* Line 1455 of yacc.c */ -#line 1925 "program_parse.y" +#line 1926 "program_parse.y" { (yyval.negate) = FALSE; ;} break; case 243: /* Line 1455 of yacc.c */ -#line 1928 "program_parse.y" +#line 1929 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (2)].integer); ;} break; case 245: /* Line 1455 of yacc.c */ -#line 1932 "program_parse.y" +#line 1933 "program_parse.y" { /* NV_fragment_program_option defines the size qualifiers in a * fairly broken way. 
"SHORT" or "LONG" can optionally be used @@ -4558,7 +4559,7 @@ yyreduce: case 246: /* Line 1455 of yacc.c */ -#line 1966 "program_parse.y" +#line 1967 "program_parse.y" { ;} break; @@ -4566,14 +4567,14 @@ yyreduce: case 247: /* Line 1455 of yacc.c */ -#line 1970 "program_parse.y" +#line 1971 "program_parse.y" { (yyval.integer) = (yyvsp[(1) - (1)].integer); ;} break; case 249: /* Line 1455 of yacc.c */ -#line 1974 "program_parse.y" +#line 1975 "program_parse.y" { if (!declare_variable(state, (yyvsp[(3) - (3)].string), (yyvsp[(0) - (3)].integer), & (yylsp[(3) - (3)]))) { free((yyvsp[(3) - (3)].string)); @@ -4585,7 +4586,7 @@ yyreduce: case 250: /* Line 1455 of yacc.c */ -#line 1981 "program_parse.y" +#line 1982 "program_parse.y" { if (!declare_variable(state, (yyvsp[(1) - (1)].string), (yyvsp[(0) - (1)].integer), & (yylsp[(1) - (1)]))) { free((yyvsp[(1) - (1)].string)); @@ -4597,7 +4598,7 @@ yyreduce: case 251: /* Line 1455 of yacc.c */ -#line 1990 "program_parse.y" +#line 1991 "program_parse.y" { struct asm_symbol *const s = declare_variable(state, (yyvsp[(3) - (5)].string), at_output, & (yylsp[(3) - (5)])); @@ -4614,7 +4615,7 @@ yyreduce: case 252: /* Line 1455 of yacc.c */ -#line 2004 "program_parse.y" +#line 2005 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.result) = VERT_RESULT_HPOS; @@ -4628,7 +4629,7 @@ yyreduce: case 253: /* Line 1455 of yacc.c */ -#line 2013 "program_parse.y" +#line 2014 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.result) = VERT_RESULT_FOGC; @@ -4642,7 +4643,7 @@ yyreduce: case 254: /* Line 1455 of yacc.c */ -#line 2022 "program_parse.y" +#line 2023 "program_parse.y" { (yyval.result) = (yyvsp[(2) - (2)].result); ;} @@ -4651,7 +4652,7 @@ yyreduce: case 255: /* Line 1455 of yacc.c */ -#line 2026 "program_parse.y" +#line 2027 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.result) = VERT_RESULT_PSIZ; @@ -4665,7 +4666,7 @@ yyreduce: case 256: /* Line 1455 of yacc.c */ -#line 2035 "program_parse.y" 
+#line 2036 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.result) = VERT_RESULT_TEX0 + (yyvsp[(3) - (3)].integer); @@ -4679,7 +4680,7 @@ yyreduce: case 257: /* Line 1455 of yacc.c */ -#line 2044 "program_parse.y" +#line 2045 "program_parse.y" { if (state->mode == ARB_fragment) { (yyval.result) = FRAG_RESULT_DEPTH; @@ -4693,7 +4694,7 @@ yyreduce: case 258: /* Line 1455 of yacc.c */ -#line 2055 "program_parse.y" +#line 2056 "program_parse.y" { (yyval.result) = (yyvsp[(2) - (3)].integer) + (yyvsp[(3) - (3)].integer); ;} @@ -4702,7 +4703,7 @@ yyreduce: case 259: /* Line 1455 of yacc.c */ -#line 2061 "program_parse.y" +#line 2062 "program_parse.y" { (yyval.integer) = (state->mode == ARB_vertex) ? VERT_RESULT_COL0 @@ -4713,7 +4714,7 @@ yyreduce: case 260: /* Line 1455 of yacc.c */ -#line 2067 "program_parse.y" +#line 2068 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.integer) = VERT_RESULT_COL0; @@ -4727,7 +4728,7 @@ yyreduce: case 261: /* Line 1455 of yacc.c */ -#line 2076 "program_parse.y" +#line 2077 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.integer) = VERT_RESULT_BFC0; @@ -4741,7 +4742,7 @@ yyreduce: case 262: /* Line 1455 of yacc.c */ -#line 2087 "program_parse.y" +#line 2088 "program_parse.y" { (yyval.integer) = 0; ;} @@ -4750,7 +4751,7 @@ yyreduce: case 263: /* Line 1455 of yacc.c */ -#line 2091 "program_parse.y" +#line 2092 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.integer) = 0; @@ -4764,7 +4765,7 @@ yyreduce: case 264: /* Line 1455 of yacc.c */ -#line 2100 "program_parse.y" +#line 2101 "program_parse.y" { if (state->mode == ARB_vertex) { (yyval.integer) = 1; @@ -4778,91 +4779,91 @@ yyreduce: case 265: /* Line 1455 of yacc.c */ -#line 2110 "program_parse.y" +#line 2111 "program_parse.y" { (yyval.integer) = 0; ;} break; case 266: /* Line 1455 of yacc.c */ -#line 2111 "program_parse.y" +#line 2112 "program_parse.y" { (yyval.integer) = 0; ;} break; case 267: /* Line 1455 of yacc.c */ -#line 2112 
"program_parse.y" +#line 2113 "program_parse.y" { (yyval.integer) = 1; ;} break; case 268: /* Line 1455 of yacc.c */ -#line 2115 "program_parse.y" +#line 2116 "program_parse.y" { (yyval.integer) = 0; ;} break; case 269: /* Line 1455 of yacc.c */ -#line 2116 "program_parse.y" +#line 2117 "program_parse.y" { (yyval.integer) = 0; ;} break; case 270: /* Line 1455 of yacc.c */ -#line 2117 "program_parse.y" +#line 2118 "program_parse.y" { (yyval.integer) = 1; ;} break; case 271: /* Line 1455 of yacc.c */ -#line 2120 "program_parse.y" +#line 2121 "program_parse.y" { (yyval.integer) = 0; ;} break; case 272: /* Line 1455 of yacc.c */ -#line 2121 "program_parse.y" +#line 2122 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (3)].integer); ;} break; case 273: /* Line 1455 of yacc.c */ -#line 2124 "program_parse.y" +#line 2125 "program_parse.y" { (yyval.integer) = 0; ;} break; case 274: /* Line 1455 of yacc.c */ -#line 2125 "program_parse.y" +#line 2126 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (3)].integer); ;} break; case 275: /* Line 1455 of yacc.c */ -#line 2128 "program_parse.y" +#line 2129 "program_parse.y" { (yyval.integer) = 0; ;} break; case 276: /* Line 1455 of yacc.c */ -#line 2129 "program_parse.y" +#line 2130 "program_parse.y" { (yyval.integer) = (yyvsp[(2) - (3)].integer); ;} break; case 277: /* Line 1455 of yacc.c */ -#line 2133 "program_parse.y" +#line 2134 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureCoordUnits) { yyerror(& (yylsp[(1) - (1)]), state, "invalid texture coordinate unit selector"); @@ -4876,7 +4877,7 @@ yyreduce: case 278: /* Line 1455 of yacc.c */ -#line 2144 "program_parse.y" +#line 2145 "program_parse.y" { if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureImageUnits) { yyerror(& (yylsp[(1) - (1)]), state, "invalid texture image unit selector"); @@ -4890,7 +4891,7 @@ yyreduce: case 279: /* Line 1455 of yacc.c */ -#line 2155 "program_parse.y" +#line 2156 "program_parse.y" { if 
((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureUnits) { yyerror(& (yylsp[(1) - (1)]), state, "invalid texture unit selector"); @@ -4904,7 +4905,7 @@ yyreduce: case 280: /* Line 1455 of yacc.c */ -#line 2166 "program_parse.y" +#line 2167 "program_parse.y" { struct asm_symbol *exist = (struct asm_symbol *) _mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(2) - (4)].string)); @@ -4933,7 +4934,7 @@ yyreduce: /* Line 1455 of yacc.c */ -#line 4937 "program_parse.tab.c" +#line 4938 "program_parse.tab.c" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -5152,7 +5153,7 @@ yyreturn: /* Line 1675 of yacc.c */ -#line 2195 "program_parse.y" +#line 2196 "program_parse.y" void diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 861927c744c..fb6ef85a9fc 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -835,6 +835,7 @@ srcReg: USED_IDENTIFIER /* temporaryReg | progParamSingle */ $$.Base.File = $1->param_binding_type; if ($3.Base.RelAddr) { + state->prog->IndirectRegisterFiles |= (1 << $$.Base.File); $1->param_accessed_indirectly = 1; $$.Base.RelAddr = 1; @@ -934,7 +935,7 @@ addrRegRelOffset: { $$ = 0; } addrRegPosOffset: INTEGER { - if (($1 < 0) || ($1 > 63)) { + if (($1 < 0) || ($1 > 4095)) { char s[100]; _mesa_snprintf(s, sizeof(s), "relative address offset too large (%d)", $1); @@ -948,7 +949,7 @@ addrRegPosOffset: INTEGER addrRegNegOffset: INTEGER { - if (($1 < 0) || ($1 > 64)) { + if (($1 < 0) || ($1 > 4096)) { char s[100]; _mesa_snprintf(s, sizeof(s), "relative address offset too large (%d)", $1); diff --git a/src/mesa/program/programopt.h b/src/mesa/program/programopt.h index 21fac07849a..4af6357f976 100644 --- a/src/mesa/program/programopt.h +++ b/src/mesa/program/programopt.h @@ -26,6 +26,7 @@ #ifndef PROGRAMOPT_H #define PROGRAMOPT_H 1 +#include "main/mtypes.h" extern void _mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog); diff --git 
a/src/mesa/slang/library/slang_common_builtin.gc b/src/mesa/slang/library/slang_common_builtin.gc index d75354deffe..1f5ddbc1ee2 100644 --- a/src/mesa/slang/library/slang_common_builtin.gc +++ b/src/mesa/slang/library/slang_common_builtin.gc @@ -411,7 +411,7 @@ float atan(const float y, const float x) if (abs(x) > 1.0e-4) { r = atan(y / x); if (x < 0.0) { - r = r + sign(y) * 3.141593; + r = r + 3.141593 - 6.283186 * float(y < 0.0); } } else { diff --git a/src/mesa/slang/slang_builtin.h b/src/mesa/slang/slang_builtin.h index ed9ae80b3c3..dc92f83f8ef 100644 --- a/src/mesa/slang/slang_builtin.h +++ b/src/mesa/slang/slang_builtin.h @@ -26,8 +26,8 @@ #ifndef SLANG_BUILTIN_H #define SLANG_BUILTIN_H -#include "program/prog_parameter.h" -#include "slang_utility.h" +#include "main/glheader.h" +#include "main/mtypes.h" #include "slang_ir.h" diff --git a/src/mesa/slang/slang_codegen.h b/src/mesa/slang/slang_codegen.h index 461633fe346..ff0279bbfed 100644 --- a/src/mesa/slang/slang_codegen.h +++ b/src/mesa/slang/slang_codegen.h @@ -27,9 +27,13 @@ #define SLANG_CODEGEN_H -#include "main/imports.h" +#include "main/glheader.h" #include "slang_compile.h" +#include "slang_compile_variable.h" +#include "slang_typeinfo.h" +#include "slang_utility.h" +struct slang_function_; #define MAX_LOOP_DEPTH 30 diff --git a/src/mesa/slang/slang_compile.c b/src/mesa/slang/slang_compile.c index 12ab4666aed..de1bb56cd9a 100644 --- a/src/mesa/slang/slang_compile.c +++ b/src/mesa/slang/slang_compile.c @@ -36,6 +36,7 @@ #include "program/prog_print.h" #include "program/prog_parameter.h" #include "../../glsl/pp/sl_pp_public.h" +#include "../../glsl/pp/sl_pp_purify.h" #include "../../glsl/cl/sl_cl_parse.h" #include "slang_codegen.h" #include "slang_compile.h" diff --git a/src/mesa/slang/slang_compile.h b/src/mesa/slang/slang_compile.h index 71fcaa39931..6061f878e75 100644 --- a/src/mesa/slang/slang_compile.h +++ b/src/mesa/slang/slang_compile.h @@ -25,13 +25,14 @@ #if !defined SLANG_COMPILE_H #define 
SLANG_COMPILE_H -#include "main/imports.h" +#include "main/glheader.h" #include "main/mtypes.h" -#include "slang_typeinfo.h" -#include "slang_compile_variable.h" -#include "slang_compile_struct.h" -#include "slang_compile_operation.h" #include "slang_compile_function.h" +#include "slang_compile_struct.h" +#include "slang_compile_variable.h" +#include "slang_utility.h" + +struct slang_code_object_; #if defined __cplusplus extern "C" { diff --git a/src/mesa/slang/slang_compile_function.h b/src/mesa/slang/slang_compile_function.h index a5445ec2537..0eced3ca1a1 100644 --- a/src/mesa/slang/slang_compile_function.h +++ b/src/mesa/slang/slang_compile_function.h @@ -25,6 +25,14 @@ #ifndef SLANG_COMPILE_FUNCTION_H #define SLANG_COMPILE_FUNCTION_H +#include "main/glheader.h" +#include "slang_compile_operation.h" +#include "slang_compile_variable.h" +#include "slang_log.h" +#include "slang_utility.h" + +struct slang_name_space_; +struct slang_operation_; /** * Types of functions. diff --git a/src/mesa/slang/slang_compile_operation.h b/src/mesa/slang/slang_compile_operation.h index 1f15c198963..b8c5f214cf0 100644 --- a/src/mesa/slang/slang_compile_operation.h +++ b/src/mesa/slang/slang_compile_operation.h @@ -26,6 +26,10 @@ #define SLANG_COMPILE_OPERATION_H +#include "main/glheader.h" +#include "slang_compile_variable.h" +#include "slang_utility.h" + /** * Types of slang operations. * These are the types of the AST (abstract syntax tree) nodes. 
diff --git a/src/mesa/slang/slang_compile_struct.h b/src/mesa/slang/slang_compile_struct.h index 90c5512f4d3..7be6f204e11 100644 --- a/src/mesa/slang/slang_compile_struct.h +++ b/src/mesa/slang/slang_compile_struct.h @@ -29,6 +29,9 @@ extern "C" { #endif +#include "main/glheader.h" +#include "slang_utility.h" + struct slang_function_; typedef struct slang_struct_scope_ diff --git a/src/mesa/slang/slang_compile_variable.h b/src/mesa/slang/slang_compile_variable.h index 5c9d248b354..48dc6efca4b 100644 --- a/src/mesa/slang/slang_compile_variable.h +++ b/src/mesa/slang/slang_compile_variable.h @@ -26,7 +26,9 @@ #define SLANG_COMPILE_VARIABLE_H -struct slang_ir_storage_; +#include "main/glheader.h" +#include "slang_typeinfo.h" +#include "slang_utility.h" /** diff --git a/src/mesa/slang/slang_emit.h b/src/mesa/slang/slang_emit.h index ab4c202d673..f93d6b00d69 100644 --- a/src/mesa/slang/slang_emit.h +++ b/src/mesa/slang/slang_emit.h @@ -25,11 +25,9 @@ #ifndef SLANG_EMIT_H #define SLANG_EMIT_H - -#include "main/imports.h" -#include "slang_compile.h" +#include "main/glheader.h" #include "slang_ir.h" -#include "main/mtypes.h" +#include "slang_vartable.h" extern GLuint diff --git a/src/mesa/slang/slang_ir.h b/src/mesa/slang/slang_ir.h index b7a373746b4..ce9a6c5a483 100644 --- a/src/mesa/slang/slang_ir.h +++ b/src/mesa/slang/slang_ir.h @@ -37,6 +37,7 @@ #include "slang_compile.h" #include "slang_label.h" #include "main/mtypes.h" +#include "program/prog_instruction.h" /** diff --git a/src/mesa/slang/slang_label.c b/src/mesa/slang/slang_label.c index 8e3a8ebc1aa..24881d5b6e6 100644 --- a/src/mesa/slang/slang_label.c +++ b/src/mesa/slang/slang_label.c @@ -7,6 +7,8 @@ */ +#include "main/mtypes.h" +#include "program/prog_instruction.h" #include "slang_label.h" #include "slang_mem.h" diff --git a/src/mesa/slang/slang_label.h b/src/mesa/slang/slang_label.h index 4d04df18d25..b0cff3a8e89 100644 --- a/src/mesa/slang/slang_label.h +++ b/src/mesa/slang/slang_label.h @@ -1,10 +1,9 @@ 
#ifndef SLANG_LABEL_H #define SLANG_LABEL_H 1 -#include "main/imports.h" -#include "main/mtypes.h" -#include "program/prog_instruction.h" +#include "main/glheader.h" +struct gl_program; struct slang_label_ { diff --git a/src/mesa/slang/slang_link.c b/src/mesa/slang/slang_link.c index 00c2c13cc67..c21f67256a5 100644 --- a/src/mesa/slang/slang_link.c +++ b/src/mesa/slang/slang_link.c @@ -756,6 +756,8 @@ _slang_update_inputs_outputs(struct gl_program *prog) prog->InputsRead = 0x0; prog->OutputsWritten = 0x0; + prog->IndirectRegisterFiles = 0x0; + for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); @@ -774,6 +776,9 @@ _slang_update_inputs_outputs(struct gl_program *prog) else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) { maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1)); } + + if (inst->SrcReg[j].RelAddr) + prog->IndirectRegisterFiles |= (1 << inst->SrcReg[j].File); } if (inst->DstReg.File == PROGRAM_OUTPUT) { @@ -784,6 +789,8 @@ _slang_update_inputs_outputs(struct gl_program *prog) else if (inst->DstReg.File == PROGRAM_ADDRESS) { maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1); } + if (inst->DstReg.RelAddr) + prog->IndirectRegisterFiles |= (1 << inst->DstReg.File); } prog->NumAddressRegs = maxAddrReg; } @@ -1199,11 +1206,11 @@ _slang_link(GLcontext *ctx, vertNotify = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, &shProg->FragmentProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link fragment program:\n"); + fprintf(stderr, "Mesa pre-link fragment program:\n"); _mesa_print_program(&fragProg->Base); _mesa_print_program_parameters(ctx, &fragProg->Base); - printf("Mesa post-link fragment program:\n"); + fprintf(stderr, "Mesa post-link fragment program:\n"); _mesa_print_program(&shProg->FragmentProgram->Base); _mesa_print_program_parameters(ctx, &shProg->FragmentProgram->Base); } @@ -1222,11 
+1229,11 @@ _slang_link(GLcontext *ctx, geomNotify = ctx->Driver.ProgramStringNotify(ctx, MESA_GEOMETRY_PROGRAM, &shProg->GeometryProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link geometry program:\n"); + fprintf(stderr, "Mesa pre-link geometry program:\n"); _mesa_print_program(&geomProg->Base); _mesa_print_program_parameters(ctx, &geomProg->Base); - printf("Mesa post-link geometry program:\n"); + fprintf(stderr, "Mesa post-link geometry program:\n"); _mesa_print_program(&shProg->GeometryProgram->Base); _mesa_print_program_parameters(ctx, &shProg->GeometryProgram->Base); } @@ -1240,11 +1247,11 @@ _slang_link(GLcontext *ctx, fragNotify = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, &shProg->VertexProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link vertex program:\n"); + fprintf(stderr, "Mesa pre-link vertex program:\n"); _mesa_print_program(&vertProg->Base); _mesa_print_program_parameters(ctx, &vertProg->Base); - printf("Mesa post-link vertex program:\n"); + fprintf(stderr, "Mesa post-link vertex program:\n"); _mesa_print_program(&shProg->VertexProgram->Base); _mesa_print_program_parameters(ctx, &shProg->VertexProgram->Base); } @@ -1259,10 +1266,10 @@ _slang_link(GLcontext *ctx, } if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Varying vars:\n"); + fprintf(stderr, "Varying vars:\n"); _mesa_print_parameter_list(shProg->Varying); if (shProg->InfoLog) { - printf("Info Log: %s\n", shProg->InfoLog); + fprintf(stderr, "Info Log: %s\n", shProg->InfoLog); } } diff --git a/src/mesa/slang/slang_link.h b/src/mesa/slang/slang_link.h index 2b44d20787a..3e9fa2d743d 100644 --- a/src/mesa/slang/slang_link.h +++ b/src/mesa/slang/slang_link.h @@ -25,7 +25,7 @@ #ifndef SLANG_LINK_H #define SLANG_LINK_H 1 -#include "slang_compile.h" +#include "main/mtypes.h" extern void diff --git a/src/mesa/slang/slang_log.h b/src/mesa/slang/slang_log.h index dcaba0285a7..544a26654e7 100644 --- a/src/mesa/slang/slang_log.h +++ 
b/src/mesa/slang/slang_log.h @@ -27,6 +27,8 @@ #define SLANG_LOG_H +#include "main/glheader.h" + typedef struct slang_info_log_ { char *text; diff --git a/src/mesa/slang/slang_print.h b/src/mesa/slang/slang_print.h index 46605c80610..99da3041437 100644 --- a/src/mesa/slang/slang_print.h +++ b/src/mesa/slang/slang_print.h @@ -3,6 +3,12 @@ #ifndef SLANG_PRINT #define SLANG_PRINT +#include "main/glheader.h" +#include "slang_compile_function.h" +#include "slang_compile_operation.h" +#include "slang_compile_variable.h" +#include "slang_typeinfo.h" + extern void slang_print_function(const slang_function *f, GLboolean body); diff --git a/src/mesa/slang/slang_simplify.h b/src/mesa/slang/slang_simplify.h index 8689c23b1a0..37fb938d4fb 100644 --- a/src/mesa/slang/slang_simplify.h +++ b/src/mesa/slang/slang_simplify.h @@ -26,6 +26,13 @@ #define SLANG_SIMPLIFY_H +#include "main/glheader.h" +#include "slang_compile.h" +#include "slang_compile_function.h" +#include "slang_compile_operation.h" +#include "slang_log.h" +#include "slang_utility.h" + extern GLint _slang_lookup_constant(const char *name); diff --git a/src/mesa/slang/slang_utility.h b/src/mesa/slang/slang_utility.h index 2c0d0bcbb2a..cb9b6d2aaaa 100644 --- a/src/mesa/slang/slang_utility.h +++ b/src/mesa/slang/slang_utility.h @@ -26,6 +26,8 @@ #define SLANG_UTILITY_H +#include "main/glheader.h" + /* Compile-time assertions. If the expression is zero, try to declare an * array of size [-1] to cause compilation error. 
*/ diff --git a/src/mesa/slang/slang_vartable.h b/src/mesa/slang/slang_vartable.h index 94bcd63f45a..97945b89d03 100644 --- a/src/mesa/slang/slang_vartable.h +++ b/src/mesa/slang/slang_vartable.h @@ -2,6 +2,9 @@ #ifndef SLANG_VARTABLE_H #define SLANG_VARTABLE_H +#include "main/glheader.h" +#include "slang_utility.h" + struct slang_ir_storage_; typedef struct slang_var_table_ slang_var_table; diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index 1f0fef63df5..c7a04951bff 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -34,6 +34,8 @@ #ifndef ST_ATOM_H #define ST_ATOM_H +#include "main/glheader.h" + struct st_context; struct st_tracked_state; diff --git a/src/mesa/state_tracker/st_atom_constbuf.h b/src/mesa/state_tracker/st_atom_constbuf.h index f707534e2cf..97b076629ee 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.h +++ b/src/mesa/state_tracker/st_atom_constbuf.h @@ -29,6 +29,9 @@ #ifndef ST_ATOM_CONSTBUF_H #define ST_ATOM_CONSTBUF_H +struct gl_program_parameter_list; +struct st_context; + void st_upload_constants( struct st_context *st, struct gl_program_parameter_list *params, diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 3c07afba9aa..1616e945fea 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -33,6 +33,8 @@ */ +#include <assert.h> + #include "st_context.h" #include "st_atom.h" #include "pipe/p_context.h" diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index b88c74fa03a..8a8d17599ec 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -37,6 +37,7 @@ #include "main/image.h" #include "main/macros.h" #include "program/program.h" +#include "program/prog_cache.h" #include "program/prog_instruction.h" #include "program/prog_parameter.h" #include "program/prog_print.h" 
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index cebaad5f000..05442ef91b5 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -40,7 +40,6 @@ #include "program/program.h" #include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" #include "util/u_simple_shaders.h" diff --git a/src/mesa/state_tracker/st_atom_shader.h b/src/mesa/state_tracker/st_atom_shader.h index 8403bc66c92..56d4c68f4f7 100644 --- a/src/mesa/state_tracker/st_atom_shader.h +++ b/src/mesa/state_tracker/st_atom_shader.h @@ -30,6 +30,9 @@ #define ST_ATOM_SHADER_H +struct st_context; +struct translated_vertex_program; + extern void st_free_translated_vertex_programs(struct st_context *st, struct translated_vertex_program *xvp); diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index 31e124b3293..ecdd9f06f6a 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -33,6 +33,8 @@ */ +#include <assert.h> + #include "st_context.h" #include "st_atom.h" #include "pipe/p_context.h" diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h index b81de316ec9..6d5de7b13ad 100644 --- a/src/mesa/state_tracker/st_cache.h +++ b/src/mesa/state_tracker/st_cache.h @@ -33,10 +33,11 @@ #ifndef ST_CACHE_H #define ST_CACHE_H -#include "cso_cache/cso_cache.h" - struct pipe_blend_state; +struct pipe_depth_stencil_alpha_state; +struct pipe_rasterizer_state; struct pipe_sampler_state; +struct pipe_shader_state; struct st_context; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index ba600ccef6d..0b8ecd27cb9 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -46,6 +46,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" #include "util/u_draw_quad.h" 
#include "util/u_simple_shaders.h" diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h index 8af975b74fc..d04b2b67795 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.h +++ b/src/mesa/state_tracker/st_cb_bitmap.h @@ -30,7 +30,10 @@ #define ST_CB_BITMAP_H -#include "main/mtypes.h" +#include "main/compiler.h" + +struct dd_function_table; +struct st_context; #if FEATURE_drawpix diff --git a/src/mesa/state_tracker/st_cb_blit.h b/src/mesa/state_tracker/st_cb_blit.h index 7ab9a54df90..c230652cefc 100644 --- a/src/mesa/state_tracker/st_cb_blit.h +++ b/src/mesa/state_tracker/st_cb_blit.h @@ -29,8 +29,10 @@ #define ST_CB_BLIT_H -#include "main/mtypes.h" -#include "st_context.h" +#include "main/compiler.h" + +struct dd_function_table; +struct st_context; extern void diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.h b/src/mesa/state_tracker/st_cb_bufferobjects.h index a27daac2bf0..1c991d20837 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.h +++ b/src/mesa/state_tracker/st_cb_bufferobjects.h @@ -28,9 +28,12 @@ #ifndef ST_CB_BUFFEROBJECTS_H #define ST_CB_BUFFEROBJECTS_H -struct st_context; -struct gl_buffer_object; +#include "main/compiler.h" +#include "main/mtypes.h" + +struct dd_function_table; struct pipe_resource; +struct st_context; /** * State_tracker vertex/pixel buffer object, derived from Mesa's diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index ea2414c4a00..246ab2e9579 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -45,6 +45,7 @@ #include "st_program.h" #include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_format.h" diff --git a/src/mesa/state_tracker/st_cb_clear.h b/src/mesa/state_tracker/st_cb_clear.h index bc035ac25ca..b27c09d10e4 100644 --- a/src/mesa/state_tracker/st_cb_clear.h +++ b/src/mesa/state_tracker/st_cb_clear.h @@ -30,6 +30,9 @@ 
#define ST_CB_CLEAR_H +struct dd_function_table; +struct st_context; + extern void st_init_clear(struct st_context *st); diff --git a/src/mesa/state_tracker/st_cb_condrender.h b/src/mesa/state_tracker/st_cb_condrender.h index 891f1cbcd8c..79d0db8d08a 100644 --- a/src/mesa/state_tracker/st_cb_condrender.h +++ b/src/mesa/state_tracker/st_cb_condrender.h @@ -29,6 +29,8 @@ #define ST_CB_CONDRENDER_H +struct dd_function_table; + extern void st_init_cond_render_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h index 7d5e901ccc5..575f169e08e 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.h +++ b/src/mesa/state_tracker/st_cb_drawpixels.h @@ -30,7 +30,10 @@ #define ST_CB_DRAWPIXELS_H -#include "main/mtypes.h" +#include "main/compiler.h" + +struct dd_function_table; +struct st_context; #if FEATURE_drawpix diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index b191a7f8902..c99a8d792ed 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -14,7 +14,6 @@ #include "main/imports.h" #include "main/image.h" -#include "main/bufferobj.h" #include "main/macros.h" #include "program/program.h" #include "program/prog_print.h" diff --git a/src/mesa/state_tracker/st_cb_drawtex.h b/src/mesa/state_tracker/st_cb_drawtex.h index a3f54a349cc..d21262f8977 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.h +++ b/src/mesa/state_tracker/st_cb_drawtex.h @@ -10,7 +10,10 @@ #define ST_CB_DRAWTEX_H -#include "main/mtypes.h" +#include "main/compiler.h" + +struct dd_function_table; +struct st_context; #if FEATURE_OES_draw_texture diff --git a/src/mesa/state_tracker/st_cb_eglimage.c b/src/mesa/state_tracker/st_cb_eglimage.c index 4aaf91d5a19..037e576fabe 100644 --- a/src/mesa/state_tracker/st_cb_eglimage.c +++ b/src/mesa/state_tracker/st_cb_eglimage.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include 
"st_cb_eglimage.h" #include "st_cb_fbo.h" +#include "st_context.h" #include "st_texture.h" #include "st_format.h" #include "st_manager.h" diff --git a/src/mesa/state_tracker/st_cb_eglimage.h b/src/mesa/state_tracker/st_cb_eglimage.h index d6953e99f69..b6e44d5aff5 100644 --- a/src/mesa/state_tracker/st_cb_eglimage.h +++ b/src/mesa/state_tracker/st_cb_eglimage.h @@ -29,8 +29,9 @@ #ifndef ST_CB_EGLIMAGE_H #define ST_CB_EGLIMAGE_H -#include "main/mtypes.h" -#include "main/dd.h" +#include "main/compiler.h" + +struct dd_function_table; #if FEATURE_OES_EGL_image diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h index 43b6c1e75f4..62a9bbcb25f 100644 --- a/src/mesa/state_tracker/st_cb_fbo.h +++ b/src/mesa/state_tracker/st_cb_fbo.h @@ -29,6 +29,15 @@ #ifndef ST_CB_FBO_H #define ST_CB_FBO_H +#include "main/compiler.h" +#include "main/glheader.h" +#include "main/mtypes.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + +struct dd_function_table; +struct pipe_context; /** * Derived renderbuffer class. 
Just need to add a pointer to the diff --git a/src/mesa/state_tracker/st_cb_feedback.h b/src/mesa/state_tracker/st_cb_feedback.h index 706d84960f7..f2342f58238 100644 --- a/src/mesa/state_tracker/st_cb_feedback.h +++ b/src/mesa/state_tracker/st_cb_feedback.h @@ -30,7 +30,9 @@ #define ST_CB_FEEDBACK_H -#include "main/mtypes.h" +#include "main/compiler.h" + +struct dd_function_table; #if FEATURE_feedback diff --git a/src/mesa/state_tracker/st_cb_flush.h b/src/mesa/state_tracker/st_cb_flush.h index 7fca0176a30..7672b4cf1da 100644 --- a/src/mesa/state_tracker/st_cb_flush.h +++ b/src/mesa/state_tracker/st_cb_flush.h @@ -30,6 +30,12 @@ #define ST_CB_FLUSH_H +#include "pipe/p_compiler.h" + +struct dd_function_table; +struct pipe_fence_handle; +struct st_context; + extern void st_init_flush_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_program.h b/src/mesa/state_tracker/st_cb_program.h index 0de96f2fd22..0fd179ef3df 100644 --- a/src/mesa/state_tracker/st_cb_program.h +++ b/src/mesa/state_tracker/st_cb_program.h @@ -29,6 +29,10 @@ #define ST_CB_PROGRAM_H +#include "main/mtypes.h" + +struct dd_function_table; + extern void st_init_program_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_rasterpos.h b/src/mesa/state_tracker/st_cb_rasterpos.h index d2ed7297f15..2dc109bb184 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.h +++ b/src/mesa/state_tracker/st_cb_rasterpos.h @@ -29,7 +29,9 @@ #define ST_CB_RASTERPOS_H -#include "main/mtypes.h" +#include "main/compiler.h" + +struct dd_function_table; #if FEATURE_rastpos diff --git a/src/mesa/state_tracker/st_cb_readpixels.h b/src/mesa/state_tracker/st_cb_readpixels.h index c90ef029062..9e1f7b4925e 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.h +++ b/src/mesa/state_tracker/st_cb_readpixels.h @@ -29,6 +29,10 @@ #ifndef ST_CB_READPIXELS_H #define ST_CB_READPIXELS_H +#include "main/mtypes.h" + +struct dd_function_table; + extern struct 
st_renderbuffer * st_get_color_read_renderbuffer(GLcontext *ctx); diff --git a/src/mesa/state_tracker/st_cb_strings.h b/src/mesa/state_tracker/st_cb_strings.h index 3b765aaa592..92d5d2d9ba7 100644 --- a/src/mesa/state_tracker/st_cb_strings.h +++ b/src/mesa/state_tracker/st_cb_strings.h @@ -30,6 +30,8 @@ #define ST_CB_STRINGS_H +struct dd_function_table; + extern void st_init_string_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h index 1cd9fc3a50f..6942478e815 100644 --- a/src/mesa/state_tracker/st_cb_texture.h +++ b/src/mesa/state_tracker/st_cb_texture.h @@ -30,6 +30,13 @@ #define ST_CB_TEXTURE_H +#include "main/glheader.h" +#include "main/mtypes.h" + +struct dd_function_table; +struct pipe_context; +struct st_context; + extern GLboolean st_finalize_texture(GLcontext *ctx, struct pipe_context *pipe, diff --git a/src/mesa/state_tracker/st_cb_viewport.h b/src/mesa/state_tracker/st_cb_viewport.h index db7dd6eab82..bcfd7cb68af 100644 --- a/src/mesa/state_tracker/st_cb_viewport.h +++ b/src/mesa/state_tracker/st_cb_viewport.h @@ -25,5 +25,12 @@ * **************************************************************************/ +#ifndef ST_CB_VIEWPORT_H +#define ST_CB_VIEWPORT_H + +struct dd_function_table; + extern void st_init_viewport_functions(struct dd_function_table *functions); + +#endif /* ST_CB_VIEW_PORT_H */ diff --git a/src/mesa/state_tracker/st_cb_xformfb.h b/src/mesa/state_tracker/st_cb_xformfb.h index 50efcb9293f..574cf481e18 100644 --- a/src/mesa/state_tracker/st_cb_xformfb.h +++ b/src/mesa/state_tracker/st_cb_xformfb.h @@ -29,6 +29,10 @@ #define ST_CB_XFORMFB_H +#include "main/compiler.h" + +struct dd_function_table; + #if FEATURE_EXT_transform_feedback extern void diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 7eb5f32611d..2ce5f087536 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c 
@@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/context.h" #include "main/shaderobj.h" +#include "program/prog_cache.h" #include "vbo/vbo.h" #include "glapi/glapi.h" #include "st_context.h" @@ -62,6 +63,9 @@ #include "cso_cache/cso_context.h" +DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE) + + /** * Called via ctx->Driver.UpdateState() */ @@ -169,7 +173,7 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, /* XXX: need a capability bit in gallium to query if the pipe * driver prefers DP4 or MUL/MAD for vertex transformation. */ - if (debug_get_bool_option("MESA_MVP_DP4", FALSE)) + if (debug_get_option_mesa_mvp_dp4()) _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); return st_create_context_priv(ctx, pipe); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index a147a021176..60c25fb8f00 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -1,3 +1,4 @@ +//struct dd_function_table; /************************************************************************** * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 
@@ -29,21 +30,17 @@ #define ST_CONTEXT_H #include "main/mtypes.h" -#include "program/prog_cache.h" #include "pipe/p_state.h" #include "state_tracker/st_api.h" - -struct st_context; -struct st_texture_object; -struct st_fragment_program; +struct bitmap_cache; +struct blit_state; +struct dd_function_table; struct draw_context; struct draw_stage; -struct cso_cache; -struct cso_blend; struct gen_mipmap_state; -struct blit_state; -struct bitmap_cache; +struct st_context; +struct st_fragment_program; #define ST_NEW_MESA 0x1 /* Mesa state has changed */ diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index ebf6ec6e7e2..df32491d044 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -55,6 +55,8 @@ static const struct debug_named_value st_debug_flags[] = { { "query", DEBUG_QUERY, NULL }, DEBUG_NAMED_VALUE_END }; + +DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0) #endif @@ -62,7 +64,7 @@ void st_debug_init(void) { #ifdef DEBUG - ST_DEBUG = debug_get_flags_option("ST_DEBUG", st_debug_flags, 0 ); + ST_DEBUG = debug_get_option_st_debug(); #endif } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5821da4889d..5b054892702 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -58,6 +58,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "draw/draw_context.h" #include "cso_cache/cso_context.h" @@ -494,6 +495,49 @@ setup_non_interleaved_attribs(GLcontext *ctx, } +static void +setup_index_buffer(GLcontext *ctx, + const struct _mesa_index_buffer *ib, + struct pipe_index_buffer *ibuffer) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + + memset(ibuffer, 0, sizeof(*ibuffer)); + if (ib) { + struct gl_buffer_object *bufobj = ib->obj; + + switch (ib->type) { + case GL_UNSIGNED_INT: + ibuffer->index_size = 4; + break; 
+ case GL_UNSIGNED_SHORT: + ibuffer->index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer->index_size = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_resource_reference(&ibuffer->buffer, stobj->buffer); + ibuffer->offset = pointer_to_offset(ib->ptr); + } + else { + /* element/indicies are in user space memory */ + ibuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * ibuffer->index_size, + PIPE_BIND_INDEX_BUFFER); + } + } +} /** * Prior to drawing, check that any uniforms referenced by the @@ -568,8 +612,11 @@ st_draw_vbo(GLcontext *ctx, GLuint attr; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers, num_velements; + struct pipe_index_buffer ibuffer; GLboolean userSpace = GL_FALSE; GLboolean vertDataEdgeFlags; + struct pipe_draw_info info; + unsigned i; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); @@ -647,113 +694,35 @@ st_draw_vbo(GLcontext *ctx, if (num_vbuffers == 0 || num_velements == 0) return; - /* do actual drawing */ - if (ib) { - /* indexed primitive */ - struct gl_buffer_object *bufobj = ib->obj; - struct pipe_resource *indexBuf = NULL; - unsigned indexSize, indexOffset, i; + setup_index_buffer(ctx, ib, &ibuffer); + pipe->set_index_buffer(pipe, &ibuffer); - switch (ib->type) { - case GL_UNSIGNED_INT: - indexSize = 4; - break; - case GL_UNSIGNED_SHORT: - indexSize = 2; - break; - case GL_UNSIGNED_BYTE: - indexSize = 1; - break; - default: - assert(0); - return; - } - - /* get/create the index buffer object */ - if (bufobj && bufobj->Name) { - /* elements/indexes are in a real VBO */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - pipe_resource_reference(&indexBuf, stobj->buffer); - indexOffset = pointer_to_offset(ib->ptr) / indexSize; - } - 
else { - /* element/indicies are in user space memory */ - indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, - ib->count * indexSize, - PIPE_BIND_INDEX_BUFFER); - indexOffset = 0; + util_draw_init_info(&info); + if (ib) { + info.indexed = TRUE; + if (min_index != ~0 && max_index != ~0) { + info.min_index = min_index; + info.max_index = max_index; } + } - /* draw */ - if (pipe->draw_range_elements && min_index != ~0 && max_index != ~0) { - /* XXX: exercise temporary path to pass min/max directly - * through to driver & draw module. These interfaces still - * need a bit of work... - */ - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - pipe->draw_range_elements(pipe, indexBuf, indexSize, - prims[i].basevertex, - min_index, max_index, prim, - prims[i].start + indexOffset, vcount); - } - } - } - else { - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_elements(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount); - } - else { - pipe->draw_elements_instanced(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } + /* do actual drawing */ + for (i = 0; i < nr_prims; i++) { + info.mode = translate_prim( ctx, prims[i].mode ); + info.start = prims[i].start; + info.count = prims[i].count; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; } - pipe_resource_reference(&indexBuf, NULL); + if (u_trim_pipe_prim(info.mode, &info.count)) + pipe->draw_vbo(pipe, &info); } - else { - 
/* non-indexed */ - GLuint i; - - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_arrays(pipe, prim, prims[i].start, vcount); - } - else { - pipe->draw_arrays_instanced(pipe, prim, - prims[i].start, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } - } + pipe_resource_reference(&ibuffer.buffer, NULL); /* unreference buffers (frees wrapped user-space buffer objects) */ for (attr = 0; attr < num_vbuffers; attr++) { diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index 3e0face656b..f36184487a6 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -34,8 +34,13 @@ #ifndef ST_DRAW_H #define ST_DRAW_H -struct _mesa_prim; +#include "main/compiler.h" +#include "main/glheader.h" +#include "main/mtypes.h" + struct _mesa_index_buffer; +struct _mesa_prim; +struct st_context; void st_init_draw( struct st_context *st ); diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h index 2994f16dd33..aa9b2b2b914 100644 --- a/src/mesa/state_tracker/st_extensions.h +++ b/src/mesa/state_tracker/st_extensions.h @@ -30,6 +30,8 @@ #define ST_EXTENSIONS_H +struct st_context; + extern void st_init_limits(struct st_context *st); extern void st_init_extensions(struct st_context *st); diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 29768f296d6..841c58cadc8 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -31,7 +31,12 @@ #define ST_FORMAT_H #include "main/formats.h" +#include "main/mtypes.h" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" + +struct pipe_screen; extern GLenum st_format_datatype(enum pipe_format format); diff --git a/src/mesa/state_tracker/st_gen_mipmap.h 
b/src/mesa/state_tracker/st_gen_mipmap.h index 00fbae93026..016bf3f4bba 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.h +++ b/src/mesa/state_tracker/st_gen_mipmap.h @@ -30,6 +30,10 @@ #define ST_GEN_MIPMAP_H +#include "main/mtypes.h" + +struct st_context; + extern void st_init_generate_mipmap(struct st_context *st); diff --git a/src/mesa/state_tracker/st_gl_api.h b/src/mesa/state_tracker/st_gl_api.h index fe1aec207ea..57c6d9f24d2 100644 --- a/src/mesa/state_tracker/st_gl_api.h +++ b/src/mesa/state_tracker/st_gl_api.h @@ -2,8 +2,6 @@ #ifndef ST_GL_API_H #define ST_GL_API_H -#include "state_tracker/st_api.h" - struct st_api *st_gl_api_create(void); struct st_api *st_gl_api_create_es1(void); struct st_api *st_gl_api_create_es2(void); diff --git a/src/mesa/state_tracker/st_manager.h b/src/mesa/state_tracker/st_manager.h index cd2887b1e0f..48a9d4d99a6 100644 --- a/src/mesa/state_tracker/st_manager.h +++ b/src/mesa/state_tracker/st_manager.h @@ -29,8 +29,11 @@ #ifndef ST_MANAGER_H #define ST_MANAGER_H -#include "state_tracker/st_api.h" -#include "st_context.h" +#include "main/mtypes.h" + +#include "pipe/p_compiler.h" + +struct st_context; struct pipe_surface * st_manager_get_egl_image_surface(struct st_context *st, diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index bacd091853b..a19dcc92534 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -44,6 +44,15 @@ #include "util/u_math.h" #include "util/u_memory.h" + +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) + + struct label { unsigned branch_target; unsigned token; @@ -205,7 +214,7 @@ src_register( struct st_translate *t, return ureg_src_undef(); case PROGRAM_TEMPORARY: - ASSERT(index >= 0); + assert(index >= 0); if (ureg_dst_is_undef(t->temps[index])) 
t->temps[index] = ureg_DECL_temporary( t->ureg ); assert(index < Elements(t->temps)); @@ -215,7 +224,7 @@ src_register( struct st_translate *t, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: case PROGRAM_UNIFORM: - ASSERT(index >= 0); + assert(index >= 0); return t->constants[index]; case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ @@ -738,9 +747,11 @@ emit_adjusted_wpos( struct st_translate *t, struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; - ureg_ADD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y), - wpos_input, ureg_imm1f(ureg, value)); + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. + */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, value, value, 0.0f, 0.0f)); t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); } @@ -1057,6 +1068,16 @@ st_translate_mesa_program( t->address[0] = ureg_DECL_address( ureg ); } + if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + */ + for (i = 0; i < program->NumTemporaries; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary( t->ureg ); + } + } + /* Emit constants and immediates. Mesa uses a single index space * for these, so we put all the translated regs in t->constants. */ @@ -1067,7 +1088,7 @@ st_translate_mesa_program( ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } - + for (i = 0; i < program->Parameters->NumParameters; i++) { switch (program->Parameters->Parameters[i].Type) { case PROGRAM_ENV_PARAM: @@ -1078,13 +1099,14 @@ st_translate_mesa_program( t->constants[i] = ureg_DECL_constant( ureg, i ); break; - /* Emit immediates only when there is no address register - * in use. 
FIXME: Be smarter and recognize param arrays: + /* Emit immediates only when there's no indirect addressing of + * the const buffer. + * FIXME: Be smarter and recognize param arrays: * indirect addressing is only valid within the referenced * array. */ case PROGRAM_CONSTANT: - if (program->NumAddressRegs > 0) + if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST) t->constants[i] = ureg_DECL_constant( ureg, i ); else t->constants[i] = diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index e3c5bd1d94d..ca076ce3622 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -30,8 +30,10 @@ #define ST_MESA_TO_TGSI_H #include "main/mtypes.h" -#include "tgsi/tgsi_ureg.h" +#include "pipe/p_compiler.h" + +struct ureg_program; #if defined __cplusplus extern "C" { diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6f3ecdbce11..91528c227b2 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -41,6 +41,7 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_ureg.h" #include "st_debug.h" #include "st_context.h" diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index d779d5a6dde..3805b9a725e 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -36,11 +36,8 @@ #include "main/mtypes.h" #include "program/program.h" -#include "pipe/p_shader_tokens.h" - - -struct cso_fragment_shader; -struct cso_vertex_shader; +#include "pipe/p_state.h" +#include "st_context.h" /** diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index dbdf1ea1ad0..add6e949dfb 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -25,14 +25,14 @@ * **************************************************************************/ 
+#include <stdio.h> + #include "st_context.h" #include "st_format.h" #include "st_texture.h" #include "st_cb_fbo.h" #include "main/enums.h" -#undef Elements /* fix re-defined macro warning */ - #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" diff --git a/src/mesa/swrast/s_aaline.h b/src/mesa/swrast/s_aaline.h index f1d708ec801..922eb230e51 100644 --- a/src/mesa/swrast/s_aaline.h +++ b/src/mesa/swrast/s_aaline.h @@ -28,7 +28,7 @@ #define S_AALINE_H -#include "swrast.h" +#include "main/mtypes.h" extern void diff --git a/src/mesa/swrast/s_aatriangle.h b/src/mesa/swrast/s_aatriangle.h index 4b57fa73a27..9aed41a1915 100644 --- a/src/mesa/swrast/s_aatriangle.h +++ b/src/mesa/swrast/s_aatriangle.h @@ -28,7 +28,7 @@ #define S_AATRIANGLE_H -#include "swrast.h" +#include "main/mtypes.h" extern void diff --git a/src/mesa/swrast/s_alpha.h b/src/mesa/swrast/s_alpha.h index 7a5b72e650a..239484a9743 100644 --- a/src/mesa/swrast/s_alpha.h +++ b/src/mesa/swrast/s_alpha.h @@ -28,7 +28,8 @@ #define S_ALPHA_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLint diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index fa280e72e40..1338b6802d4 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -21,10 +21,10 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/atifragshader.h" #include "swrast/s_atifragshader.h" +#include "swrast/s_context.h" /** diff --git a/src/mesa/swrast/s_atifragshader.h b/src/mesa/swrast/s_atifragshader.h index 871a0c04559..cce455a0465 100644 --- a/src/mesa/swrast/s_atifragshader.h +++ b/src/mesa/swrast/s_atifragshader.h @@ -27,7 +27,8 @@ #define S_ATIFRAGSHADER_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_blend.h b/src/mesa/swrast/s_blend.h index 8d5a81635d5..9cedde3bf20 100644 --- 
a/src/mesa/swrast/s_blend.h +++ b/src/mesa/swrast/s_blend.h @@ -27,7 +27,8 @@ #define S_BLEND_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index 6d2d17c61d9..d8d8a80b7d7 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -28,7 +28,6 @@ #include "main/imports.h" #include "main/bufferobj.h" -#include "main/context.h" #include "main/colormac.h" #include "main/mtypes.h" #include "main/teximage.h" diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h index c9755e6da18..6d81f74768f 100644 --- a/src/mesa/swrast/s_context.h +++ b/src/mesa/swrast/s_context.h @@ -43,6 +43,7 @@ #ifndef S_CONTEXT_H #define S_CONTEXT_H +#include "main/compiler.h" #include "main/mtypes.h" #include "program/prog_execute.h" #include "swrast.h" diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index ed637cac124..f952fd6baa7 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -30,7 +30,6 @@ #include "main/imports.h" #include "s_depth.h" -#include "s_context.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_depth.h b/src/mesa/swrast/s_depth.h index 7eae3667428..878d242f5e5 100644 --- a/src/mesa/swrast/s_depth.h +++ b/src/mesa/swrast/s_depth.h @@ -27,7 +27,8 @@ #define S_DEPTH_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLuint diff --git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c index 373b1416e28..6ac8ac73b0b 100644 --- a/src/mesa/swrast/s_feedback.c +++ b/src/mesa/swrast/s_feedback.c @@ -24,7 +24,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/feedback.h" #include "main/macros.h" diff --git a/src/mesa/swrast/s_fog.c b/src/mesa/swrast/s_fog.c index 3fc84392133..689500a613a 100644 --- a/src/mesa/swrast/s_fog.c +++ b/src/mesa/swrast/s_fog.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include 
"main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_fog.h b/src/mesa/swrast/s_fog.h index 06107de3f9d..a496746d106 100644 --- a/src/mesa/swrast/s_fog.h +++ b/src/mesa/swrast/s_fog.h @@ -28,7 +28,8 @@ #define S_FOG_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLfloat diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 413f136cd59..9facb44d9bf 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -24,9 +24,9 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "program/prog_instruction.h" +#include "s_context.h" #include "s_fragprog.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_fragprog.h b/src/mesa/swrast/s_fragprog.h index e1b7e679185..92b9d01e173 100644 --- a/src/mesa/swrast/s_fragprog.h +++ b/src/mesa/swrast/s_fragprog.h @@ -27,7 +27,8 @@ #define S_FRAGPROG_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_logic.h b/src/mesa/swrast/s_logic.h index e8cfae33f23..d609513348d 100644 --- a/src/mesa/swrast/s_logic.h +++ b/src/mesa/swrast/s_logic.h @@ -27,7 +27,8 @@ #define S_LOGIC_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void _swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb, diff --git a/src/mesa/swrast/s_masking.h b/src/mesa/swrast/s_masking.h index 3ba4f8356cb..cb000da0fd8 100644 --- a/src/mesa/swrast/s_masking.h +++ b/src/mesa/swrast/s_masking.h @@ -27,7 +27,8 @@ #define S_MASKING_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c index 1663ece8294..12431662c47 100644 --- a/src/mesa/swrast/s_points.c +++ b/src/mesa/swrast/s_points.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include 
"main/macros.h" #include "s_context.h" #include "s_feedback.h" diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 6ad9aceec77..553fd9a76d8 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -27,7 +27,6 @@ #include "main/bufferobj.h" #include "main/colormac.h" #include "main/convolve.h" -#include "main/context.h" #include "main/feedback.h" #include "main/formats.h" #include "main/image.h" diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index 687c8eb0bf8..8931cdec1bc 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -33,7 +33,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/image.h" @@ -971,6 +970,10 @@ shade_texture_span(GLcontext *ctx, SWspan *span) if (span->primitive == GL_BITMAP && span->array->ChanType != GL_FLOAT) { convert_color_type(span, GL_FLOAT, 0); } + else { + span->array->rgba = (void *) span->array->attribs[FRAG_ATTRIB_COL0]; + } + if (span->primitive != GL_POINT || (span->interpMask & SPAN_RGBA) || ctx->Point.PointSprite) { @@ -1222,9 +1225,22 @@ _swrast_write_rgba_span( GLcontext *ctx, SWspan *span) GLchan rgbaSave[MAX_WIDTH][4]; const GLuint fragOutput = multiFragOutputs ? 
buf : 0; + /* set span->array->rgba to colors for render buffer's datatype */ if (rb->DataType != span->array->ChanType || fragOutput > 0) { convert_color_type(span, rb->DataType, fragOutput); } + else { + if (rb->DataType == GL_UNSIGNED_BYTE) { + span->array->rgba = span->array->rgba8; + } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + span->array->rgba = (void *) span->array->rgba16; + } + else { + span->array->rgba = (void *) + span->array->attribs[FRAG_ATTRIB_COL0]; + } + } if (!multiFragOutputs && numBuffers > 1) { /* save colors for second, third renderbuffer writes */ diff --git a/src/mesa/swrast/s_stencil.h b/src/mesa/swrast/s_stencil.h index cd6cbc57b0b..c076ebbe2a1 100644 --- a/src/mesa/swrast/s_stencil.h +++ b/src/mesa/swrast/s_stencil.h @@ -27,7 +27,8 @@ #define S_STENCIL_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" diff --git a/src/mesa/swrast/s_texcombine.h b/src/mesa/swrast/s_texcombine.h index 9ed96efb879..4f5dfbe1afe 100644 --- a/src/mesa/swrast/s_texcombine.h +++ b/src/mesa/swrast/s_texcombine.h @@ -27,7 +27,8 @@ #define S_TEXCOMBINE_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void _swrast_texture_span( GLcontext *ctx, SWspan *span ); diff --git a/src/mesa/swrast/s_texfilter.h b/src/mesa/swrast/s_texfilter.h index 2e265d685c5..eceab59658e 100644 --- a/src/mesa/swrast/s_texfilter.h +++ b/src/mesa/swrast/s_texfilter.h @@ -27,7 +27,8 @@ #define S_TEXFILTER_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_context.h" extern texture_sample_func diff --git a/src/mesa/swrast/s_zoom.h b/src/mesa/swrast/s_zoom.h index 43917be65fc..09f624efad5 100644 --- a/src/mesa/swrast/s_zoom.h +++ b/src/mesa/swrast/s_zoom.h @@ -25,7 +25,8 @@ #ifndef S_ZOOM_H #define S_ZOOM_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast_setup/ss_context.h b/src/mesa/swrast_setup/ss_context.h index 1ec293fade1..56551ab273c 100644 --- 
a/src/mesa/swrast_setup/ss_context.h +++ b/src/mesa/swrast_setup/ss_context.h @@ -28,9 +28,8 @@ #ifndef SS_CONTEXT_H #define SS_CONTEXT_H -#include "main/mtypes.h" +#include "main/glheader.h" #include "swrast/swrast.h" -#include "swrast_setup.h" #include "tnl/t_context.h" typedef struct { diff --git a/src/mesa/swrast_setup/ss_triangle.h b/src/mesa/swrast_setup/ss_triangle.h index 007fa2e9141..ac553cbd018 100644 --- a/src/mesa/swrast_setup/ss_triangle.h +++ b/src/mesa/swrast_setup/ss_triangle.h @@ -29,7 +29,7 @@ #ifndef SS_TRIANGLE_H #define SS_TRIANGLE_H -#include "ss_context.h" +#include "main/mtypes.h" void _swsetup_trifuncs_init( GLcontext *ctx ); diff --git a/src/mesa/swrast_setup/ss_vb.h b/src/mesa/swrast_setup/ss_vb.h index 2ad1f56f396..944a3b78d8c 100644 --- a/src/mesa/swrast_setup/ss_vb.h +++ b/src/mesa/swrast_setup/ss_vb.h @@ -30,7 +30,6 @@ #define SS_VB_H #include "main/mtypes.h" -#include "swrast_setup.h" void _swsetup_vb_init( GLcontext *ctx ); void _swsetup_choose_rastersetup_func( GLcontext *ctx ); diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index ebaae6335b9..258906f7956 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -53,9 +53,7 @@ #include "main/bitset.h" #include "main/mtypes.h" -#include "math/m_matrix.h" #include "math/m_vector.h" -#include "math/m_xform.h" #include "vbo/vbo.h" diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 3596d162b23..d82d5b50736 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/feedback.h" #include "main/light.h" #include "main/macros.h" diff --git a/src/mesa/tnl/t_vb_cull.c b/src/mesa/tnl/t_vb_cull.c index 712901acf30..22df7166735 100644 --- a/src/mesa/tnl/t_vb_cull.c +++ b/src/mesa/tnl/t_vb_cull.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include 
"main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c index 4a0e6ad4f99..9faae24ec6d 100644 --- a/src/mesa/tnl/t_vb_fog.c +++ b/src/mesa/tnl/t_vb_fog.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_normals.c b/src/mesa/tnl/t_vb_normals.c index 61ac4095733..c2aa655674c 100644 --- a/src/mesa/tnl/t_vb_normals.c +++ b/src/mesa/tnl/t_vb_normals.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 614c67d05eb..f3a338ef1ed 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -33,9 +33,9 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" +#include "math/m_xform.h" #include "program/prog_instruction.h" #include "program/prog_statevars.h" #include "program/prog_execute.h" diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c index c1bebc99423..7d991009a14 100644 --- a/src/mesa/tnl/t_vb_render.c +++ b/src/mesa/tnl/t_vb_render.c @@ -44,6 +44,7 @@ #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" #include "t_pipeline.h" diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c index 9ef13bc96d8..950e0f54e9f 100644 --- a/src/mesa/tnl/t_vb_texgen.c +++ b/src/mesa/tnl/t_vb_texgen.c @@ -37,7 +37,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_texmat.c b/src/mesa/tnl/t_vb_texmat.c index 83688290e59..985d137e5cc 100644 --- 
a/src/mesa/tnl/t_vb_texmat.c +++ b/src/mesa/tnl/t_vb_texmat.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_vertex.c b/src/mesa/tnl/t_vb_vertex.c index a2753425633..453479227b7 100644 --- a/src/mesa/tnl/t_vb_vertex.c +++ b/src/mesa/tnl/t_vb_vertex.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "main/context.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 045af46da8d..84ae1b87f93 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -27,7 +27,7 @@ #include "main/glheader.h" #include "main/bufferobj.h" -#include "main/context.h" +#include "main/compiler.h" #include "main/enums.h" #include "main/state.h" diff --git a/src/mesa/vf/vf.h b/src/mesa/vf/vf.h index 83d7547619c..5fe392bbe51 100644 --- a/src/mesa/vf/vf.h +++ b/src/mesa/vf/vf.h @@ -28,7 +28,7 @@ #ifndef VF_VERTEX_H #define VF_VERTEX_H -#include "main/mtypes.h" +#include "main/glheader.h" #include "math/m_vector.h" enum { diff --git a/src/mesa/vf/vf_generic.c b/src/mesa/vf/vf_generic.c index 0af8893c302..95a317e99db 100644 --- a/src/mesa/vf/vf_generic.c +++ b/src/mesa/vf/vf_generic.c @@ -29,6 +29,7 @@ #include "main/glheader.h" #include "main/context.h" #include "main/colormac.h" +#include "main/macros.h" #include "main/simple_list.h" #include "vf/vf.h" |