summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2010-08-18 14:37:47 +0200
committerChristoph Bumiller <[email protected]>2010-08-18 14:37:47 +0200
commit3e54d63429fe7ca5db3c75c181abbaf7a7f55724 (patch)
treee129c36aaef712525f0a04fc5b06c445e3cf84df /src/mesa/drivers/dri
parenteaab76457818fad0926b84c663440e8987e1f19f (diff)
parent85d9bc236d6a8ff8f12cbc2150f8c3740354f573 (diff)
Merge remote branch 'origin/master' into nv50-compiler
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/common/dri_metaops.c1
-rw-r--r--src/mesa/drivers/dri/i810/i810render.c2
-rw-r--r--src/mesa/drivers/dri/i915/Makefile2
-rw-r--r--src/mesa/drivers/dri/i915/intel_render.c2
-rw-r--r--src/mesa/drivers/dri/i965/Makefile2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c588
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c14
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c30
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions_es2.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c36
-rw-r--r--src/mesa/drivers/dri/intel/server/i830_dri.h62
-rw-r--r--src/mesa/drivers/dri/intel/server/intel.h331
-rw-r--r--src/mesa/drivers/dri/mach64/mach64_ioctl.h3
-rw-r--r--src/mesa/drivers/dri/mga/mgarender.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c7
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_texture.c57
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_fb.c5
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_tex.c3
-rw-r--r--src/mesa/drivers/dri/r200/r200_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_tcl.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c268
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h22
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c260
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c23
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h21
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c33
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c2
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c10
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c149
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c57
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c3
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c65
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c15
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c58
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_pixel_read.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c74
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.h7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c192
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_swtcl.c5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tcl.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_copy.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c2
-rw-r--r--src/mesa/drivers/dri/savage/savagerender.c2
-rw-r--r--src/mesa/drivers/dri/unichrome/via_render.c2
74 files changed, 2114 insertions, 828 deletions
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
index 86e59a8e51c..a2f404b616f 100644
--- a/src/mesa/drivers/dri/common/dri_metaops.c
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -29,6 +29,7 @@
#include "main/arbprogram.h"
#include "main/arrayobj.h"
#include "main/bufferobj.h"
+#include "main/context.h"
#include "main/enable.h"
#include "main/matrix.h"
#include "main/texstate.h"
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
index b543d4f012c..205f0cebc1c 100644
--- a/src/mesa/drivers/dri/i810/i810render.c
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -37,6 +37,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "i810screen.h"
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 71ee753748c..65fd658c047 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -56,7 +56,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \
+DRIVER_DEFINES = -I../intel -DI915 \
$(shell pkg-config libdrm --atleast-version=2.3.1 \
&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index ec209391ab4..add0adacb56 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -37,6 +37,8 @@
#include "main/mtypes.h"
#include "main/enums.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 831981558d8..e381a5c714b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -106,7 +106,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server
+DRIVER_DEFINES = -I../intel
INCLUDES += $(INTEL_CFLAGS)
DRI_LIB_DEPS += $(INTEL_LIBS)
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index a74bbc25643..d2ac1235e46 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -192,11 +192,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
brw_clip_project_vertex(c, dest_ptr );
}
-
-
-
-#define MAX_MRF 16
-
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6b20a2979f8..f7a68cead7c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -604,6 +604,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 31ff86cf731..ffdddd0a388 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -984,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn,
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
index a364b158209..8aa6fb6cc6f 100644
--- a/src/mesa/drivers/dri/i965/brw_optimize.c
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -32,6 +32,594 @@
#include "brw_defines.h"
#include "brw_eu.h"
+static const struct {
+ char *name;
+ int nsrc;
+ int ndst;
+ GLboolean is_arith;
+} inst_opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+static INLINE
+GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
+{
+ return inst_opcode[inst->header.opcode].is_arith;
+}
+
+static const GLuint inst_stride[7] = {
+ [0] = 0,
+ [1] = 1,
+ [2] = 2,
+ [3] = 4,
+ [4] = 8,
+ [5] = 16,
+ [6] = 32
+};
+
+static const GLuint inst_type_size[8] = {
+ [BRW_REGISTER_TYPE_UD] = 4,
+ [BRW_REGISTER_TYPE_D] = 4,
+ [BRW_REGISTER_TYPE_UW] = 2,
+ [BRW_REGISTER_TYPE_W] = 2,
+ [BRW_REGISTER_TYPE_UB] = 1,
+ [BRW_REGISTER_TYPE_B] = 1,
+ [BRW_REGISTER_TYPE_F] = 4
+};
+
+static INLINE GLboolean
+brw_is_grf_written(const struct brw_instruction *inst,
+ int reg_index, int size,
+ int gen)
+{
+ if (inst_opcode[inst->header.opcode].ndst == 0)
+ return GL_FALSE;
+
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+
+ if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+ const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ int length, write_end;
+
+ /* SEND is specific */
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ if (gen >= 5)
+ length = inst->bits3.generic_gen5.response_length*REG_SIZE;
+ else
+ length = inst->bits3.generic.response_length*REG_SIZE;
+ }
+ else {
+ length = 1 << inst->header.execution_size;
+ length *= type_size;
+ length *= inst->bits1.da1.dest_horiz_stride;
+ }
+
+ /* If the two intervals intersect, we overwrite the register */
+ write_end = write_start + length;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
+
+ return left < right;
+}
+
+/* Specific path for message register since we need to handle the compr4 case */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+ if (inst_opcode[inst->header.opcode].ndst == 0)
+ return GL_FALSE;
+
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ return GL_TRUE;
+
+ if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
+ const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
+ const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+
+ /* We use compr4 with a size != 16 elements. Strange, we conservatively
+ * consider that we are writing the register.
+ */
+ if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
+ return GL_TRUE;
+
+ GLboolean is_written = GL_FALSE;
+
+ /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
+ if (is_compr4) {
+ const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
+
+ /* First 8-way register */
+ const int write_start0 = mrf_index*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end0 = write_start0 + length;
+
+ /* Second 8-way register */
+ const int write_start1 = (mrf_index+4)*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end1 = write_start1 + length;
+
+ /* If the two intervals intersect, we overwrite the register */
+ const int left0 = MAX2(write_start0, reg_start);
+ const int right0 = MIN2(write_end0, reg_end);
+ const int left1 = MAX2(write_start1, reg_start);
+ const int right1 = MIN2(write_end1, reg_end);
+
+ is_written = left0 < right0 || left1 < right1;
+ }
+ else {
+ int length;
+ length = 1 << inst->header.execution_size;
+ length *= type_size;
+ length *= inst->bits1.da1.dest_horiz_stride;
+
+ /* If the two intervals intersect, we write into the register */
+ const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end = write_start + length;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);;
+
+ is_written = left < right;
+ }
+
+ /* SEND may perform an implicit mov to a mrf register */
+ if (is_written == GL_FALSE &&
+ inst->header.opcode == BRW_OPCODE_SEND &&
+ inst->bits1.da1.src0_reg_file != 0) {
+
+ const int mrf_start = inst->header.destreg__conditionalmod;
+ const int write_start = mrf_start * REG_SIZE;
+ const int write_end = write_start + REG_SIZE;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);;
+ is_written = left < right;
+ }
+
+ return is_written;
+}
+
+static INLINE GLboolean
+brw_is_mrf_read(const struct brw_instruction *inst,
+ int reg_index, int size, int gen)
+{
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ return GL_FALSE;
+ if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+ return GL_TRUE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ int length, read_start, read_end;
+ if (gen >= 5)
+ length = inst->bits3.generic_gen5.msg_length*REG_SIZE;
+ else
+ length = inst->bits3.generic.msg_length*REG_SIZE;
+
+ /* Look if SEND uses an implicit mov. In that case, we read one less register
+ * (but we write it)
+ */
+ if (inst->bits1.da1.src0_reg_file != 0)
+ read_start = inst->header.destreg__conditionalmod;
+ else {
+ length--;
+ read_start = inst->header.destreg__conditionalmod + 1;
+ }
+ read_start *= REG_SIZE;
+ read_end = read_start + length;
+
+ const int left = MAX2(read_start, reg_start);
+ const int right = MIN2(read_end, reg_end);
+
+ return left < right;
+}
+
+static INLINE GLboolean
+brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
+{
+ int i, j;
+ if (inst_opcode[inst->header.opcode].nsrc == 0)
+ return GL_FALSE;
+
+ /* Look at first source. We must take into account register regions to
+ * monitor carefully the read. Note that we are a bit too conservative here
+ * since we do not take into account the fact that some complete registers
+ * may be skipped
+ */
+ if (inst_opcode[inst->header.opcode].nsrc >= 1) {
+
+ if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+ if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ /* See if at least one of this element intersects the interval */
+ const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type];
+ const int elem_num = 1 << inst->header.execution_size;
+ const int width = 1 << inst->bits2.da1.src0_width;
+ const int row_num = elem_num >> inst->bits2.da1.src0_width;
+ const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride];
+ const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride];
+ int row_start = inst->bits2.da1.src0_reg_nr*REG_SIZE
+ + inst->bits2.da1.src0_subreg_nr;
+ for (j = 0; j < row_num; ++j) {
+ int write_start = row_start;
+ for (i = 0; i < width; ++i) {
+ const int write_end = write_start + type_size;
+ const int left = write_start > reg_start ? write_start : reg_start;
+ const int right = write_end < reg_end ? write_end : reg_end;
+ if (left < right)
+ return GL_TRUE;
+ write_start += hs;
+ }
+ row_start += vs;
+ }
+ }
+
+ /* Second src register */
+ if (inst_opcode[inst->header.opcode].nsrc >= 2) {
+
+ if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+ if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ /* See if at least one of this element intersects the interval */
+ const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type];
+ const int elem_num = 1 << inst->header.execution_size;
+ const int width = 1 << inst->bits3.da1.src1_width;
+ const int row_num = elem_num >> inst->bits3.da1.src1_width;
+ const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride];
+ const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride];
+ int row_start = inst->bits3.da1.src1_reg_nr*REG_SIZE
+ + inst->bits3.da1.src1_subreg_nr;
+ for (j = 0; j < row_num; ++j) {
+ int write_start = row_start;
+ for (i = 0; i < width; ++i) {
+ const int write_end = write_start + type_size;
+ const int left = write_start > reg_start ? write_start : reg_start;
+ const int right = write_end < reg_end ? write_end : reg_end;
+ if (left < right)
+ return GL_TRUE;
+ write_start += hs;
+ }
+ row_start += vs;
+ }
+ }
+
+ return GL_FALSE;
+}
+
+static INLINE GLboolean
+brw_is_control_done(const struct brw_instruction *mov) {
+ return
+ mov->header.dependency_control != 0 ||
+ mov->header.thread_control != 0 ||
+ mov->header.mask_control != 0 ||
+ mov->header.saturate != 0 ||
+ mov->header.debug_control != 0;
+}
+
+static INLINE GLboolean
+brw_is_predicated(const struct brw_instruction *mov) {
+ return mov->header.predicate_control != 0;
+}
+
+static INLINE GLboolean
+brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
+ int *mrf_index,
+ int *grf_index,
+ GLboolean *is_compr4)
+{
+ if (brw_is_predicated(mov) ||
+ brw_is_control_done(mov) ||
+ mov->header.debug_control != 0)
+ return GL_FALSE;
+
+ if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+ mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
+ mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+ mov->bits1.da1.dest_subreg_nr != 0)
+ return GL_FALSE;
+
+ if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+ mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
+ mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
+ mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+ mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
+ mov->bits2.da1.src0_subreg_nr != 0 ||
+ mov->bits2.da1.src0_abs != 0 ||
+ mov->bits2.da1.src0_negate != 0)
+ return GL_FALSE;
+
+ *grf_index = mov->bits2.da1.src0_reg_nr;
+ *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
+ *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
+ return GL_TRUE;
+}
+
+static INLINE GLboolean
+brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
+{
+ /* remark: no problem to predicate a SEL instruction */
+ if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) &&
+ brw_is_control_done(inst) == GL_FALSE &&
+ inst->header.execution_size == 4 &&
+ inst->header.access_mode == BRW_ALIGN_1 &&
+ inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT &&
+ inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_type == BRW_REGISTER_TYPE_F &&
+ inst->bits1.da1.dest_horiz_stride == BRW_HORIZONTAL_STRIDE_1 &&
+ inst->bits1.da1.dest_reg_nr == grf_index &&
+ inst->bits1.da1.dest_subreg_nr == 0 &&
+ brw_is_arithmetic_inst(inst))
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+static INLINE GLboolean
+brw_inst_are_equal(const struct brw_instruction *src0,
+ const struct brw_instruction *src1)
+{
+ const GLuint *field0 = (GLuint *) src0;
+ const GLuint *field1 = (GLuint *) src1;
+ return field0[0] == field1[0] &&
+ field0[1] == field1[1] &&
+ field0[2] == field1[2] &&
+ field0[3] == field1[3];
+}
+
+static INLINE void
+brw_inst_copy(struct brw_instruction *dst,
+ const struct brw_instruction *src)
+{
+ GLuint *field_dst = (GLuint *) dst;
+ const GLuint *field_src = (GLuint *) src;
+ field_dst[0] = field_src[0];
+ field_dst[1] = field_src[1];
+ field_dst[2] = field_src[2];
+ field_dst[3] = field_src[3];
+}
+
+static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst)
+{
+ int i, nr_insn = 0, to = 0, from = 0;
+
+ for (from = 0; from < p->nr_insn; ++from) {
+ if (removeInst[from])
+ continue;
+ if(to != from)
+ brw_inst_copy(p->store + to, p->store + from);
+ to++;
+ }
+
+ for (i = 0; i < p->nr_insn; ++i)
+ if (removeInst[i] == GL_FALSE)
+ nr_insn++;
+ p->nr_insn = nr_insn;
+}
+
+/* The gen code emitter generates a lot of duplications in the
+ * grf-to-mrf moves, for example when texture sampling with the same
+ * coordinates from multiple textures.. Here, we monitor same mov
+ * grf-to-mrf instrutions and remove repeated ones where the operands
+ * and dst ahven't changed in between.
+ */
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
+{
+ const int gen = p->brw->intel.gen;
+ int i, j;
+
+ GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn);
+ for (i = 0; i < p->nr_insn; i++) {
+ if (removeInst[i])
+ continue;
+
+ const struct brw_instruction *mov = p->store + i;
+ int mrf_index, grf_index;
+ GLboolean is_compr4;
+
+ /* Only consider _straight_ grf-to-mrf moves */
+ if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+ continue;
+
+ const int mrf_index0 = mrf_index;
+ const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1;
+ const int simd16_size = 2 * REG_SIZE;
+
+ for (j = i + 1; j < p->nr_insn; j++) {
+ const struct brw_instruction *inst = p->store + j;
+
+ if (brw_inst_are_equal(mov, inst)) {
+ removeInst[j] = GL_TRUE;
+ continue;
+ }
+
+ if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+ brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+ brw_is_mrf_written(inst, mrf_index1, REG_SIZE))
+ break;
+ }
+ }
+
+ brw_remove_inst(p, removeInst);
+ free(removeInst);
+}
+
+/* Replace moves to MRFs where the value moved is the result of a
+ * normal arithmetic operation with computation right into the MRF.
+ */
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
+{
+ int i, j, prev;
+ struct brw_context *brw = p->brw;
+ const int gen = brw->intel.gen;
+ const int simd16_size = 2*REG_SIZE;
+
+ GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn);
+ assert(removeInst);
+
+ for (i = 0; i < p->nr_insn; i++) {
+ if (removeInst[i])
+ continue;
+
+ struct brw_instruction *grf_inst = NULL;
+ const struct brw_instruction *mov = p->store + i;
+ int mrf_index, grf_index;
+ GLboolean is_compr4;
+
+ /* Only consider _straight_ grf-to-mrf moves */
+ if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+ continue;
+
+ /* Using comp4 enables a stride of 4 for this instruction */
+ const int mrf_index0 = mrf_index;
+ const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
+
+ /* Look where the register has been set */
+ prev = i;
+ GLboolean potential_remove = GL_FALSE;
+ while (prev--) {
+
+ /* If _one_ instruction writes the grf, we try to remove the mov */
+ struct brw_instruction *inst = p->store + prev;
+ if (brw_is_grf_straight_write(inst, grf_index)) {
+ potential_remove = GL_TRUE;
+ grf_inst = inst;
+ break;
+ }
+
+ }
+
+ if (potential_remove == GL_FALSE)
+ continue;
+ removeInst[i] = GL_TRUE;
+
+ /* Monitor first the section of code between the grf computation and the
+ * mov. Here we cannot read or write both mrf and grf register
+ */
+ for (j = prev + 1; j < i; ++j) {
+ struct brw_instruction *inst = p->store + j;
+ if (removeInst[j])
+ continue;
+ if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+ brw_is_grf_read(inst, grf_index, simd16_size) ||
+ brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+ brw_is_mrf_written(inst, mrf_index1, REG_SIZE) ||
+ brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) ||
+ brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) {
+ removeInst[i] = GL_FALSE;
+ break;
+ }
+ }
+
+ /* After the mov, we can read or write the mrf. If the grf is overwritten,
+ * we are done
+ */
+ for (j = i + 1; j < p->nr_insn; ++j) {
+ struct brw_instruction *inst = p->store + j;
+ if (removeInst[j])
+ continue;
+
+ if (brw_is_grf_read(inst, grf_index, simd16_size)) {
+ removeInst[i] = GL_FALSE;
+ break;
+ }
+
+ if (brw_is_grf_straight_write(inst, grf_index))
+ break;
+ }
+
+ /* Note that with the top down traversal, we can safely pacth the mov
+ * instruction
+ */
+ if (removeInst[i]) {
+ grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
+ grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
+ }
+ }
+
+ brw_remove_inst(p, removeInst);
+ free(removeInst);
+}
+
static GLboolean
is_single_channel_dp4(struct brw_instruction *insn)
{
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 40eece276b7..af08446f2d8 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -46,68 +46,68 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
}
};
-const struct brw_tracked_state brw_blend_constant_color;
-const struct brw_tracked_state brw_cc_unit;
-const struct brw_tracked_state brw_check_fallback;
-const struct brw_tracked_state brw_clip_prog;
-const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_vs_constants;
-const struct brw_tracked_state brw_wm_constants;
-const struct brw_tracked_state brw_constant_buffer;
-const struct brw_tracked_state brw_curbe_offsets;
-const struct brw_tracked_state brw_invarient_state;
-const struct brw_tracked_state brw_gs_prog;
-const struct brw_tracked_state brw_gs_unit;
-const struct brw_tracked_state brw_line_stipple;
-const struct brw_tracked_state brw_aa_line_parameters;
-const struct brw_tracked_state brw_pipelined_state_pointers;
-const struct brw_tracked_state brw_binding_table_pointers;
-const struct brw_tracked_state brw_depthbuffer;
-const struct brw_tracked_state brw_polygon_stipple_offset;
-const struct brw_tracked_state brw_polygon_stipple;
-const struct brw_tracked_state brw_program_parameters;
-const struct brw_tracked_state brw_recalculate_urb_fence;
-const struct brw_tracked_state brw_sf_prog;
-const struct brw_tracked_state brw_sf_unit;
-const struct brw_tracked_state brw_sf_vp;
-const struct brw_tracked_state brw_state_base_address;
-const struct brw_tracked_state brw_urb_fence;
-const struct brw_tracked_state brw_vertex_state;
-const struct brw_tracked_state brw_vs_surfaces;
-const struct brw_tracked_state brw_vs_prog;
-const struct brw_tracked_state brw_vs_unit;
-const struct brw_tracked_state brw_wm_input_sizes;
-const struct brw_tracked_state brw_wm_prog;
-const struct brw_tracked_state brw_wm_samplers;
-const struct brw_tracked_state brw_wm_constant_surface;
-const struct brw_tracked_state brw_wm_surfaces;
-const struct brw_tracked_state brw_wm_binding_table;
-const struct brw_tracked_state brw_wm_unit;
-
-const struct brw_tracked_state brw_psp_urb_cbs;
-
-const struct brw_tracked_state brw_pipe_control;
-
-const struct brw_tracked_state brw_drawing_rect;
-const struct brw_tracked_state brw_indices;
-const struct brw_tracked_state brw_vertices;
-const struct brw_tracked_state brw_index_buffer;
-const struct brw_tracked_state gen6_binding_table_pointers;
-const struct brw_tracked_state gen6_blend_state;
-const struct brw_tracked_state gen6_cc_state_pointers;
-const struct brw_tracked_state gen6_clip_state;
-const struct brw_tracked_state gen6_clip_vp;
-const struct brw_tracked_state gen6_color_calc_state;
-const struct brw_tracked_state gen6_depth_stencil_state;
-const struct brw_tracked_state gen6_gs_state;
-const struct brw_tracked_state gen6_sampler_state;
-const struct brw_tracked_state gen6_scissor_state;
-const struct brw_tracked_state gen6_sf_state;
-const struct brw_tracked_state gen6_sf_vp;
-const struct brw_tracked_state gen6_urb;
-const struct brw_tracked_state gen6_viewport_state;
-const struct brw_tracked_state gen6_vs_state;
-const struct brw_tracked_state gen6_wm_state;
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_vs_constants;
+extern const struct brw_tracked_state brw_wm_constants;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_binding_table;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+extern const struct brw_tracked_state gen6_binding_table_pointers;
+extern const struct brw_tracked_state gen6_blend_state;
+extern const struct brw_tracked_state gen6_cc_state_pointers;
+extern const struct brw_tracked_state gen6_clip_state;
+extern const struct brw_tracked_state gen6_clip_vp;
+extern const struct brw_tracked_state gen6_color_calc_state;
+extern const struct brw_tracked_state gen6_depth_stencil_state;
+extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_sampler_state;
+extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sf_state;
+extern const struct brw_tracked_state gen6_sf_vp;
+extern const struct brw_tracked_state gen6_urb;
+extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_state;
+extern const struct brw_tracked_state gen6_wm_state;
/***********************************************************************
* brw_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index 1db2a210d45..e878da3850d 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -30,6 +30,8 @@
*/
+#include <assert.h>
+
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "brw_util.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a1bee2e44ab..b6b558e9a69 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -44,6 +44,7 @@ static GLboolean
brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
{
int opcode_array[] = {
+ [OPCODE_MOV] = 1,
[OPCODE_ADD] = 2,
[OPCODE_CMP] = 3,
[OPCODE_DP3] = 2,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 323cfac8fa7..d9fa2e63354 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1283,7 +1283,7 @@ void emit_fb_write(struct brw_wm_compile *c,
* + 1 for the second half we get destination + 4.
*/
brw_MOV(p,
- brw_message_reg(nr + channel + (1 << 7)),
+ brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
@@ -1712,12 +1712,20 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->spill_slot);
}
+ /* Only properly tested on ILK */
+ if (p->brw->intel.gen == 5) {
+ brw_remove_duplicate_mrf_moves(p);
+ if (c->dispatch_width == 16)
+ brw_remove_grf_to_mrf_moves(p);
+ }
+
if (INTEL_DEBUG & DEBUG_WM) {
int i;
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
+ printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
+
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 5f2035d79c9..e19f44035fd 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -29,6 +29,7 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/extensions.h"
+#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/points.h"
@@ -39,8 +40,6 @@
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
-#include "i830_dri.h"
-
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
@@ -420,7 +419,7 @@ intel_prepare_render(struct intel_context *intel)
__DRIdrawable *drawable;
drawable = driContext->driDrawablePriv;
- if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
@@ -428,7 +427,7 @@ intel_prepare_render(struct intel_context *intel)
}
drawable = driContext->driReadablePriv;
- if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
@@ -613,6 +612,7 @@ intelInitContext(struct intel_context *intel,
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->private;
int bo_reuse_mode;
+ __GLcontextModes visual;
/* we can't do anything without a connection to the device */
if (intelScreen->bufmgr == NULL)
@@ -624,6 +624,11 @@ intelInitContext(struct intel_context *intel,
functions->Viewport = intel_viewport;
}
+ if (mesaVis == NULL) {
+ memset(&visual, 0, sizeof visual);
+ mesaVis = &visual;
+ }
+
if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx,
functions, (void *) intel)) {
printf("%s: failed to init mesa context\n", __FUNCTION__);
@@ -890,14 +895,21 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
}
if (driContextPriv) {
- struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
- struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
+ struct gl_framebuffer *fb, *readFb;
+
+ if (driDrawPriv == NULL && driReadPriv == NULL) {
+ fb = _mesa_get_incomplete_framebuffer();
+ readFb = _mesa_get_incomplete_framebuffer();
+ } else {
+ fb = driDrawPriv->driverPrivate;
+ readFb = driReadPriv->driverPrivate;
+ driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+ driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+ }
- driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
- driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
_mesa_make_current(&intel->ctx, fb, readFb);
-
+
/* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
* is NULL at that point. We can't call _mesa_makecurrent()
* first, since we need the buffer size for the initial
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
index baf8e130010..de34bbb2aec 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
@@ -28,7 +28,6 @@
#include "main/extensions.h"
#include "intel_extensions.h"
-#include "utils.h"
static const char *es2_extensions[] = {
/* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 0e2fe893fed..02c0ffce31d 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -45,6 +45,7 @@
#include "main/attrib.h"
#include "main/enable.h"
#include "main/viewport.h"
+#include "main/context.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index fe4de189600..680d18ba299 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -155,6 +155,9 @@ intel_region_alloc_internal(struct intel_context *intel,
}
region = calloc(sizeof(*region), 1);
+ if (region == NULL)
+ return region;
+
region->cpp = cpp;
region->width = width;
region->height = height;
@@ -189,6 +192,9 @@ intel_region_alloc(struct intel_context *intel,
region = intel_region_alloc_internal(intel, cpp, width, height,
aligned_pitch / cpp, buffer);
+ if (region == NULL)
+ return region;
+
region->tiling = tiling;
return region;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 224b506c05b..6efb2ddc553 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -102,7 +102,7 @@ do_copy_texsubimage(struct intel_context *intel,
GLcontext *ctx = &intel->ctx;
const struct intel_region *src = get_teximage_source(intel, internalFormat);
- if (!intelImage->mt || !src) {
+ if (!intelImage->mt || !src || !src->buffer) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
__FUNCTION__, intelImage->mt, src, internalFormat);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 5f813c0efa2..e03b203fb40 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -19,7 +19,6 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
GLenum format, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
- const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
#if 0
printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
@@ -30,39 +29,28 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case 4:
case GL_RGBA:
case GL_COMPRESSED_RGBA:
- if (format == GL_BGRA) {
- if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
- return MESA_FORMAT_ARGB8888;
- }
- else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
- return MESA_FORMAT_ARGB4444;
- }
- else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
- return MESA_FORMAT_ARGB1555;
- }
- }
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV)
+ return MESA_FORMAT_ARGB4444;
+ else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV)
+ return MESA_FORMAT_ARGB1555;
+ else
+ return MESA_FORMAT_ARGB8888;
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
- return MESA_FORMAT_RGB565;
- }
- if (do32bpt) {
- if (intel->has_xrgb_textures)
- return MESA_FORMAT_XRGB8888;
- else
- return MESA_FORMAT_ARGB8888;
- } else {
+ if (type == GL_UNSIGNED_SHORT_5_6_5)
return MESA_FORMAT_RGB565;
- }
+ else if (intel->has_xrgb_textures)
+ return MESA_FORMAT_XRGB8888;
+ else
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA4:
case GL_RGBA2:
diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h
deleted file mode 100644
index def049e7a6b..00000000000
--- a/src/mesa/drivers/dri/intel/server/i830_dri.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */
-
-#ifndef _I830_DRI_H
-#define _I830_DRI_H
-
-#include "xf86drm.h"
-
-#define I830_MAX_DRAWABLES 256
-
-#define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 9
-#define I830_PATCHLEVEL 0
-
-#define I830_REG_SIZE 0x80000
-
-typedef struct _I830DRIRec {
- drm_handle_t regs;
- drmSize regsSize;
-
- drmSize unused1; /* backbufferSize */
- drm_handle_t unused2; /* backbuffer */
-
- drmSize unused3; /* depthbufferSize */
- drm_handle_t unused4; /* depthbuffer */
-
- drmSize unused5; /* rotatedSize */
- drm_handle_t unused6; /* rotatedbuffer */
-
- drm_handle_t unused7; /* textures */
- int unused8; /* textureSize */
-
- drm_handle_t unused9; /* agp_buffers */
- drmSize unused10; /* agp_buf_size */
-
- int deviceID;
- int width;
- int height;
- int mem;
- int cpp;
- int bitsPerPixel;
-
- int unused11[8]; /* was front/back/depth/rotated offset/pitch */
-
- int unused12; /* logTextureGranularity */
- int unused13; /* textureOffset */
-
- int irq;
- int sarea_priv_offset;
-} I830DRIRec, *I830DRIPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830ConfigPrivRec, *I830ConfigPrivPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830DRIContextRec, *I830DRIContextPtr;
-
-
-#endif
diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h
deleted file mode 100644
index 6ea72499c1c..00000000000
--- a/src/mesa/drivers/dri/intel/server/intel.h
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef _INTEL_H_
-#define _INTEL_H_
-
-#include "xf86drm.h" /* drm_handle_t, etc */
-
-/* Intel */
-#ifndef PCI_CHIP_I810
-#define PCI_CHIP_I810 0x7121
-#define PCI_CHIP_I810_DC100 0x7123
-#define PCI_CHIP_I810_E 0x7125
-#define PCI_CHIP_I815 0x1132
-#define PCI_CHIP_I810_BRIDGE 0x7120
-#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
-#define PCI_CHIP_I810_E_BRIDGE 0x7124
-#define PCI_CHIP_I815_BRIDGE 0x1130
-#endif
-
-#define PCI_CHIP_845_G 0x2562
-#define PCI_CHIP_I830_M 0x3577
-
-#ifndef PCI_CHIP_I855_GM
-#define PCI_CHIP_I855_GM 0x3582
-#define PCI_CHIP_I855_GM_BRIDGE 0x3580
-#endif
-
-#ifndef PCI_CHIP_I865_G
-#define PCI_CHIP_I865_G 0x2572
-#define PCI_CHIP_I865_G_BRIDGE 0x2570
-#endif
-
-#ifndef PCI_CHIP_I915_G
-#define PCI_CHIP_I915_G 0x2582
-#define PCI_CHIP_I915_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I915_GM
-#define PCI_CHIP_I915_GM 0x2592
-#define PCI_CHIP_I915_GM_BRIDGE 0x2590
-#endif
-
-#ifndef PCI_CHIP_E7221_G
-#define PCI_CHIP_E7221_G 0x258A
-/* Same as I915_G_BRIDGE */
-#define PCI_CHIP_E7221_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I945_G
-#define PCI_CHIP_I945_G 0x2772
-#define PCI_CHIP_I945_G_BRIDGE 0x2770
-#endif
-
-#ifndef PCI_CHIP_I945_GM
-#define PCI_CHIP_I945_GM 0x27A2
-#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
-#endif
-
-#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \
- pI810->Chipset == PCI_CHIP_I810_DC100 || \
- pI810->Chipset == PCI_CHIP_I810_E)
-#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815)
-#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M)
-#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G)
-#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM)
-#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME))
-#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME))
-#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G)
-
-#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G)
-#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM)
-#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G)
-#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM)
-#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
-
-#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
-
-#define I830_GMCH_CTRL 0x52
-
-#define I830_GMCH_MEM_MASK 0x1
-#define I830_GMCH_MEM_64M 0x1
-#define I830_GMCH_MEM_128M 0
-
-#define I830_GMCH_GMS_MASK 0x70
-#define I830_GMCH_GMS_DISABLED 0x00
-#define I830_GMCH_GMS_LOCAL 0x10
-#define I830_GMCH_GMS_STOLEN_512 0x20
-#define I830_GMCH_GMS_STOLEN_1024 0x30
-#define I830_GMCH_GMS_STOLEN_8192 0x40
-
-#define I855_GMCH_GMS_MASK (0x7 << 4)
-#define I855_GMCH_GMS_DISABLED 0x00
-#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4)
-#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4)
-#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4)
-#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4)
-#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4)
-#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4)
-#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4)
-
-typedef unsigned char Bool;
-#define TRUE 1
-#define FALSE 0
-
-#define PIPE_NONE 0<<0
-#define PIPE_CRT 1<<0
-#define PIPE_TV 1<<1
-#define PIPE_DFP 1<<2
-#define PIPE_LFP 1<<3
-#define PIPE_CRT2 1<<4
-#define PIPE_TV2 1<<5
-#define PIPE_DFP2 1<<6
-#define PIPE_LFP2 1<<7
-
-typedef struct _I830MemPool *I830MemPoolPtr;
-typedef struct _I830MemRange *I830MemRangePtr;
-typedef struct _I830MemRange {
- long Start;
- long End;
- long Size;
- unsigned long Physical;
- unsigned long Offset; /* Offset of AGP-allocated portion */
- unsigned long Alignment;
- drm_handle_t Key;
- unsigned long Pitch; // add pitch
- I830MemPoolPtr Pool;
-} I830MemRange;
-
-typedef struct _I830MemPool {
- I830MemRange Total;
- I830MemRange Free;
- I830MemRange Fixed;
- I830MemRange Allocated;
-} I830MemPool;
-
-typedef struct {
- int tail_mask;
- I830MemRange mem;
- unsigned char *virtual_start;
- int head;
- int tail;
- int space;
-} I830RingBuffer;
-
-typedef struct _I830Rec {
- unsigned char *MMIOBase;
- unsigned char *FbBase;
- int cpp;
- uint32_t aper_size;
- unsigned int bios_version;
-
- /* These are set in PreInit and never changed. */
- long FbMapSize;
- long TotalVideoRam;
- I830MemRange StolenMemory; /* pre-allocated memory */
- long BIOSMemorySize; /* min stolen pool size */
- int BIOSMemSizeLoc;
-
- /* These change according to what has been allocated. */
- long FreeMemory;
- I830MemRange MemoryAperture;
- I830MemPool StolenPool;
- long allocatedMemory;
-
- /* Regions allocated either from the above pools, or from agpgart. */
- /* for single and dual head configurations */
- I830MemRange FrontBuffer;
- I830MemRange FrontBuffer2;
- I830MemRange Scratch;
- I830MemRange Scratch2;
-
- I830RingBuffer *LpRing;
-
- I830MemRange BackBuffer;
- I830MemRange DepthBuffer;
- I830MemRange TexMem;
- int TexGranularity;
- I830MemRange ContextMem;
- int drmMinor;
- Bool have3DWindows;
-
- Bool NeedRingBufferLow;
- Bool allowPageFlip;
- Bool disableTiling;
-
- int Chipset;
- unsigned long LinearAddr;
- unsigned long MMIOAddr;
-
- drmSize registerSize; /**< \brief MMIO register map size */
- drm_handle_t registerHandle; /**< \brief MMIO register map handle */
- // IOADDRESS ioBase;
- int irq; /**< \brief IRQ number */
- int GttBound;
-
- drm_handle_t ring_map;
- unsigned int Fence[8];
-
-} I830Rec;
-
-/*
- * 12288 is set as the maximum, chosen because it is enough for
- * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
- */
-#define I830_MAXIMUM_VBIOS_MEM 12288
-#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024)
-#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024)
-
-/* Flags for memory allocation function */
-#define FROM_ANYWHERE 0x00000000
-#define FROM_POOL_ONLY 0x00000001
-#define FROM_NEW_ONLY 0x00000002
-#define FROM_MASK 0x0000000f
-
-#define ALLOCATE_AT_TOP 0x00000010
-#define ALLOCATE_AT_BOTTOM 0x00000020
-#define FORCE_GAPS 0x00000040
-
-#define NEED_PHYSICAL_ADDR 0x00000100
-#define ALIGN_BOTH_ENDS 0x00000200
-#define FORCE_LOW 0x00000400
-
-#define ALLOC_NO_TILING 0x00001000
-#define ALLOC_INITIAL 0x00002000
-
-#define ALLOCATE_DRY_RUN 0x80000000
-
-/* Chipset registers for VIDEO BIOS memory RW access */
-#define _855_DRAM_RW_CONTROL 0x58
-#define _845_DRAM_RW_CONTROL 0x90
-#define DRAM_WRITE 0x33330000
-
-#define KB(x) ((x) * 1024)
-#define MB(x) ((x) * KB(1024))
-
-#define GTT_PAGE_SIZE KB(4)
-#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y))
-#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y))
-#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE)
-#define ROUND_TO_MB(x) ROUND_TO((x), MB(1))
-#define PRIMARY_RINGBUFFER_SIZE KB(128)
-
-
-/* Ring buffer registers, p277, overview p19
- */
-#define LP_RING 0x2030
-#define HP_RING 0x2040
-
-#define RING_TAIL 0x00
-#define TAIL_ADDR 0x000FFFF8
-#define I830_TAIL_MASK 0x001FFFF8
-
-#define RING_HEAD 0x04
-#define HEAD_WRAP_COUNT 0xFFE00000
-#define HEAD_WRAP_ONE 0x00200000
-#define HEAD_ADDR 0x001FFFFC
-#define I830_HEAD_MASK 0x001FFFFC
-
-#define RING_START 0x08
-#define START_ADDR 0x03FFFFF8
-#define I830_RING_START_MASK 0xFFFFF000
-
-#define RING_LEN 0x0C
-#define RING_NR_PAGES 0x001FF000
-#define I830_RING_NR_PAGES 0x001FF000
-#define RING_REPORT_MASK 0x00000006
-#define RING_REPORT_64K 0x00000002
-#define RING_REPORT_128K 0x00000004
-#define RING_NO_REPORT 0x00000000
-#define RING_VALID_MASK 0x00000001
-#define RING_VALID 0x00000001
-#define RING_INVALID 0x00000000
-
-
-/* Fence/Tiling ranges [0..7]
- */
-#define FENCE 0x2000
-#define FENCE_NR 8
-
-#define I915G_FENCE_START_MASK 0x0ff00000
-
-#define I830_FENCE_START_MASK 0x07f80000
-
-#define FENCE_START_MASK 0x03F80000
-#define FENCE_X_MAJOR 0x00000000
-#define FENCE_Y_MAJOR 0x00001000
-#define FENCE_SIZE_MASK 0x00000700
-#define FENCE_SIZE_512K 0x00000000
-#define FENCE_SIZE_1M 0x00000100
-#define FENCE_SIZE_2M 0x00000200
-#define FENCE_SIZE_4M 0x00000300
-#define FENCE_SIZE_8M 0x00000400
-#define FENCE_SIZE_16M 0x00000500
-#define FENCE_SIZE_32M 0x00000600
-#define FENCE_SIZE_64M 0x00000700
-#define I915G_FENCE_SIZE_1M 0x00000000
-#define I915G_FENCE_SIZE_2M 0x00000100
-#define I915G_FENCE_SIZE_4M 0x00000200
-#define I915G_FENCE_SIZE_8M 0x00000300
-#define I915G_FENCE_SIZE_16M 0x00000400
-#define I915G_FENCE_SIZE_32M 0x00000500
-#define I915G_FENCE_SIZE_64M 0x00000600
-#define I915G_FENCE_SIZE_128M 0x00000700
-#define FENCE_PITCH_1 0x00000000
-#define FENCE_PITCH_2 0x00000010
-#define FENCE_PITCH_4 0x00000020
-#define FENCE_PITCH_8 0x00000030
-#define FENCE_PITCH_16 0x00000040
-#define FENCE_PITCH_32 0x00000050
-#define FENCE_PITCH_64 0x00000060
-#define FENCE_VALID 0x00000001
-
-#include <mmio.h>
-
-# define MMIO_IN8(base, offset) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset))
-# define MMIO_IN32(base, offset) \
- read_MMIO_LE32(base, offset)
-# define MMIO_OUT8(base, offset, val) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val)
-# define MMIO_OUT32(base, offset, val) \
- *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val)
-
-
- /* Memory mapped register access macros */
-#define INREG8(addr) MMIO_IN8(MMIO, addr)
-#define INREG(addr) MMIO_IN32(MMIO, addr)
-#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val)
-#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val)
-
-#define DSPABASE 0x70184
-
-#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
index 1ffda1932f1..9145ee6e6cf 100644
--- a/src/mesa/drivers/dri/mach64/mach64_ioctl.h
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
@@ -32,6 +32,9 @@
#ifndef __MACH64_IOCTL_H__
#define __MACH64_IOCTL_H__
+#include <stdio.h>
+#include <stdlib.h>
+
#include "mach64_dri.h"
#include "mach64_reg.h"
#include "mach64_lock.h"
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
index 8b8fc485d31..cc0cea618d1 100644
--- a/src/mesa/drivers/dri/mga/mgarender.c
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -44,6 +44,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "mgacontext.h"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index 8be7edb150b..bd1273beea7 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -220,7 +220,7 @@ get_tex_format(struct gl_texture_image *ti)
case MESA_FORMAT_RGB565:
return GL_RGB5;
default:
- assert(0);
+ return GL_NONE;
}
}
@@ -231,7 +231,6 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
struct gl_renderbuffer *rb = att->Renderbuffer;
struct gl_texture_image *ti =
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
- int ret;
/* Allocate a renderbuffer object for the texture if we
* haven't already done so. */
@@ -244,9 +243,7 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
}
/* Update the renderbuffer fields from the texture. */
- ret = set_renderbuffer_format(rb, get_tex_format(ti));
- assert(ret);
-
+ set_renderbuffer_format(rb, get_tex_format(ti));
rb->Width = ti->Width;
rb->Height = ti->Height;
nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
index dbf9a5cc613..442f4e899ee 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -38,6 +38,7 @@
#include "main/mipmap.h"
#include "main/texfetch.h"
#include "main/teximage.h"
+#include "drivers/common/meta.h"
static struct gl_texture_object *
nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target)
@@ -182,10 +183,10 @@ teximage_fits(struct gl_texture_object *t, int level)
struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level];
struct gl_texture_image *ti = t->Image[0][level];
- return ti && (t->Target == GL_TEXTURE_RECTANGLE ||
- (s->bo && s->width == ti->Width &&
- s->height == ti->Height &&
- s->format == ti->TexFormat));
+ return ti && to_nouveau_teximage(ti)->surface.bo &&
+ (t->Target == GL_TEXTURE_RECTANGLE ||
+ (s->bo && s->format == ti->TexFormat &&
+ s->width == ti->Width && s->height == ti->Height));
}
static GLboolean
@@ -589,6 +590,53 @@ nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t)
}
}
+static void
+store_mipmap(GLcontext *ctx, GLenum target, int first, int last,
+ struct gl_texture_object *t)
+{
+ struct gl_pixelstore_attrib packing = {
+ .BufferObj = ctx->Shared->NullBufferObj,
+ .Alignment = 1
+ };
+ GLenum format = t->Image[0][first]->TexFormat;
+ unsigned base_format, type, comps;
+ int i;
+
+ base_format = _mesa_get_format_base_format(format);
+ _mesa_format_to_type_and_comps(format, &type, &comps);
+
+ for (i = first; i <= last; i++) {
+ struct gl_texture_image *ti = t->Image[0][i];
+ void *data = ti->Data;
+
+ nouveau_teximage(ctx, 3, target, i, ti->InternalFormat,
+ ti->Width, ti->Height, ti->Depth,
+ ti->Border, base_format, type, data,
+ &packing, t, ti);
+
+ _mesa_free_texmemory(data);
+ }
+}
+
+static void
+nouveau_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *t)
+{
+ if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) {
+ struct gl_texture_image *base = t->Image[0][t->BaseLevel];
+
+ nouveau_teximage_map(ctx, base);
+ _mesa_generate_mipmap(ctx, target, t);
+ nouveau_teximage_unmap(ctx, base);
+
+ store_mipmap(ctx, target, t->BaseLevel + 1,
+ get_last_level(t), t);
+
+ } else {
+ _mesa_meta_GenerateMipmap(ctx, target, t);
+ }
+}
+
void
nouveau_texture_functions_init(struct dd_function_table *functions)
{
@@ -607,4 +655,5 @@ nouveau_texture_functions_init(struct dd_function_table *functions)
functions->BindTexture = nouveau_bind_texture;
functions->MapTexture = nouveau_texture_map;
functions->UnmapTexture = nouveau_texture_unmap;
+ functions->GenerateMipmap = nouveau_generate_mipmap;
}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
index 21da4f7af16..95691cad047 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
@@ -72,7 +72,7 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
fb->_ColorDrawBuffers[0])->surface;
rt_format |= get_rt_format(s->format);
- zeta_pitch = rt_pitch = s->pitch;
+ rt_pitch = s->pitch;
nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET,
s->bo, 0, bo_flags);
@@ -88,6 +88,9 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET,
s->bo, 0, bo_flags);
+ } else {
+ rt_format |= get_rt_format(MESA_FORMAT_Z24_S8);
+ zeta_pitch = rt_pitch;
}
BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2);
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
index e46118e4fce..2d45513bb4c 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -194,7 +194,8 @@ nv20_emit_tex_obj(GLcontext *ctx, int emit)
| nvgl_wrap_mode(t->WrapS) << 0;
tx_filter = nvgl_filter_mode(t->MagFilter) << 24
- | nvgl_filter_mode(t->MinFilter) << 16;
+ | nvgl_filter_mode(t->MinFilter) << 16
+ | 2 << 12;
tx_enable = NV20TCL_TX_ENABLE_ENABLE
| log2i(t->MaxAnisotropy) << 4;
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 262fe3cddee..dbf4ad477db 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ radeon_prepare_render(&rmesa->radeon);
+
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
/* need to disable perspective-correct texturing for point sprites */
if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index d43e14581e9..4ae0f304918 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
/* need to disable perspective-correct texturing for point sprites */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index a326ee4c4fa..d2fa816894c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500){
- r500_transform_unroll_loops(&c->Base, &loop_state);
- debug_program_log(c, "after r500 transform loops");
+ rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
+ debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_unroll_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state, -1);
debug_program_log(c, "after transform loops");
-
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index d347b4df9cd..666c9c2a7a9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,6 +32,11 @@
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+struct loop {
+ int BgnLoop;
+
+};
+
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+ return PVS_SRC_OPERAND(compiler->PredicateIndex,
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_W),
+ t_src_class(RC_FILE_TEMPORARY),
+ 0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+ unsigned int hw_opcode, int is_math)
+{
+ return PVS_OP_DST_OPERAND(hw_opcode,
+ is_math,
+ 0,
+ compiler->PredicateIndex,
+ RC_MASK_W,
+ t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+ struct rc_instruction *rci,
+ unsigned int * inst,
+ unsigned int branch_depth)
+{
+ unsigned int predicate_opcode;
+ int is_math = 0;
+
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode IF not supported\n");
+ return;
+ }
+
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (!compiler->PredicateMask) {
+ unsigned int writemasks[R300_VS_MAX_TEMPS];
+ memset(writemasks, 0, sizeof(writemasks));
+ struct rc_instruction * inst;
+ unsigned int i;
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+ for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+ unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+ /* Only the W component can be used fo the predicate
+ * stack counter. */
+ if (mask & RC_MASK_W) {
+ compiler->PredicateMask = RC_MASK_W;
+ compiler->PredicateIndex = i;
+ break;
+ }
+ }
+ if (i == R300_VS_MAX_TEMPS) {
+ rc_error(&compiler->Base, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return;
+ }
+ }
+ predicate_opcode =
+ branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+ rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+ if (branch_depth == 0) {
+ is_math = 1;
+ predicate_opcode = ME_PRED_SET_NEQ;
+ inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ inst[2] = 0;
+ } else {
+ predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+ inst[1] = t_pred_src(compiler);
+ inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ }
+
+ inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+ inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *rci;
+ struct loop * loops;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
+ unsigned int branch_depth = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+ case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l = &loops[current_loop_depth - 1];
+ unsigned int act_addr = l->BgnLoop - 1;
+ unsigned int last_addr = (compiler->code->length / 4) - 1;
+ unsigned int ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
}
+ /* Non-flow control instructions that are inside an if statement
+ * need to pay attention to the predicate bit. */
+ if (branch_depth
+ && vpi->Opcode != RC_OPCODE_IF
+ && vpi->Opcode != RC_OPCODE_ELSE
+ && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+ inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+ << PVS_DST_PRED_ENABLE_SHIFT);
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ << PVS_DST_PRED_SENSE_SHIFT);
+ }
+
compiler->code->length += 4;
if (compiler->Base.Error)
@@ -406,6 +618,7 @@ struct temporary_allocation {
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
@@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ ta[inst->U.I.SrcReg[i].Index].LastRead =
+ end_loop ? end_loop : inst;
}
}
@@ -633,30 +871,24 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
struct emulate_loop_state loop_state;
-
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
debug_program_log(compiler, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
- rc_transform_unroll_loops(&compiler->Base, &loop_state);
-
- debug_program_log(compiler, "after transform loops");
-
- if (compiler->Base.is_r500){
- rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
- } else {
- rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
- }
- debug_program_log(compiler, "after emulate loops");
+ if (compiler->Base.is_r500)
+ rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ else
+ rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
- rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate loops");
- debug_program_log(compiler, "after emulate branches");
+ if (!compiler->Base.is_r500) {
+ rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate branches");
+ }
if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
@@ -718,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
+ r300_vertex_program_dump(compiler);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 5800f1a78e1..e6009338e2e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "radeon_compiler.h"
#include "radeon_code.h"
+#include "../r300_reg.h"
#include <stdio.h>
@@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op)
{
fprintf(stderr, " dst: %d%s op: ",
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+ fprintf(stderr, "PRED %u",
+ (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+ }
if (op & 0x80) {
if (op & 0x1) {
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
@@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src)
r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c)
{
+ struct r300_vertex_program_code * vs = c->code;
unsigned instrcount = vs->length / 4;
unsigned i;
@@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
r300_vs_src_dump(vs->body.d[offset+1+src]);
}
}
+
+ fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+ for(i = 0; i < vs->num_fc_ops; i++) {
+ switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+ case 0: fprintf(stderr, "NOP"); break;
+ case 1: fprintf(stderr, "JUMP"); break;
+ case 2: fprintf(stderr, "LOOP"); break;
+ case 3: fprintf(stderr, "JSR"); break;
+ }
+ if (c->Base.is_r500) {
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ vs->fc_op_addrs.r500[i].uw,
+ vs->fc_op_addrs.r500[i].lw);
+ } else {
+ fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index e6b5522c5b9..80a120497e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,7 +30,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_emulate_loops.h"
/**
* Rewrite IF instructions to use the ALU result special register.
@@ -60,31 +59,6 @@ int r500_transform_IF(
return 1;
}
-/**
- * Rewrite loops to make them easier to emit. This is not a local
- * transformation, because it modifies and reorders an entire block of code.
- */
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state *s)
-{
- int i;
-
- rc_transform_unroll_loops(c, s);
-
- for( i = s->LoopCount - 1; i >= 0; i-- ){
- struct rc_instruction * inst_continue;
- if(!s->Loops[i].EndLoop){
- continue;
- }
- /* Insert a continue instruction at the end of the loop. This
- * is required in order to emit loops correctly. */
- inst_continue = rc_insert_new_instruction(c,
- s->Loops[i].EndIf->Prev);
- inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
- }
-
-}
-
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 0d005a794ff..34173351f83 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -49,6 +49,4 @@ extern int r500_transform_IF(
struct rc_instruction * inst,
void* data);
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state * s);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 0bd8f0a239f..9b60e30f586 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -64,7 +64,16 @@ struct branch_info {
};
struct loop_info {
- int LoopStart;
+ int BgnLoop;
+
+ int BranchDepth;
+ int * Brks;
+ int BrkCount;
+ int BrkReserved;
+
+ int * Conts;
+ int ContCount;
+ int ContReserved;
};
struct emit_state {
@@ -368,6 +377,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
unsigned int newip = ++s->Code->inst_end;
+ /* Currently all loops use the same integer constant to intialize
+ * the loop variables. */
+ if(!s->Code->int_constants[0]) {
+ s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+ s->Code->int_constant_count = 1;
+ }
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
switch(inst->U.I.Opcode){
@@ -378,32 +393,77 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
loop = &s->Loops[s->CurrentLoopDepth++];
-
- /* We don't emit an instruction for BGNLOOP, so we need to
- * decrement the instruction counter, but first we need to
- * set LoopStart to the current value of inst_end, which
- * will end up being the first real instruction in the loop.*/
- loop->LoopStart = s->Code->inst_end--;
+ memset(loop, 0, sizeof(struct loop_info));
+ loop->BranchDepth = s->CurrentBranchDepth;
+ loop->BgnLoop = newip;
+
+ s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+ | R500_FC_JUMP_FUNC(0x00)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
-
case RC_OPCODE_BRK:
- /* Don't emit an instruction for BRK */
- s->Code->inst_end--;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+ loop->BrkCount, loop->BrkReserved, 1);
+
+ loop->Brks[loop->BrkCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
- case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_CONT:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
- R500_FC_JUMP_FUNC(0xff);
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+ loop->ContCount, loop->ContReserved, 1);
+ loop->Conts[loop->ContCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
case RC_OPCODE_ENDLOOP:
- /* Don't emit an instruction for ENDLOOP */
- s->Code->inst_end--;
+ {
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ /* Emit ENDLOOP */
+ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_JUMP_ANY
+ | R500_FC_IGNORE_UNCOVERED
+ ;
+ /* The constant integer at index 0 is used by all loops. */
+ s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+ ;
+
+ /* Set jump address and int constant for BGNLOOP */
+ s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(newip)
+ ;
+
+ /* Set jump address for the BRK instructions. */
+ while(loop->BrkCount--) {
+ s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip + 1);
+ }
+
+ /* Set jump address for CONT instructions. */
+ while(loop->ContCount--) {
+ s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip);
+ }
s->CurrentLoopDepth--;
break;
-
+ }
case RC_OPCODE_IF:
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
@@ -442,24 +502,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
}
branch = &s->Branches[s->CurrentBranchDepth - 1];
-
- if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
- branch->Endif = --s->Code->inst_end;
- s->Code->inst[branch->Endif].inst2 |=
- R500_FC_B_OP0_DECR;
- }
- else{
- branch->Endif = newip;
-
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
- }
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
@@ -544,11 +596,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
- /* Use FULL flow control mode if branches are nested deep enough.
- * We don not need to enable FULL flow control mode for loops, becasue
- * we aren't using the hardware loop instructions.
- */
- if (s.MaxBranchDepth >= 4) {
+ /* Enable full flow control mode if we are using loops or have if
+ * statements nested at least four deep. */
+ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index d03689763bc..896246d2035 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -221,6 +221,9 @@ struct r500_fragment_program_code {
int max_temp_idx;
uint32_t us_fc_ctrl;
+
+ uint32_t int_constants[32];
+ uint32_t int_constant_count;
};
struct rX00_fragment_program_code {
@@ -240,6 +243,12 @@ struct rX00_fragment_program_code {
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -260,9 +269,18 @@ struct r300_vertex_program_code {
uint32_t InputsRead;
uint32_t OutputsWritten;
-};
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+ unsigned int num_fc_ops;
+ uint32_t fc_ops;
+ union {
+ uint32_t r300[R300_VS_MAX_FC_OPS];
+ struct {
+ uint32_t lw;
+ uint32_t uw;
+ } r500[R300_VS_MAX_FC_OPS];
+ } fc_op_addrs;
+ int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
#endif /* RADEON_CODE_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 1c8ba864a41..935dc9b0a80 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
}
}
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction *inst_add;
+ struct rc_instruction *inst;
+
+ /* perspective divide */
+ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = tempregi;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+ inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+ inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+ inst_add->U.I.SrcReg[1].Index = face;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == face) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f15905d79d4..7c42eb3ae57 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
@@ -110,8 +111,12 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+ int PredicateIndex;
+ unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index fbb4235c223..faf531b412e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -43,6 +43,12 @@ struct instruction_state {
unsigned char SrcReg[3];
};
+struct loopinfo {
+ struct updatemask_state * Breaks;
+ unsigned int BreakCount;
+ unsigned int BreaksReserved;
+};
+
struct branchinfo {
unsigned int HaveElse:1;
@@ -59,6 +65,10 @@ struct deadcode_state {
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;
+
+ struct loopinfo * LoopStack;
+ unsigned int LoopStackSize;
+ unsigned int LoopStackReserved;
};
@@ -78,6 +88,22 @@ static void or_updatemasks(
dst->Address = a->Address | b->Address;
}
+static void push_break(struct deadcode_state *s)
+{
+ struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+ memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+ loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+ memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+ s->LoopStackSize, s->LoopStackReserved, 1);
+ memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
static void push_branch(struct deadcode_state * s)
{
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
@@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
}
}
}
+ push_loop(&s);
break;
}
- case RC_OPCODE_CONTINUE:
case RC_OPCODE_BRK:
+ push_break(&s);
+ break;
case RC_OPCODE_BGNLOOP:
+ {
+ unsigned int i;
+ struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+ for(i = 0; i < loop->BreakCount; i++) {
+ or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+ }
+ break;
+ }
+ case RC_OPCODE_CONT:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 131e9e7436d..32d4b45dd6d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -39,7 +39,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
@@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src,
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
- struct loop_info * loop, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop, unsigned int prog_inst_limit)
{
- unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
- return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
+ return 1 + ((prog_inst_limit - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
@@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s,
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
@@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst,
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
@@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -185,13 +184,16 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop)
+/**
+ * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int prog_inst_limit)
{
- int end_loops = 1;
+ int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
@@ -201,12 +203,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
@@ -214,13 +216,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
@@ -230,11 +232,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
+ end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
@@ -246,6 +249,16 @@ static int transform_const_loop(struct emulate_loop_state * s,
loop->EndLoop = inst;
end_loops--;
break;
+ case RC_OPCODE_BRK:
+ /* Don't unroll loops if it has a BRK instruction
+ * other one used when testing the main conditional
+ * of the loop. */
+
+ /* Make sure we haven't entered a nested loops. */
+ if(inst != loop->Brk && end_loops == 1) {
+ return 0;
+ }
+ break;
/* XXX Check if the counter is modified within an if statement.
*/
case RC_OPCODE_IF:
@@ -266,17 +279,20 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
+ limit_value = get_constant_value(c, limit, 0);
DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are opposite of what you would expect.
+ * In a normal loop, if the condition is met, then loop continues, but
+ * with our loops, if the condition is met, the is exited. */
switch(loop->Cond->U.I.Opcode){
- case RC_OPCODE_SGT:
- case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
- case RC_OPCODE_SLE:
- case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
@@ -284,77 +300,85 @@ static int transform_const_loop(struct emulate_loop_state * s,
return 0;
}
+ if (prog_inst_limit > 0
+ && iterations > loop_max_possible_iterations(c, loop,
+ prog_inst_limit)) {
+ return 0;
+ }
+
DBG("Loop will have %d iterations.\n", iterations);
-
+
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
-
- loop_unroll(s, loop, iterations);
+
+ unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
-/**
- * This function prepares a loop to be unrolled by converting it into an if
- * statement. Here is an outline of the conversion process:
- * BGNLOOP; -> BGNLOOP;
- * <Additional conditional code> -> <Additional conditional code>
- * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
- * IF temp[0]; -> IF temp[0];
- * BRK; ->
- * ENDIF; -> <Loop Body>
- * <Loop Body> -> ENDIF;
- * ENDLOOP; -> ENDLOOP
- *
+/**
+ * @param c
+ * @param loop
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 if all of the members of loop where set.
+ * @return 0 if there was an error and some members of loop are still NULL.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
- struct loop_info *loop;
struct rc_instruction * ptr;
- memory_pool_array_reserve(&s->C->Pool, struct loop_info,
- s->Loops, s->LoopCount, s->LoopReserved, 1);
-
- loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
- rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
- return NULL;
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+ return 0;
}
+
+ memset(loop, 0, sizeof(struct loop_info));
+
loop->BeginLoop = inst;
- for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- if(!ptr){
- return NULL;
+ {
+ /* Nested loop, skip ahead to the end. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break;
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
}
break;
+ }
case RC_OPCODE_BRK:
- loop->Brk = ptr;
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "%s: expected ENDIF\n",__FUNCTION__);
- return NULL;
- }
- loop->EndIf = ptr->Next;
- if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,
- "%s: expected IF\n", __FUNCTION__);
- return NULL;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue;
}
+ loop->Brk = ptr;
loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
@@ -364,18 +388,58 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
case RC_OPCODE_SNE:
break;
default:
- rc_error(s->C, "%s expected conditional\n",
+ rc_error(c, "%s: expected conditional",
__FUNCTION__);
- return NULL;
+ return 0;
}
loop->Cond = loop->If->Prev;
- ptr = loop->EndIf;
break;
+
case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info * loop;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+
+ if (!build_loop_info(s->C, loop, inst))
+ return 0;
+
+ if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
+ return 1;
+ }
+
/* Reverse the conditional instruction */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
@@ -398,43 +462,51 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
break;
default:
rc_error(s->C, "loop->Cond is not a conditional.\n");
- return NULL;
- }
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop)){
- return loop->BeginLoop->Next;
+ return 0;
}
- /* Prepare the loop to be unrolled */
+ /* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
- return loop->EndLoop;
+ return 1;
}
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit)
{
struct rc_instruction * ptr;
-
+
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
- ptr = s->C->Program.Instructions.Next;
- while(ptr != &s->C->Program.Instructions) {
+ s->prog_inst_limit = prog_inst_limit;
+ for(ptr = s->C->Program.Instructions.Next;
+ ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
- if(!ptr){
+ if (!transform_loop(s, ptr))
return;
+ }
+ }
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
+{
+ struct rc_instruction * inst;
+ struct loop_info loop;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ if (build_loop_info(c, &loop, inst)) {
+ try_unroll_loop(c, &loop, prog_inst_limit);
}
}
- ptr = ptr->Next;
}
}
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit)
{
int i;
/* Iterate backwards of the list of loops so that loops that nested
@@ -444,8 +516,8 @@ void rc_emulate_loops(struct emulate_loop_state *s,
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
- max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ unsigned int iterations = loop_max_possible_iterations(
+ s->C, &s->Loops[i], prog_inst_limit);
+ unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 7748813c4eb..bba1f68e308 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -21,12 +21,14 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
+ int prog_inst_limit;
};
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit);
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions);
+void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
+
+void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit);
#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 04f234f11d8..2ea830be7f9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0,
},
{
- .Opcode = RC_OPCODE_CONTINUE,
- .Name = "CONTINUE",
+ .Opcode = RC_OPCODE_CONT,
+ .Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 8b9fa07dde2..6e18d6eb3f1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,7 +187,7 @@ typedef enum {
RC_OPCODE_ENDLOOP,
- RC_OPCODE_CONTINUE,
+ RC_OPCODE_CONT,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index eca06515367..7a3f35950a6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst = inst->Next) {
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
+ || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 8a912da4613..ce72cd97ab2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -65,6 +65,11 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
+ /**
+ * If an instruction is inside of a loop, end_loop will be the
+ * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ */
+ int end_loop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst,
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else {
- if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
- }
+ } else if (s->end_loop)
+ reg->Live.End = s->end_loop;
+ else if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
}
static void compute_live_intervals(struct regalloc_state * s)
@@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
+
+ /* For all instructions inside of a loop, the ENDLOOP
+ * instruction is used as the end of the live interval. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ int loops = 1;
+ struct rc_instruction * tmp;
+ for(tmp = inst->Next;
+ tmp != &s->C->Program.Instructions;
+ tmp = tmp->Next) {
+ if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loops++;
+ break;
+ } else if (tmp->U.I.Opcode
+ == RC_OPCODE_ENDLOOP) {
+ if(!--loops) {
+ s->end_loop = tmp->IP;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst->IP == s->end_loop)
+ s->end_loop = 0;
+
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 3cc28972934..857aae55145 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c)
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
- if (inst->U.I.Opcode != RC_OPCODE_KILP
- || inst->Prev->U.I.Opcode != RC_OPCODE_IF
- || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ if (inst->U.I.Opcode != RC_OPCODE_KILP)
continue;
- }
+
inst->U.I.Opcode = RC_OPCODE_KIL;
- inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
+ if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ inst->U.I.SrcReg[0] = negate(builtin_one);
+ } else {
+
+ inst->U.I.SrcReg[0] =
+ negate(absolute(inst->Prev->U.I.SrcReg[0]));
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index e4b302bbad9..3d2f8928fa6 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -461,7 +461,7 @@ static void r300InitGLExtensions(GLcontext *ctx)
if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
}
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350)
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_R420)
_mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f25264b6f2d..f7705b0f6fe 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index bb8f91491f5..cf89ab7ec3d 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
+ radeon_prepare_render(&rmesa->radeon);
+
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 4ba6740e3d9..94588698265 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -152,8 +152,8 @@ int32_t r300TranslateTexFormat(gl_format mesaFormat)
case MESA_FORMAT_Z32:
return R300_EASY_TX_FORMAT(X, X, X, X, X32);
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
- return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SARGB8:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SLA8:
return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SL8:
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 172f85eb264..27acff9c166 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -72,7 +72,7 @@ unsigned r600_check_blit(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
break;
@@ -320,9 +320,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
SETbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
@@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1043,17 +1050,17 @@ set_tex_resource(context_t * context,
SETfield(sq_tex_resource4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(sq_tex_resource1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(sq_tex_resource4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
@@ -1477,7 +1484,6 @@ set_default_state(context_t *context)
(CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
- R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
@@ -1526,6 +1532,7 @@ set_default_state(context_t *context)
R600_OUT_BATCH(0);
R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+ R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0);
END_BATCH();
COMMIT_BATCH();
@@ -1607,7 +1614,7 @@ unsigned r600_blit(GLcontext *ctx,
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
- rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&context->radeon, 308, __FUNCTION__);
/* load shaders */
load_shaders(context->radeon.glCtx);
@@ -1632,7 +1639,7 @@ unsigned r600_blit(GLcontext *ctx,
set_tex_sampler(context);
/* dst */
- /* 27 */
+ /* 31 */
set_render_target(context, dst_bo, dst_mesaformat,
dst_pitch, dst_width, dst_height, dst_offset);
/* scissors */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 84d9d423124..389b0412baa 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -72,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_ENABLE_GLSL_TEST 1
#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
@@ -140,6 +142,7 @@ static const struct dri_extension card_extensions[] = {
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
{"GL_SGIS_generate_mipmap", NULL},
{"GL_ARB_pixel_buffer_object", NULL},
+ {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{NULL, NULL}
/* *INDENT-ON* */
};
@@ -157,6 +160,7 @@ static const struct dri_extension mm_extensions[] = {
static const struct dri_extension gl_20_extension[] = {
#ifdef R600_ENABLE_GLSL_TEST
{"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
#else
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
#endif /* R600_ENABLE_GLSL_TEST */
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 41419f84601..512a52ede3e 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -431,7 +431,7 @@ unsigned r600IsFormatRenderable(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
return 1;
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 1600033b9bd..ba3690b70ed 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -605,17 +605,17 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa
}
break;
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 99a33df4fcb..9c954cbf70c 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
case 2:
format = FMT_8_8; break;
case 3:
- format = FMT_8_8_8; break;
+ /* for some (small/unaligned) strides using 4 comps works
+ * better, probably same as GL_SHORT below
+ * test piglit/draw-vertices */
+ format = FMT_8_8_8_8; break;
case 4:
format = FMT_8_8_8_8; break;
default:
@@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
+ /*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+
int tmp;
checkop1(pAsm);
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
- next_ins(pAsm);
+ pAsm->C[1].f = 0.5f;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if (pAsm->bR6xx)
+ {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ }
+ else
+ {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
@@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
checkop1(pAsm);
tmp = gethelpr(pAsm);
- /* tmp.x = src /2*PI */
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
+ pAsm->C[1].f = 0.5F;
- next_ins(pAsm);
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if(pAsm->bR6xx) {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ } else {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
// COS dst.x, a.x
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
@@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
* results are undefined anyway */
if(export_count == 0)
{
- Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
}
if(pR700AsmCode->cf_last_export_ptr != NULL)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index cefda3ac4ba..bf8063391a2 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -265,17 +265,6 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
if (context->radeon.tcl.aos_count == 0)
return;
- BEGIN_BATCH_NO_AUTOSTATE(6);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
- END_BATCH();
- COMMIT_BATCH();
-
for(i=0; i<VERT_ATTRIB_MAX; i++) {
if(vp->mesa_program->Base.InputsRead & (1 << i))
{
@@ -523,9 +512,9 @@ static void r700SetRenderTarget(context_t *context, int id)
CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
number_type = NUMBER_SRGB;
SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
@@ -617,18 +606,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
r700SetDepthTarget(context);
- BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+ BEGIN_BATCH_NO_AUTOSTATE(7 + 2);
R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
- R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1);
R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
- R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
rrb->bo,
r700->DB_DEPTH_BASE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1);
+ R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All,
+ rrb->bo,
+ r700->DB_DEPTH_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
(context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
@@ -687,27 +683,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_TILE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_FRAG.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1465,9 +1469,6 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
context_t *context = R700_CONTEXT(ctx);
int count = context->radeon.tcl.aos_count * 18;
- if (count)
- count += 6;
-
radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
return count;
}
@@ -1567,7 +1568,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(sq, always, 34, r700SendSQConfig);
ALLOC_STATE(db, always, 17, r700SendDBState);
ALLOC_STATE(stencil, always, 4, r700SendStencilState);
- ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+ ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
ALLOC_STATE(sc, always, 15, r700SendSCState);
ALLOC_STATE(scissor, always, 22, r700SendScissorState);
ALLOC_STATE(aa, always, 12, r700SendAAState);
@@ -1578,7 +1579,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
- ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState);
+ ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
@@ -1590,7 +1591,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(ps, always, 24, r700SendPSState);
ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
- ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+ ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
index 09c48565b68..d1008f28b9b 100644
--- a/src/mesa/drivers/dri/r600/r700_clear.c
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
void r700Clear(GLcontext * ctx, GLbitfield mask)
{
context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
__DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
GLbitfield swrast_mask = 0, tri_mask = 0;
@@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask)
context->radeon.front_buffer_dirty = GL_TRUE;
}
+ radeon_prepare_render(radeon);
+
if( GL_TRUE == r700ClearFast(context, mask) )
{
return;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 1929b7cc129..c5771f9fd0b 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -244,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim)
return num_verts - verts_off;
}
-static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end,
+ int prim, GLint basevertex)
{
context_t *context = R700_CONTEXT(ctx);
BATCH_LOCALS(&context->radeon);
@@ -282,6 +283,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 5 + 2; /* DRAW_INDEX */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -294,6 +296,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
R600_OUT_BATCH(context->ind_buf.bo_offset);
@@ -364,6 +371,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 3; /* DRAW */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -376,6 +384,11 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
if(start == 0)
{
@@ -433,16 +446,16 @@ static GLuint r700PredictRenderSize(GLcontext* ctx,
dwords = PRE_EMIT_STATE_BUFSZ;
if (ib)
- dwords += nr_prims * 14;
+ dwords += nr_prims * 18;
else {
for (i = 0; i < nr_prims; ++i)
{
if (prim[i].start == 0)
- dwords += 10;
+ dwords += 14;
else if (prim[i].count > 0xffff)
- dwords += prim[i].count + 10;
+ dwords += prim[i].count + 14;
else
- dwords += ((prim[i].count + 1) / 2) + 10;
+ dwords += ((prim[i].count + 1) / 2) + 14;
}
}
@@ -625,11 +638,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
- if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input[i]->Type) != 4 ||
+ || getTypeSize(input[i]->Type) != 4
#endif
- stride < 4)
+ )
{
r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
}
@@ -637,19 +650,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
{
if (input[i]->BufferObj->Name)
{
- if (stride % 4 != 0)
- {
- assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
- r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
- context->stream_desc[index].is_named_bo = GL_FALSE;
- }
- else
- {
- context->stream_desc[index].stride = input[i]->StrideB;
- context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
- context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
- context->stream_desc[index].is_named_bo = GL_TRUE;
- }
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
}
else
{
@@ -932,7 +936,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
r700RunRenderPrimitive(ctx,
prim[i].start,
prim[i].start + prim[i].count,
- prim[i].mode);
+ prim[i].mode,
+ prim[i].basevertex);
else
r700RunRenderPrimitiveImmediate(ctx,
prim[i].start,
@@ -977,18 +982,24 @@ static void r700DrawPrims(GLcontext *ctx,
{
GLboolean retval = GL_FALSE;
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ radeon_prepare_render(radeon);
+
/* This check should get folded into just the places that
* min/max index are really needed.
*/
- if (!index_bounds_valid) {
- vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
- }
- if (min_index) {
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ /* do we want to rebase, minimizes the
+ * amount of data to upload? */
+ if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
return;
+ }
}
-
/* Make an attempt at drawing */
retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 137f3007ced..6a2a09eaf1a 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s
stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
: input->StrideB;
- if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input->Type) != 4 ||
+ || getTypeSize(input->Type) != 4
#endif
- stride < 4)
+ )
{
pStreamDesc->type = GL_FLOAT;
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index b7ee9a134bf..7d54fabebbc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -414,9 +414,9 @@ enum {
CHIP_FAMILY_R350,
CHIP_FAMILY_RV350,
CHIP_FAMILY_RV380,
+ CHIP_FAMILY_RS400,
CHIP_FAMILY_R420,
CHIP_FAMILY_RV410,
- CHIP_FAMILY_RS400,
CHIP_FAMILY_RS600,
CHIP_FAMILY_RS690,
CHIP_FAMILY_RS740,
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 13f1f0611b8..c1a660af3d0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -708,7 +708,6 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
radeon->front_cliprects = GL_TRUE;
- radeon->front_buffer_dirty = GL_TRUE;
} else {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
radeon->front_cliprects = GL_FALSE;
@@ -1132,17 +1131,13 @@ flush_front:
if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
__DRIdrawable * drawable = radeon_get_drawable(radeon);
- (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
- /* Only clear the dirty bit if front-buffer rendering is no longer
- * enabled. This is done so that the dirty bit can only be set in
- * glDrawBuffer. Otherwise the dirty bit would have to be set at
- * each of N places that do rendering. This has worse performances,
- * but it is much easier to get correct.
+ /* We set the dirty bit in radeon_prepare_render() if we're
+ * front buffer rendering once we get there.
*/
- if (!radeon->is_front_buffer_rendering) {
- radeon->front_buffer_dirty = GL_FALSE;
- }
+ radeon->front_buffer_dirty = GL_FALSE;
+
+ (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
}
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 5a7d52c4d2f..92663bf66d7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -493,6 +493,50 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
return _mesa_get_format_bytes(rb->base.Format) * 8;
}
+/*
+ * Check if drawable has been invalidated by dri2InvalidateDrawable().
+ * Update renderbuffers if so. This prevents a client from accessing
+ * a backbuffer that has a swap pending but not yet completed.
+ *
+ * See intel_prepare_render for equivalent code in intel driver.
+ *
+ */
+void radeon_prepare_render(radeonContextPtr radeon)
+{
+ __DRIcontext *driContext = radeon->dri.context;
+ __DRIdrawable *drawable;
+ __DRIscreen *screen;
+
+ screen = driContext->driScreenPriv;
+ if (!screen->dri2.loader)
+ return;
+
+ drawable = driContext->driDrawablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+
+ /* Intel driver does the equivalent of this, no clue if it is needed:
+ * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base);
+ */
+ driContext->dri2.draw_stamp = drawable->dri2.stamp;
+ }
+
+ drawable = driContext->driReadablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+ driContext->dri2.read_stamp = drawable->dri2.stamp;
+ }
+
+ /* If we're currently rendering to the front buffer, the rendering
+ * that will happen next will probably dirty the front buffer. So
+ * mark it as dirty here.
+ */
+ if (radeon->is_front_buffer_rendering)
+ radeon->front_buffer_dirty = GL_TRUE;
+}
+
void
radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
GLboolean front_only)
@@ -514,6 +558,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
screen = context->driScreenPriv;
radeon = (radeonContextPtr) context->driverPrivate;
+ /* Set this up front, so that in case our buffers get invalidated
+ * while we're getting new buffers, we don't clobber the stamp and
+ * thus ignore the invalidate. */
+ drawable->lastStamp = drawable->dri2.stamp;
+
if (screen->dri2.loader
&& (screen->dri2.loader->base.version > 2)
&& (screen->dri2.loader->getBuffersWithFormat != NULL)) {
@@ -650,6 +699,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
rb->base.Height = drawable->h;
rb->has_surface = 0;
+ /* r6xx+ tiling */
+ rb->tile_config = radeon->radeonScreen->tile_config;
+ rb->group_bytes = radeon->radeonScreen->group_bytes;
+ rb->num_channels = radeon->radeonScreen->num_channels;
+ rb->num_banks = radeon->radeonScreen->num_banks;
+ rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op;
+
if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "(reusing depth buffer as stencil)\n");
@@ -678,7 +734,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
if (tiling_flags & RADEON_TILING_MICRO)
bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
-
+
}
if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 5156c5d0d0a..f06e5fdf244 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -93,6 +93,13 @@ struct radeon_renderbuffer
GLuint pf_pending; /**< sequence number of pending flip */
GLuint vbl_pending; /**< vblank sequence number of pending flip */
__DRIdrawable *dPriv;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
};
struct radeon_framebuffer
@@ -614,5 +621,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
extern void radeonDestroyContext(__DRIcontext * driContextPriv);
+void radeon_prepare_render(radeonContextPtr radeon);
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c877e6c1765..c6e5f110ea3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -133,7 +133,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
height = _mesa_next_pow_two_32(lvl->height);
lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits);
- lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits);
+ lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits);
assert(lvl->size > 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
index dadb8002c7d..fb741173ca8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -179,6 +179,9 @@ radeonReadPixels(GLcontext * ctx,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
return;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 82107cc6aeb..fa97a19302c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -213,6 +213,10 @@ static const GLuint __driNConfigOptions = 17;
static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo );
+#ifndef RADEON_INFO_TILE_CONFIG
+#define RADEON_INFO_TILE_CONFIG 0x6
+#endif
+
static int
radeonGetParam(__DRIscreen *sPriv, int param, void *value)
{
@@ -232,6 +236,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value)
case RADEON_PARAM_NUM_Z_PIPES:
info.request = RADEON_INFO_NUM_Z_PIPES;
break;
+ case RADEON_INFO_TILE_CONFIG:
+ info.request = RADEON_INFO_TILE_CONFIG;
+ break;
default:
return -EINVAL;
}
@@ -376,6 +383,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = {
};
#endif
+static void
+radeonDRI2Flush(__DRIdrawable *drawable)
+{
+ radeonContextPtr rmesa;
+
+ rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate;
+ radeonFlush(rmesa->glCtx);
+}
+
+static const struct __DRI2flushExtensionRec radeonFlushExtension = {
+ { __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
+ radeonDRI2Flush,
+ dri2InvalidateDrawable,
+};
+
static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
{
screen->device_id = device_id;
@@ -1305,6 +1327,56 @@ radeonCreateScreen2(__DRIscreen *sPriv)
else
screen->chip_flags |= RADEON_CLASS_R600;
+ /* r6xx+ tiling */
+ if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) {
+ ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp);
+ if (ret)
+ fprintf(stderr, "failed to get tiling info\n");
+ else {
+ screen->tile_config = temp;
+ screen->r7xx_bank_op = 0;
+ switch((screen->tile_config & 0xe) >> 1) {
+ case 0:
+ screen->num_channels = 1;
+ break;
+ case 1:
+ screen->num_channels = 2;
+ break;
+ case 2:
+ screen->num_channels = 4;
+ break;
+ case 3:
+ screen->num_channels = 8;
+ break;
+ default:
+ fprintf(stderr, "bad channels\n");
+ break;
+ }
+ switch((screen->tile_config & 0x30) >> 4) {
+ case 0:
+ screen->num_banks = 4;
+ break;
+ case 1:
+ screen->num_banks = 8;
+ break;
+ default:
+ fprintf(stderr, "bad banks\n");
+ break;
+ }
+ switch((screen->tile_config & 0xc0) >> 6) {
+ case 0:
+ screen->group_bytes = 256;
+ break;
+ case 1:
+ screen->group_bytes = 512;
+ break;
+ default:
+ fprintf(stderr, "bad group_bytes\n");
+ break;
+ }
+ }
+ }
+
if (IS_R300_CLASS(screen)) {
ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
@@ -1379,6 +1451,8 @@ radeonCreateScreen2(__DRIscreen *sPriv)
screen->extensions[i++] = &r600TexBufferExtension.base;
#endif
+ screen->extensions[i++] = &radeonFlushExtension.base;
+
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 0d7e335fa3a..2b33201a538 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -112,6 +112,13 @@ typedef struct radeon_screen {
int kernel_mm;
drm_radeon_sarea_t *sarea; /* Private SAREA data */
struct radeon_bo_manager *bom;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
} radeonScreenRec, *radeonScreenPtr;
#define IS_R100_CLASS(screen) \
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1adb6096033..9dfe2dd2433 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
* two main types:
* - 1D (akin to macro-linear/micro-tiled on older asics)
* - 2D (akin to macro-tiled/micro-tiled on older asics)
- * only 1D tiling is implemented below
*/
#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
@@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
return offset;
}
+static inline GLint r600_log2(GLint n)
+{
+ GLint log2 = 0;
+
+ while (n >>= 1)
+ ++log2;
+ return log2;
+}
+
+static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+ GLint group_bytes = rrb->group_bytes;
+ GLint num_channels = rrb->num_channels;
+ GLint num_banks = rrb->num_banks;
+ GLint r7xx_bank_op = rrb->r7xx_bank_op;
+ /* */
+ GLint group_bits = r600_log2(group_bytes);
+ GLint channel_bits = r600_log2(num_channels);
+ GLint bank_bits = r600_log2(num_banks);
+ GLint element_bytes = rrb->cpp;
+ GLint num_samples = 1;
+ GLint tile_width = 8;
+ GLint tile_height = 8;
+ GLint tile_thickness = 1;
+ GLint macro_tile_width = num_banks;
+ GLint macro_tile_height = num_channels;
+ GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width;
+ GLint height = rrb->base.Height / tile_height;
+ GLint z = 0;
+ GLint sample_number = 0;
+ /* */
+ GLint tile_bytes;
+ GLint macro_tile_bytes;
+ GLint macro_tiles_per_row;
+ GLint macro_tiles_per_slice;
+ GLint slice_offset;
+ GLint macro_tile_row_index;
+ GLint macro_tile_column_index;
+ GLint macro_tile_offset;
+ GLint pixel_number = 0;
+ GLint element_offset;
+ GLint bank = 0;
+ GLint channel = 0;
+ GLint total_offset;
+ GLint group_mask = (1 << group_bits) - 1;
+ GLint offset_low;
+ GLint offset_high;
+ GLint offset = 0;
+
+ switch (num_channels) {
+ case 2:
+ default:
+ // channel[0] = x[3] ^ y[3]
+ channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0;
+ break;
+ case 4:
+ // channel[0] = x[4] ^ y[3]
+ channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0;
+ // channel[1] = x[3] ^ y[4]
+ channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1;
+ break;
+ case 8:
+ // channel[0] = x[5] ^ y[3]
+ channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0;
+ // channel[0] = x[4] ^ x[5] ^ y[4]
+ channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1;
+ // channel[0] = x[3] ^ y[5]
+ channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2;
+ break;
+ }
+
+ switch (num_banks) {
+ case 4:
+ // bank[0] = x[3] ^ y[4 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0;
+ if (r7xx_bank_op)
+ // bank[1] = x[3] ^ y[4 + log2(num_channels)] ^ x[5]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1;
+ else
+ // bank[1] = x[4] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1;
+ break;
+ case 8:
+ // bank[0] = x[3] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0;
+ // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1;
+ if (r7xx_bank_op)
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2;
+ else
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2;
+ break;
+ }
+
+ tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+ macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
+ macro_tiles_per_row = pitch_elements / macro_tile_width;
+ macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height);
+ slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes;
+ macro_tile_row_index = (y / tile_height) / macro_tile_height;
+ macro_tile_column_index = (x / tile_width) / macro_tile_width;
+ macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes;
+
+ if (is_depth) {
+ GLint pixel_offset = 0;
+
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+ pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ switch (element_bytes) {
+ case 2:
+ pixel_offset = pixel_number * element_bytes * num_samples;
+ break;
+ case 4:
+ /* stencil and depth data are stored separately within a tile.
+ * stencil is stored in a contiguous tile before the depth tile.
+ * stencil element is 1 byte, depth element is 3 bytes.
+ * stencil tile is 64 bytes.
+ */
+ if (is_stencil)
+ pixel_offset = pixel_number * 1 * num_samples;
+ else
+ pixel_offset = (pixel_number * 3 * num_samples) + 64;
+ break;
+ }
+ element_offset = pixel_offset + (sample_number * element_bytes);
+ } else {
+ GLint sample_offset;
+
+ switch (element_bytes) {
+ case 1:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 2:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 4:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+ pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ }
+ sample_offset = sample_number * (tile_bytes / num_samples);
+ element_offset = sample_offset + (pixel_number * element_bytes);
+ }
+ total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits);
+ total_offset += element_offset;
+
+ offset_low = total_offset & group_mask;
+ offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits);
+ offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high;
+
+ return offset;
+}
+
/* depth buffers */
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
return &ptr[offset];
}
@@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 1);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
return &ptr[offset];
}
@@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
if (rrb->has_surface || !(rrb->bo->flags & mask)) {
offset = x * rrb->cpp + y * rrb->pitch;
} else {
- offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 0, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
}
return &ptr[offset];
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index f2fcb46688a..29defe73a70 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -40,7 +40,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/simple_list.h"
+#include "math/m_xform.h"
+
#include "swrast_setup/swrast_setup.h"
+
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
@@ -408,6 +411,8 @@ static GLboolean radeon_run_render( GLcontext *ctx,
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
+ radeon_prepare_render(&rmesa->radeon);
+
tnl->Driver.Render.Start( ctx );
for (i = 0 ; i < VB->PrimitiveCount ; i++)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index ea796e1a45f..5e1718f9dfc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx,
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
RADEON_NEWPRIM( rmesa );
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index 29fd31ac23f..4cb0bb60c85 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -153,6 +153,9 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
_mesa_select_tex_image(ctx, texObj, target, level);
int srcx, srcy, dstx, dsty;
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (border)
goto fail;
@@ -202,6 +205,9 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (!do_copy_texsubimage(ctx, target, level,
radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
xoffset, yoffset, x, y, width, height)) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index d2b190e42e0..8c6a50d2f0d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -551,7 +551,7 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx,
case GL_SRGB8_ALPHA8:
case GL_COMPRESSED_SRGB:
case GL_COMPRESSED_SRGB_ALPHA:
- return MESA_FORMAT_SRGBA8;
+ return MESA_FORMAT_SARGB8;
case GL_SLUMINANCE:
case GL_SLUMINANCE8:
diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c
index c369bb124c2..2d9e80e29c4 100644
--- a/src/mesa/drivers/dri/savage/savagerender.c
+++ b/src/mesa/drivers/dri/savage/savagerender.c
@@ -33,6 +33,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "savagecontext.h"
diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c
index 896c43db1b0..4351f119555 100644
--- a/src/mesa/drivers/dri/unichrome/via_render.c
+++ b/src/mesa/drivers/dri/unichrome/via_render.c
@@ -33,6 +33,8 @@
#include "main/macros.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "via_context.h"