aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
authorYounes Manton <[email protected]>2010-04-30 20:42:30 -0400
committerYounes Manton <[email protected]>2010-04-30 20:42:30 -0400
commita8ea1dacc63ac567498049e5756c247b9fec6cd9 (patch)
tree4031e2e2b6166bd926b43fa4bbb3aab773a30ee5 /src/mesa/drivers/dri/r300
parent404fb63b4649f58fce443615e49337d42b8ddece (diff)
parent35d960cc744c374ccaad48c3d80559b59c74e28a (diff)
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts: src/gallium/auxiliary/Makefile src/gallium/auxiliary/SConscript src/gallium/auxiliary/util/u_format.csv src/gallium/auxiliary/vl/vl_compositor.c src/gallium/auxiliary/vl/vl_compositor.h src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h src/gallium/drivers/identity/id_objects.c src/gallium/drivers/identity/id_objects.h src/gallium/drivers/identity/id_screen.c src/gallium/drivers/nv40/Makefile src/gallium/drivers/nv40/nv40_screen.c src/gallium/drivers/softpipe/sp_texture.c src/gallium/drivers/softpipe/sp_texture.h src/gallium/drivers/softpipe/sp_video_context.c src/gallium/drivers/softpipe/sp_video_context.h src/gallium/include/pipe/p_format.h src/gallium/include/pipe/p_screen.h src/gallium/include/pipe/p_video_context.h src/gallium/include/pipe/p_video_state.h src/gallium/include/state_tracker/dri1_api.h src/gallium/include/state_tracker/drm_api.h src/gallium/state_trackers/dri/common/dri_context.c src/gallium/state_trackers/xorg/xvmc/attributes.c src/gallium/state_trackers/xorg/xvmc/block.c src/gallium/state_trackers/xorg/xvmc/context.c src/gallium/state_trackers/xorg/xvmc/subpicture.c src/gallium/state_trackers/xorg/xvmc/surface.c src/gallium/state_trackers/xorg/xvmc/tests/.gitignore src/gallium/state_trackers/xorg/xvmc/tests/Makefile src/gallium/state_trackers/xorg/xvmc/xvmc_private.h src/gallium/winsys/drm/radeon/core/radeon_drm.c src/gallium/winsys/g3dvl/vl_winsys.h src/gallium/winsys/g3dvl/xlib/xsp_winsys.c src/gallium/winsys/sw/Makefile
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/Makefile9
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile2
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.h31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c160
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c83
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c145
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c47
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h33
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c89
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c21
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c331
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c41
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c76
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c54
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c360
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h39
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.c19
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c98
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c15
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c15
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c16
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c118
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c39
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.h3
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c34
l---------src/mesa/drivers/dri/r300/radeon_pixel_read.c1
l---------src/mesa/drivers/dri/r300/radeon_tex_getimage.c1
l---------src/mesa/drivers/dri/r300/radeon_tile.c1
l---------src/mesa/drivers/dri/r300/radeon_tile.h1
l---------src/mesa/drivers/dri/r300/server/radeon_dri.c1
46 files changed, 1391 insertions, 614 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 04459c2ddfa..2245998c952 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -7,8 +7,6 @@ CFLAGS += $(RADEON_CFLAGS)
LIBNAME = r300_dri.so
-MINIGLX_SOURCES = server/radeon_dri.c
-
ifeq ($(RADEON_LDFLAGS),)
CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
@@ -33,10 +31,13 @@ RADEON_COMMON_SOURCES = \
radeon_fbo.c \
radeon_lock.c \
radeon_mipmap_tree.c \
- radeon_span.c \
+ radeon_pixel_read.c \
radeon_queryobj.c \
+ radeon_span.c \
radeon_texture.c \
- radeon_tex_copy.c
+ radeon_tex_copy.c \
+ radeon_tex_getimage.c \
+ radeon_tile.c
DRIVER_SOURCES = \
radeon_screen.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index d83888d90a3..e432afc3d41 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,11 +8,13 @@ LIBNAME = r300compiler
C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
+ radeon_emulate_branches.c \
radeon_program.c \
radeon_program_print.c \
radeon_opcodes.c \
radeon_program_alu.c \
radeon_program_pair.c \
+ radeon_program_tex.c \
radeon_pair_translate.c \
radeon_pair_schedule.c \
radeon_pair_regalloc.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 46075a8aee9..28a3d39d961 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -17,6 +17,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_opcodes.c',
'radeon_program_alu.c',
'radeon_program_pair.c',
+ 'radeon_program_tex.c',
'radeon_pair_translate.c',
'radeon_pair_schedule.c',
'radeon_pair_regalloc.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
index ce23c319ad3..42344d0e3ba 100644
--- a/src/mesa/drivers/dri/r300/compiler/memory_pool.h
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -46,4 +46,35 @@ void memory_pool_init(struct memory_pool * pool);
void memory_pool_destroy(struct memory_pool * pool);
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accomodate up to num new element.
+ *
+ * type * Array;
+ * unsigned int Size;
+ * unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k < Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+ unsigned int _num = (num); \
+ if ((size) + _num > (reserved)) { \
+ unsigned int newreserve = (reserved) * 2; \
+ type * newarray; \
+ if (newreserve < _num) \
+ newreserve = 4 * _num; /* arbitrary heuristic */ \
+ newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+ memcpy(newarray, (array), (size) * sizeof(type)); \
+ (array) = newarray; \
+ (reserved) = newreserve; \
+ } \
+} while(0)
+
#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 928c15e1e40..794db8335a2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -31,166 +31,6 @@
#include "../r300_reg.h"
-static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
-{
- struct rc_src_register reg = { 0, };
-
- reg.File = RC_FILE_CONSTANT;
- reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = RC_SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- */
-int r300_transform_TEX(
- struct radeon_compiler * c,
- struct rc_instruction* inst,
- void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
-
- if (inst->U.I.Opcode != RC_OPCODE_TEX &&
- inst->U.I.Opcode != RC_OPCODE_TXB &&
- inst->U.I.Opcode != RC_OPCODE_TXP &&
- inst->U.I.Opcode != RC_OPCODE_KIL)
- return 0;
-
- /* ARB_shadow & EXT_shadow_funcs */
- if (inst->U.I.Opcode != RC_OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
- rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
- inst->U.I.Opcode = RC_OPCODE_MOV;
-
- if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
- inst->U.I.SrcReg[0].File = RC_FILE_NONE;
- inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
- } else {
- inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
- }
-
- return 1;
- } else {
- rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
- struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
- struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
- struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
- int pass, fail;
-
- inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
- inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
- inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
-
- inst_cmp->U.I.DstReg = inst->U.I.DstReg;
- inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
-
- inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
- inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
- inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
- inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
- inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
- else
- inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
-
- inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
- /* DstReg has been filled out above */
- inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
-
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
- inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
- inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
- }
- }
-
- /* Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- */
- if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) {
- struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mul->U.I.Opcode = RC_OPCODE_MUL;
- inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
- inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
-
- reset_srcreg(&inst->U.I.SrcReg[0]);
- inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index;
- }
-
- /* Cannot write texture to output registers or with masks */
- if (inst->U.I.Opcode != RC_OPCODE_KIL &&
- (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) {
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
-
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg = inst->U.I.DstReg;
- inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
-
- inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
- inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- }
-
-
- /* Cannot read texture coordinate from constants file */
- if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-
- reset_srcreg(&inst->U.I.SrcReg[0]);
- inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
- }
-
- return 1;
-}
-
/* just some random things... */
void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
{
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
index 418df36c936..8b755703be4 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -41,6 +41,4 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
-
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index cc552aee176..37dafa77106 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -353,7 +353,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
}
}
- if (code->pixsize >= R300_PFS_NUM_TEMP_REGS)
+ if (code->pixsize >= compiler->max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
if (compiler->Base.Error)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index c2d5dc27b49..25bf373b6fd 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -25,7 +25,9 @@
#include <stdio.h>
#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
@@ -84,91 +86,96 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
}
}
+static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Fragment Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
rewrite_depth_out(c);
+ debug_program_log(c, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+ rc_emulate_branches(&c->Base);
+
+ debug_program_log(c, "after emulate branches");
+
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
- { &r500_transform_TEX, c },
{ &r500_transform_IF, 0 },
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
- radeonLocalTransform(&c->Base, 5, transformations);
+ radeonLocalTransform(&c->Base, 4, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
c->Base.SwizzleCaps = &r500_swizzle_caps;
} else {
struct radeon_program_transformation transformations[] = {
- { &r300_transform_TEX, c },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
- radeonLocalTransform(&c->Base, 3, transformations);
+ radeonLocalTransform(&c->Base, 2, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
c->Base.SwizzleCaps = &r300_swizzle_caps;
}
- if (c->Base.Debug) {
- fprintf(stderr, "Fragment Program: After native rewrite:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ /* Run the common transformations too.
+ * Remember, lowering comes last! */
+ struct radeon_program_transformation common_transformations[] = {
+ { &radeonTransformTEX, c },
+ };
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ common_transformations[0].function = &radeonTransformALU;
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after native rewrite part 2");
rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
if (c->Base.Error)
return;
- if (c->Base.Debug) {
- fprintf(stderr, "Fragment Program: After deadcode:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(c, "after deadcode");
rc_dataflow_swizzles(&c->Base);
if (c->Base.Error)
return;
- if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after dataflow passes:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(c, "after dataflow passes");
rc_pair_translate(c);
if (c->Base.Error)
return;
- if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after pair translate:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(c, "after pair translate");
rc_pair_schedule(c);
if (c->Base.Error)
return;
- if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after pair scheduling:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(c, "after pair scheduling");
- if (c->is_r500)
- rc_pair_regalloc(c, 128);
- else
- rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS);
+ rc_pair_regalloc(c, c->max_temp_regs);
if (c->Base.Error)
return;
- if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after pair register allocation:\n");
- rc_print_program(&c->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(c, "after register allocation");
if (c->is_r500) {
r500BuildFragmentProgramHwCode(c);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 1b2cb8dde7d..4a0b6c02efe 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -29,7 +29,7 @@
#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
-
+#include "radeon_emulate_branches.h"
/*
* Take an already-setup and valid source then swizzle it appropriately to
@@ -566,6 +566,14 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
}
+static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Vertex Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
.IsNative = &swizzle_is_native,
@@ -579,6 +587,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
addArtificialOutputs(compiler);
+ debug_program_log(compiler, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+ rc_emulate_branches(&compiler->Base);
+
+ debug_program_log(compiler, "after emulate branches");
+
{
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
@@ -586,11 +603,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
radeonLocalTransform(&compiler->Base, 1, transformations);
}
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after native rewrite:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(compiler, "after native rewrite");
{
/* Note: This pass has to be done seperately from ALU rewrite,
@@ -603,29 +616,17 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
radeonLocalTransform(&compiler->Base, 1, transformations);
}
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after source conflict resolve:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(compiler, "after source conflict resolve");
rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after deadcode:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(compiler, "after deadcode");
rc_dataflow_swizzles(&compiler->Base);
allocate_temporary_registers(compiler);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after dataflow:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ debug_program_log(compiler, "after dataflow");
translate_vertex_program(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index b0fb8e970b7..632f0bcf4f8 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -31,144 +31,6 @@
#include "../r300_reg.h"
-static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
-{
- struct rc_src_register reg = { 0, };
-
- reg.File = RC_FILE_CONSTANT;
- reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = RC_SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - implement texture compare (shadow extensions)
- * - extract non-native source / destination operands
- */
-int r500_transform_TEX(
- struct radeon_compiler * c,
- struct rc_instruction * inst,
- void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
-
- if (inst->U.I.Opcode != RC_OPCODE_TEX &&
- inst->U.I.Opcode != RC_OPCODE_TXB &&
- inst->U.I.Opcode != RC_OPCODE_TXP &&
- inst->U.I.Opcode != RC_OPCODE_KIL)
- return 0;
-
- /* ARB_shadow & EXT_shadow_funcs */
- if (inst->U.I.Opcode != RC_OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
- rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
- inst->U.I.Opcode = RC_OPCODE_MOV;
-
- if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
- inst->U.I.SrcReg[0].File = RC_FILE_NONE;
- inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
- } else {
- inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
- }
-
- return 1;
- } else {
- rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
- struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
- struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
- struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
- int pass, fail;
-
- inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
- inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
- inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
-
- inst_cmp->U.I.DstReg = inst->U.I.DstReg;
- inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
-
- inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
- inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
- inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
- inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
- inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
- else
- inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
-
- inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
- /* DstReg has been filled out above */
- inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
-
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
- inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
- inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
- }
- }
-
- /* Cannot write texture to output registers */
- if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
-
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg = inst->U.I.DstReg;
- inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
-
- inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
- inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- }
-
- /* Cannot read texture coordinate from constants file */
- if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-
- reset_srcreg(&inst->U.I.SrcReg[0]);
- inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
- }
-
- return 1;
-}
-
/**
* Rewrite IF instructions to use the ALU result special register.
*/
@@ -433,19 +295,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
(inst >> 30));
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
inst = code->inst[n].inst3;
- fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
(inst >> 11) & 0x3,
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
- (inst >> 24) & 0x3);
+ (inst >> 24) & 0x3, (inst >> 29) & 0x3);
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
inst = code->inst[n].inst4;
- fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 29) & 0x3,
(inst >> 31) & 0x1);
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 0918cdf518b..4efbae7ba67 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -42,11 +42,6 @@ extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
extern struct rc_swizzle_caps r500_swizzle_caps;
-extern int r500_transform_TEX(
- struct radeon_compiler * c,
- struct rc_instruction * inst,
- void* data);
-
extern int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst,
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 710cae727a1..10c5e2349e9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -190,6 +190,17 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
return 0;
}
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+ PROG_CODE;
+
+ if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+ code->inst[ip].inst0 |= R500_INST_NOP;
+ }
+}
/**
* Emit a paired ALU instruction.
@@ -205,6 +216,14 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
int ip = ++code->inst_end;
+ /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+ inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+ if (ip > 0) {
+ alu_nop(c, ip - 1);
+ }
+ }
+
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
@@ -252,8 +271,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
- code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
- code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+ code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+ code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
if (inst->WriteALUResult) {
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
@@ -329,21 +348,6 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
return 1;
}
-static void grow_branches(struct emit_state * s)
-{
- unsigned int newreserved = s->BranchesReserved * 2;
- struct branch_info * newbranches;
-
- if (!newreserved)
- newreserved = 4;
-
- newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
- memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
-
- s->Branches = newbranches;
- s->BranchesReserved = newreserved;
-}
-
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
if (s->Code->inst_end >= 511) {
@@ -361,8 +365,8 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
return;
}
- if (s->CurrentBranchDepth >= s->BranchesReserved)
- grow_branches(s);
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
branch->If = newip;
@@ -469,9 +473,8 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
if (compiler->Base.Error)
return;
- assert(code->inst_end >= 0);
-
- if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ if (code->inst_end == -1 ||
+ (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 6d979bbaecf..27274f07122 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -108,6 +108,18 @@ typedef enum {
} rc_compare_func;
/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+ RC_WRAP_NONE = 0,
+ RC_WRAP_REPEAT,
+ RC_WRAP_MIRRORED_REPEAT,
+ RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
* Stores state that influences the compilation of a fragment program.
*/
struct r300_fragment_program_external_state {
@@ -127,11 +139,28 @@ struct r300_fragment_program_external_state {
* this field specifies the compare function.
*
* Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
- *
- * Otherwise, this field is 0.
* \sa rc_compare_func
*/
unsigned texture_compare_func : 3;
+
+ /**
+ * If the sampler needs to fake NPOT, this field is set.
+ */
+ unsigned fake_npot : 1;
+
+ /**
+ * If the sampler will recieve non-normalized coords,
+ * this field is set.
+ */
+ unsigned non_normalized_coords : 1;
+
+ /**
+ * This field specifies wrapping modes for the sampler.
+ *
+ * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+ * will be performed on the coordinates.
+ */
+ unsigned wrap_mode : 2;
} unit[16];
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 272f9072d4a..1c8ba864a41 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -277,13 +277,13 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mad->U.I.SrcReg[0].Index = tempregi;
- inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
- inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
- inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
if (full_vtransform) {
inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6bfda0574f6..09794a52ad8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -81,8 +81,12 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
+ /* Optional transformations and features. */
struct r300_fragment_program_external_state state;
+ unsigned enable_shadow_ambient;
+ /* Hardware specification. */
unsigned is_r500;
+ unsigned max_temp_regs;
/* Register corresponding to the depthbuffer. */
unsigned OutputDepth;
/* Registers corresponding to the four colorbuffers. */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index cce9166e644..16e2f3a2181 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -160,3 +160,92 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *
writes_pair(inst, cb, userdata);
}
}
+
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ rc_register_file file = inst->DstReg.File;
+ unsigned int index = inst->DstReg.Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->DstReg.File = file;
+ inst->DstReg.Index = index;
+ }
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ rc_register_file file = inst->SrcReg[src].File;
+ unsigned int index = inst->SrcReg[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->SrcReg[src].File = file;
+ inst->SrcReg[src].Index = index;
+ }
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+
+ if (inst->RGB.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->RGB.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.DestIndex = index;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->Alpha.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.DestIndex = index;
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ rc_register_file file = inst->RGB.Src[src].File;
+ unsigned int index = inst->RGB.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.Src[src].File = file;
+ inst->RGB.Src[src].Index = index;
+ }
+
+ if (inst->Alpha.Src[src].Used) {
+ rc_register_file file = inst->Alpha.Src[src].File;
+ unsigned int index = inst->Alpha.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.Src[src].File = file;
+ inst->Alpha.Src[src].Index = index;
+ }
+ }
+}
+
+
+/**
+ * Remap all register accesses according to the given function.
+ * That is, call the function \p cb for each referenced register (both read and written)
+ * and update the given instruction \p inst accordingly
+ * if it modifies its \ref pfile and \ref pindex contents.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL)
+ remap_normal_instruction(inst, cb, userdata);
+ else
+ remap_pair_instruction(inst, cb, userdata);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 5aa4cb64f3d..62cda20eea6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -36,13 +36,17 @@ struct rc_swizzle_caps;
/**
- * Help analyze the register accesses of instructions.
+ * Help analyze and modify the register accesses of instructions.
*/
/*@{*/
typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index e0c66c4aeb0..e3c2c83c0cf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -80,19 +80,8 @@ static void or_updatemasks(
static void push_branch(struct deadcode_state * s)
{
- if (s->BranchStackSize >= s->BranchStackReserved) {
- unsigned int new_reserve = 2 * s->BranchStackReserved;
- struct branchinfo * new_stack;
-
- if (!new_reserve)
- new_reserve = 4;
-
- new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo));
- memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo));
-
- s->BranchStack = new_stack;
- s->BranchStackReserved = new_reserve;
- }
+ memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+ s->BranchStackSize, s->BranchStackReserved, 1);
struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
branch->HaveElse = 0;
@@ -162,7 +151,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
}
unsigned int srcmasks[3];
- rc_compute_sources_for_writemask(opcode, usedmask, srcmasks);
+ rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
unsigned int refmask = 0;
@@ -250,7 +239,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
for(struct rc_instruction * inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next, ++ip) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
int dead = 1;
if (!opcode->HasDstReg) {
@@ -281,7 +270,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
usemask |= RC_MASK_W;
- rc_compute_sources_for_writemask(opcode, usemask, srcmasks);
+ rc_compute_sources_for_writemask(inst, usemask, srcmasks);
for(unsigned int src = 0; src < 3; ++src) {
for(unsigned int chan = 0; chan < 4; ++chan) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 00000000000..d889612f4f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+ unsigned int Proxied:1;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct register_proxies {
+ struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
+};
+
+struct branch_info {
+ struct rc_instruction * If;
+ struct rc_instruction * Else;
+};
+
+struct emulate_branch_state {
+ struct radeon_compiler * C;
+
+ struct branch_info * Branches;
+ unsigned int BranchCount;
+ unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount++];
+ memset(branch, 0, sizeof(struct branch_info));
+ branch->If = inst;
+
+ /* Make a safety copy of the decision register, because we will need
+ * it at ENDIF time and it might be overwritten in both branches. */
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ inst->U.I.SrcReg[0].Swizzle = 0;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].Negate = 0;
+}
+
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ELSE outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ branch->Else = inst;
+}
+
+
+struct state_and_proxies {
+ struct emulate_branch_state * S;
+ struct register_proxies * Proxies;
+};
+
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+ rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_TEMPORARY) {
+ return &sap->Proxies->Temporary[index];
+ } else {
+ return 0;
+ }
+}
+
+static void scan_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int comp)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, file, index);
+
+ if (proxy && !proxy->Proxied) {
+ proxy->Proxied = 1;
+ proxy->Index = rc_find_free_temporary(sap->S->C);
+ }
+}
+
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
+
+ if (proxy && proxy->Proxied) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = proxy->Index;
+ }
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+ struct register_proxies * proxies,
+ struct rc_instruction * begin,
+ struct rc_instruction * end)
+{
+ struct state_and_proxies sap;
+
+ sap.S = s;
+ sap.Proxies = proxies;
+
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ rc_for_all_writes(inst, scan_write, &sap);
+ rc_remap_registers(inst, remap_proxy_function, &sap);
+ }
+
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (proxies->Temporary[index].Proxied) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = index;
+ }
+ }
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+ struct rc_instruction * inst_if,
+ struct rc_instruction * inst_endif,
+ rc_register_file file, unsigned int index,
+ struct proxy_info ifproxy,
+ struct proxy_info elseproxy)
+{
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg.File = file;
+ inst_cmp->U.I.DstReg.Index = index;
+ inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+ inst_cmp->U.I.SrcReg[0].Abs = 1;
+ inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
+ inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ENDIF outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ struct register_proxies IfProxies;
+ struct register_proxies ElseProxies;
+
+ memset(&IfProxies, 0, sizeof(IfProxies));
+ memset(&ElseProxies, 0, sizeof(ElseProxies));
+
+ allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+ if (branch->Else)
+ allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+ /* Insert the CMP instructions at the end. */
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+ inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+ IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+ }
+ }
+
+ /* Remove all traces of the branch instructions */
+ rc_remove_instruction(branch->If);
+ if (branch->Else)
+ rc_remove_instruction(branch->Else);
+ rc_remove_instruction(inst);
+
+ s->BranchCount--;
+
+ if (VERBOSE) {
+ DBG("Program after ENDIF handling:\n");
+ rc_print_program(&s->C->Program);
+ }
+}
+
+
+struct remap_output_data {
+ unsigned int Output:RC_REGISTER_INDEX_BITS;
+ unsigned int Temporary:RC_REGISTER_INDEX_BITS;
+};
+
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct remap_output_data * data = userdata;
+
+ if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = data->Temporary;
+ }
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output registers is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount)
+ return;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (!opcode->HasDstReg)
+ return;
+
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+ struct remap_output_data remap;
+
+ remap.Output = inst->U.I.DstReg.Index;
+ remap.Temporary = rc_find_free_temporary(s->C);
+
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_output_function, &remap);
+ }
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst_mov->U.I.DstReg.Index = remap.Output;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
+ }
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler * c)
+{
+ struct emulate_branch_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ /* Untypical loop because we may remove the current instruction */
+ struct rc_instruction * ptr = c->Program.Instructions.Next;
+ while(ptr != &c->Program.Instructions) {
+ struct rc_instruction * inst = ptr;
+ ptr = ptr->Next;
+
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_IF:
+ handle_if(&s, inst);
+ break;
+ case RC_OPCODE_ELSE:
+ handle_else(&s, inst);
+ break;
+ case RC_OPCODE_ENDIF:
+ handle_endif(&s, inst);
+ break;
+ default:
+ fix_output_writes(&s, inst);
+ break;
+ }
+ } else {
+ rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 00000000000..e07279f0933
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler * c);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index c1c0181fac1..d593b3e81ae 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -26,6 +26,7 @@
*/
#include "radeon_opcodes.h"
+#include "radeon_program.h"
#include "radeon_program_constants.h"
@@ -59,6 +60,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.HasDstReg = 1
},
{
+ .Opcode = RC_OPCODE_CEIL,
+ .Name = "CEIL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
@@ -75,14 +83,14 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
- .NumSrcRegs = 1,
+ .NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
- .NumSrcRegs = 1,
+ .NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
@@ -371,10 +379,11 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
};
void rc_compute_sources_for_writemask(
- const struct rc_opcode_info * opcode,
+ const struct rc_instruction *inst,
unsigned int writemask,
unsigned int *srcmasks)
{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
srcmasks[0] = 0;
srcmasks[1] = 0;
srcmasks[2] = 0;
@@ -406,21 +415,37 @@ void rc_compute_sources_for_writemask(
srcmasks[0] |= RC_MASK_XYZW;
srcmasks[1] |= RC_MASK_XYZW;
break;
- case RC_OPCODE_TEX:
case RC_OPCODE_TXB:
case RC_OPCODE_TXP:
- srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[0] |= RC_MASK_W;
+ /* Fall through */
+ case RC_OPCODE_TEX:
+ switch (inst->U.I.TexSrcTarget) {
+ case RC_TEXTURE_1D:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_TEXTURE_2D:
+ case RC_TEXTURE_RECT:
+ case RC_TEXTURE_1D_ARRAY:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_TEXTURE_3D:
+ case RC_TEXTURE_CUBE:
+ case RC_TEXTURE_2D_ARRAY:
+ srcmasks[0] |= RC_MASK_XYZ;
+ break;
+ }
break;
case RC_OPCODE_DST:
- srcmasks[0] |= 0x6;
- srcmasks[1] |= 0xa;
+ srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+ srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
break;
case RC_OPCODE_EXP:
case RC_OPCODE_LOG:
srcmasks[0] |= RC_MASK_XY;
break;
case RC_OPCODE_LIT:
- srcmasks[0] |= 0xb;
+ srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
break;
default:
break;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index a3c5b869546..87a2e23084c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -47,6 +47,9 @@ typedef enum {
* dst.x = floor(src.x), where dst must be an address register */
RC_OPCODE_ARL,
+ /** vec4 instruction: dst.c = ceil(src0.c) */
+ RC_OPCODE_CEIL,
+
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
@@ -227,8 +230,10 @@ static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
return &rc_opcodes[opcode];
}
+struct rc_instruction;
+
void rc_compute_sources_for_writemask(
- const struct rc_opcode_info * opcode,
+ const struct rc_instruction *inst,
unsigned int writemask,
unsigned int *srcmasks);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index b2fe7f76b2f..fdfee867014 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -196,9 +196,10 @@ static void compute_live_intervals(struct regalloc_state * s)
}
}
-static void rewrite_register(struct regalloc_state * s,
+static void remap_register(void * data, struct rc_instruction * inst,
rc_register_file * file, unsigned int * index)
{
+ struct regalloc_state * s = data;
const struct register_info * reg;
if (*file == RC_FILE_TEMPORARY)
@@ -214,74 +215,6 @@ static void rewrite_register(struct regalloc_state * s,
}
}
-static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst)
-{
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
-
- if (opcode->HasDstReg) {
- rc_register_file file = inst->DstReg.File;
- unsigned int index = inst->DstReg.Index;
-
- rewrite_register(s, &file, &index);
-
- inst->DstReg.File = file;
- inst->DstReg.Index = index;
- }
-
- for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
- rc_register_file file = inst->SrcReg[src].File;
- unsigned int index = inst->SrcReg[src].Index;
-
- rewrite_register(s, &file, &index);
-
- inst->SrcReg[src].File = file;
- inst->SrcReg[src].Index = index;
- }
-}
-
-static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst)
-{
- if (inst->RGB.WriteMask) {
- rc_register_file file = RC_FILE_TEMPORARY;
- unsigned int index = inst->RGB.DestIndex;
-
- rewrite_register(s, &file, &index);
-
- inst->RGB.DestIndex = index;
- }
-
- if (inst->Alpha.WriteMask) {
- rc_register_file file = RC_FILE_TEMPORARY;
- unsigned int index = inst->Alpha.DestIndex;
-
- rewrite_register(s, &file, &index);
-
- inst->Alpha.DestIndex = index;
- }
-
- for(unsigned int src = 0; src < 3; ++src) {
- if (inst->RGB.Src[src].Used) {
- rc_register_file file = inst->RGB.Src[src].File;
- unsigned int index = inst->RGB.Src[src].Index;
-
- rewrite_register(s, &file, &index);
-
- inst->RGB.Src[src].File = file;
- inst->RGB.Src[src].Index = index;
- }
-
- if (inst->Alpha.Src[src].Used) {
- rc_register_file file = inst->Alpha.Src[src].File;
- unsigned int index = inst->Alpha.Src[src].Index;
-
- rewrite_register(s, &file, &index);
-
- inst->Alpha.Src[src].File = file;
- inst->Alpha.Src[src].Index = index;
- }
- }
-}
-
static void do_regalloc(struct regalloc_state * s)
{
/* Simple and stupid greedy register allocation */
@@ -310,10 +243,7 @@ static void do_regalloc(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
- if (inst->Type == RC_INSTRUCTION_NORMAL)
- rewrite_normal_instruction(s, &inst->U.I);
- else
- rewrite_pair_instruction(s, &inst->U.P);
+ rc_remap_registers(inst, &remap_register, s);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index fff5b0c2173..407a0a55ee2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -156,14 +156,8 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- int nargs = opcode->NumSrcRegs;
int i;
- /* Special case for DDX/DDY (MDH/MDV). */
- if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) {
- nargs++;
- }
-
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
if (needrgb && !istranscendent) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index b5c08aea49e..05b874ba7cf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -175,6 +175,26 @@ static void transform_ABS(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_CEIL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Assuming:
+ * ceil(x) = -floor(-x)
+ *
+ * After inlining floor:
+ * ceil(x) = -(-x-frac(-x))
+ *
+ * After simplification:
+ * ceil(x) = x+frac(-x)
+ */
+
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ rc_remove_instruction(inst);
+}
+
static void transform_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -458,7 +478,7 @@ static void transform_XPD(struct radeon_compiler* c,
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
@@ -474,6 +494,7 @@ int radeonTransformALU(
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -506,6 +527,35 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* There is no decent CMP available, so let's rig one up.
+ * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+ * The following sequence consumes two temps and two extra slots
+ * (the second temp and the second slot is consumed by transform_LRP),
+ * but should be equivalent:
+ *
+ * SLT tmp0, src0, 0.0
+ * LRP dst, tmp0, src1, src2
+ *
+ * Yes, I know, I'm a mad scientist. ~ C. & M. */
+ int tempreg0 = rc_find_free_temporary(c);
+
+ /* SLT tmp0, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* LRP dst, tmp0, src1, src2 */
+ transform_LRP(c,
+ emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -517,6 +567,8 @@ int r300_transform_vertex_alu(
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 7c0d6720b11..842012def02 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -114,12 +114,14 @@ typedef enum {
} while(0)
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
/**
* \name Bitmasks for components of vectors.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
new file mode 100644
index 00000000000..b4ba0b3f870
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
+ int tmu)
+{
+ struct rc_src_register reg = { 0, };
+
+ if (compiler->enable_shadow_ambient) {
+ reg.File = RC_FILE_CONSTANT;
+ reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = RC_SWIZZLE_WWWW;
+ } else {
+ reg.File = RC_FILE_NONE;
+ reg.Swizzle = RC_SWIZZLE_0000;
+ }
+ return reg;
+}
+
+static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
+ struct rc_instruction *inst)
+{
+ struct rc_instruction *inst_rect;
+ unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+ if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
+ inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+ inst_rect->U.I.Opcode = RC_OPCODE_MUL;
+ inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rect->U.I.DstReg.Index = temp;
+ inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_rect->U.I.SrcReg[1].Index =
+ rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+
+ inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ }
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ } else {
+ inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+
+ return 1;
+ } else {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
+ else
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
+
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+ }
+
+ /* Texture wrap modes don't work on NPOT textures or texrects.
+ *
+ * The game plan is simple. We have two flags, fake_npot and
+ * non_normalized_coords, as well as a tex target. The RECT tex target
+ * will make the emitted code use non-scaled texcoords.
+ *
+ * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+ * mirroring are not. If we need to repeat, we do:
+ *
+ * MUL temp, texcoord, <scaling factor constant>
+ * FRC temp, temp ; Discard integer portion of coords
+ *
+ * This gives us coords in [0, 1].
+ *
+ * Mirroring is trickier. We're going to start out like repeat:
+ *
+ * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+ * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+ * ; so scale to [0, 1]
+ * FRC temp, temp ; Make the pattern repeat
+ * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+ * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+ * ; The pattern is backwards, so reverse it (1-x).
+ *
+ * This gives us coords in [0, 1].
+ *
+ * ~ C & M. ;)
+ */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
+ rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+
+ /* R300 cannot sample from rectangles. */
+ if (!compiler->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
+ wrapmode != RC_WRAP_NONE) {
+ struct rc_instruction *inst_mov;
+ unsigned temp = rc_find_free_temporary(c);
+
+ /* For NPOT fallback, we need normalized coordinates anyway. */
+ if (compiler->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (wrapmode == RC_WRAP_REPEAT) {
+ /* Both instructions will be paired up. */
+ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+ /*
+ * Function:
+ * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+ *
+ * Code:
+ * MUL temp, src0, 0.5
+ * FRC temp, temp
+ * MAD temp, temp, 2, -1
+ * ADD temp, 1, -abs(temp)
+ */
+
+ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+ unsigned two, two_swizzle;
+
+ inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = temp;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+ inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.SrcReg[0].Index = temp;
+ inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+ inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = temp;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = temp;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = two;
+ inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+ inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = temp;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[1].Index = temp;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_add->U.I.SrcReg[1].Abs = 1;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+ } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+ /*
+ * Mirrored clamp modes are bloody simple, we just use abs
+ * to mirror [0, 1] into [-1, 0]. This works for
+ * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+ */
+ struct rc_instruction *inst_mov;
+
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Abs = 1;
+ }
+
+ /* Preserve W for TXP/TXB. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+ }
+ }
+
+ /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+ (!compiler->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ }
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
new file mode 100644
index 00000000000..a0105051ac4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
index 2bc761bc208..0865a456443 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.c
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -117,7 +117,9 @@ static void create_fragment_program(struct r300_context *r300)
compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0);
compiler.OutputColor[0] = FRAG_RESULT_COLOR;
compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ compiler.enable_shadow_ambient = GL_TRUE;
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
+ compiler.max_temp_regs = (compiler.is_r500) ? 128 : 32;
compiler.code = &r300->blit.fp_code;
compiler.AllocateHwInputs = fp_allocate_hw_inputs;
@@ -381,19 +383,16 @@ static GLboolean validate_buffers(struct r300_context *r300,
struct radeon_bo *dst_bo)
{
int ret;
- ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
- src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
- if (ret)
- return GL_FALSE;
+
+ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
- dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
- first_elem(&r300->radeon.dma.reserved)->bo,
- RADEON_GEM_DOMAIN_GTT, 0);
+ dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT);
if (ret)
return GL_FALSE;
@@ -585,12 +584,6 @@ unsigned r300_blit(GLcontext *ctx,
if (dst_pitch % 2 > 0)
++dst_pitch;
- /* Rendering to small buffer doesn't work.
- * Looks like a hw limitation.
- */
- if (dst_pitch < 32)
- return 0;
-
/* Need to clamp the region size to make sure
* we don't read outside of the source buffer
* or write outside of the destination buffer.
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 4787bafc66a..c40802aec6e 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -77,12 +77,29 @@ static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
cnt = vpu_count(atom->cmd);
if (r300->radeon.radeonScreen->kernel_mm) {
- extra = 5;
+ extra = 3;
}
return cnt ? (cnt * 4) + extra : 0;
}
+static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ int extra = 1;
+
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ cnt = r300->selected_vp->code.constants.Count * 4;
+ extra = 3;
+ } else {
+ cnt = vpu_count(atom->cmd);
+ extra = 1;
+ }
+
+ return cnt ? (cnt * 4) + extra : 0;
+}
+
void r300_emit_vpu(struct r300_context *r300,
uint32_t *data,
unsigned len,
@@ -90,8 +107,7 @@ void r300_emit_vpu(struct r300_context *r300,
{
BATCH_LOCALS(&r300->radeon);
- BEGIN_BATCH_NO_AUTOSTATE(5 + len);
- OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ BEGIN_BATCH_NO_AUTOSTATE(3 + len);
OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR);
OUT_BATCH_TABLE(data, len);
@@ -102,15 +118,26 @@ static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
drm_r300_cmd_header_t cmd;
- uint32_t addr, ndw;
+ uint32_t addr;
cmd.u = atom->cmd[0];
addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
- ndw = atom->check(ctx, atom);
r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr);
}
+static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ drm_r300_cmd_header_t cmd;
+ uint32_t addr;
+
+ cmd.u = atom->cmd[0];
+ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
+
+ r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr);
+}
+
void r500_emit_fp(struct r300_context *r300,
uint32_t *data,
unsigned len,
@@ -333,36 +360,37 @@ void r300_emit_cb_setup(struct r300_context *r300,
assert(offset % 32 == 0);
switch (format) {
- case MESA_FORMAT_RGB565:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_RGB565;
+ case MESA_FORMAT_SL8:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ cbpitch |= R300_COLOR_FORMAT_I8;
break;
+ case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_ARGB4444;
- break;
case MESA_FORMAT_ARGB4444_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB4444;
break;
+ case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555:
- assert(_mesa_little_endian());
- cbpitch |= R300_COLOR_FORMAT_ARGB1555;
- break;
case MESA_FORMAT_ARGB1555_REV:
- assert(!_mesa_little_endian());
cbpitch |= R300_COLOR_FORMAT_ARGB1555;
break;
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ break;
default:
- if (cpp == 4) {
- cbpitch |= R300_COLOR_FORMAT_ARGB8888;
- } else {
- _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");;
- }
+ _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
break;
}
@@ -778,24 +806,6 @@ void r300InitCmdBuf(r300ContextPtr r300)
/* VPU only on TCL */
if (has_tcl) {
int i;
- if (r300->radeon.radeonScreen->kernel_mm) {
- ALLOC_STATE(vap_flush, always, 10, 0);
- /* flush processing vertices */
- r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
- r300->hw.vap_flush.cmd[1] = 0;
- r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1);
- r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D;
- r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1);
- r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN;
- r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
- r300->hw.vap_flush.cmd[7] = 0xffffff;
- r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
- r300->hw.vap_flush.cmd[9] = 0;
- } else {
- ALLOC_STATE(vap_flush, never, 10, 0);
- }
-
-
ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
r300->hw.vpi.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
@@ -803,11 +813,11 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.vpi.emit = emit_vpu_state;
if (is_r500) {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
r300->hw.vpp.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
if (r300->radeon.radeonScreen->kernel_mm)
- r300->hw.vpp.emit = emit_vpu_state;
+ r300->hw.vpp.emit = emit_vpp_state;
ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
r300->hw.vps.cmd[0] =
@@ -824,11 +834,11 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.vpucp[i].emit = emit_vpu_state;
}
} else {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
r300->hw.vpp.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
if (r300->radeon.radeonScreen->kernel_mm)
- r300->hw.vpp.emit = emit_vpu_state;
+ r300->hw.vpp.emit = emit_vpp_state;
ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
r300->hw.vps.cmd[0] =
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index df4cc11da42..4dce454c3a7 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_state.h"
#include "r300_tex.h"
#include "r300_emit.h"
+#include "r300_render.h"
#include "r300_swtcl.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_buffer_objects.h"
@@ -109,7 +110,6 @@ static const struct dri_extension card_extensions[] = {
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
{"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
{"GL_EXT_blend_subtract", NULL},
- {"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
{"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
@@ -227,6 +227,8 @@ static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
r300->radeon.Fallback |= bit;
else
r300->radeon.Fallback &= ~bit;
+
+ r300SwitchFallback(ctx, R300_FALLBACK_RADEON_COMMON, mode);
}
static void r300_emit_query_finish(radeonContextPtr radeon)
@@ -322,6 +324,12 @@ static void r300_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.check_blit = r300_check_blit;
radeon->vtbl.blit = r300_blit;
+
+ if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+ } else {
+ radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+ }
}
static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
@@ -456,6 +464,9 @@ static void r300InitGLExtensions(GLcontext *ctx)
}
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350)
_mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ _mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil");
}
static void r300InitIoctlFuncs(struct dd_function_table *functions)
@@ -492,7 +503,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
_mesa_init_driver_functions(&functions);
r300InitIoctlFuncs(&functions);
- r300InitStateFuncs(&functions);
+ r300InitStateFuncs(&r300->radeon, &functions);
r300InitTextureFuncs(&r300->radeon, &functions);
r300InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 78ab43a99f9..df7115e7dae 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -355,7 +355,6 @@ struct r300_hw_state {
struct radeon_state_atom zb_hiz_offset; /* (4F44) */
struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
- struct radeon_state_atom vap_flush;
struct radeon_state_atom vpi; /* vp instructions */
struct radeon_state_atom vpp; /* vp parameters */
struct radeon_state_atom vps; /* vertex point size (?) */
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 61ea5e4d9a3..2b7c93a9575 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -219,7 +219,9 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.code = &fp->code;
compiler.state = fp->state;
+ compiler.enable_shadow_ambient = GL_TRUE;
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.max_temp_regs = (compiler.is_r500) ? 128 : 32;
compiler.OutputDepth = FRAG_RESULT_DEPTH;
memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
compiler.OutputColor[0] = FRAG_RESULT_COLOR;
@@ -256,6 +258,19 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
fp->InputsRead = compiler.Base.Program.InputsRead;
+ /* Clear the fog/wpos_attr if code accessing these
+ * attributes has been removed during compilation
+ */
+ if (fp->fog_attr != FRAG_ATTRIB_MAX) {
+ if (!(fp->InputsRead & (1 << fp->fog_attr)))
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ }
+
+ if (fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ if (!(fp->InputsRead & (1 << fp->wpos_attr)))
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
+ }
+
rc_destroy(&compiler.Base);
}
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index d18ebab8ff2..ac93563ed9e 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -482,7 +482,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_PVS_FIRST_INST_SHIFT 0
# define R300_PVS_XYZW_VALID_INST_SHIFT 10
# define R300_PVS_LAST_INST_SHIFT 20
-/* Addresses are relative the the vertex program parameters area. */
+/* Addresses are relative to the vertex program parameters area. */
#define R300_VAP_PVS_CONST_CNTL 0x22D4
# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
@@ -1760,7 +1760,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* The destination register index is in FPI1 (color) and FPI3 (alpha)
* together with enable bits.
* There are separate enable bits for writing into temporary registers
- * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_*
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
* /DSTA_OUTPUT). You can write to both at once, or not write at all (the
* same index must be used for both).
*
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 95961314863..bb8f91491f5 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -386,6 +386,14 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
return;
}
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+ OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
+ END_BATCH();
+ }
+
r300_emit_scissor(rmesa->radeon.glCtx);
while (num_verts > 0) {
int nr;
@@ -400,8 +408,9 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
COMMIT_BATCH();
}
-static const char *getFallbackString(uint32_t bit)
+static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit)
{
+ static char common_fallback_str[32];
switch (bit) {
case R300_FALLBACK_VERTEX_PROGRAM :
return "vertex program";
@@ -421,6 +430,9 @@ static const char *getFallbackString(uint32_t bit)
return "render mode != GL_RENDER";
case R300_FALLBACK_FRAGMENT_PROGRAM:
return "fragment program";
+ case R300_FALLBACK_RADEON_COMMON:
+ snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback);
+ return common_fallback_str;
case R300_FALLBACK_AOS_LIMIT:
return "aos limit";
case R300_FALLBACK_INVALID_BUFFERS:
@@ -440,7 +452,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
if (mode) {
if ((fallback_warn & bit) == 0) {
if (RADEON_DEBUG & RADEON_FALLBACKS)
- fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
+ fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(rmesa, bit));
fallback_warn |= bit;
}
rmesa->fallback |= bit;
diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h
index ec785474a67..581e9fa0ccd 100644
--- a/src/mesa/drivers/dri/r300/r300_render.h
+++ b/src/mesa/drivers/dri/r300/r300_render.h
@@ -41,6 +41,7 @@
#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21)
#define R300_FALLBACK_RENDER_MODE (1 << 22)
#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23)
+#define R300_FALLBACK_RADEON_COMMON (1 << 29)
#define R300_FALLBACK_AOS_LIMIT (1 << 30)
#define R300_FALLBACK_INVALID_BUFFERS (1 << 31)
#define R300_RASTER_FALLBACK_MASK 0xffff0000
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 9d1ff6e2ba2..fa33be49989 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -46,13 +46,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
+#include "drivers/common/meta.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
-#include "tnl/t_vp_build.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -366,7 +366,6 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
ip = (GLint *)ctx->Transform._ClipUserPlane[p];
- R300_STATECHANGE( rmesa, vap_flush );
R300_STATECHANGE( rmesa, vpucp[p] );
rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
@@ -590,7 +589,7 @@ static void r300SetDepthState(GLcontext * ctx)
R500_STENCIL_REFMASK_FRONT_BACK);
r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
- if (ctx->Depth.Test) {
+ if (ctx->Depth.Test && ctx->DrawBuffer->_DepthBuffer) {
r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE;
if (ctx->Depth.Mask)
r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE;
@@ -794,12 +793,14 @@ static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa
R300_STATECHANGE(r300, ga_point_minmax);
r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK;
r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0);
+ r300PointSize(ctx, ctx->Point.Size);
break;
case GL_POINT_SIZE_MAX:
R300_STATECHANGE(r300, ga_point_minmax);
r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK;
r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0)
<< R300_GA_POINT_MINMAX_MAX_SHIFT;
+ r300PointSize(ctx, ctx->Point.Size);
break;
case GL_POINT_DISTANCE_ATTENUATION:
break;
@@ -1657,20 +1658,21 @@ void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
(5 << R300_VF_MAX_VTX_NUM_SHIFT));
- if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
- rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
- else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
- (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
- (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+ if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+ else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530)
rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
(rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
- (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
else
- rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
}
@@ -1762,8 +1764,6 @@ static void r300ResetHwState(r300ContextPtr r300)
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
- radeon_firevertices(&r300->radeon);
-
r300ColorMask(ctx,
ctx->Color.ColorMask[0][RCOMP],
ctx->Color.ColorMask[0][GCOMP],
@@ -1985,23 +1985,6 @@ void r300UpdateShaders(r300ContextPtr rmesa)
if (rmesa->options.hw_tcl_enabled) {
struct r300_vertex_program *vp;
- if (rmesa->radeon.NewGLState) {
- int i;
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- rmesa->temp_attrib[i] =
- TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- &rmesa->dummy_attrib[i];
- }
-
- _tnl_UpdateFixedFunctionProgram(ctx);
-
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- rmesa->temp_attrib[i];
- }
- }
-
vp = r300SelectAndTranslateVertexShader(ctx);
r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
@@ -2255,6 +2238,68 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
}
}
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+ (R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+ R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ uint32_t hw_format;
+ struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+
+ if (!rrb) {
+ return;
+ }
+
+ switch (rrb->base.Format)
+ {
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_RGBA8888:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_RGBA8888_REV:
+ hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+ break;
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+ break;
+ case MESA_FORMAT_SRGBA8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+ break;
+ case MESA_FORMAT_SARGB8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+ break;
+ case MESA_FORMAT_SL8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+ break;
+ case MESA_FORMAT_A8:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+ break;
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+ break;
+ default:
+ assert(!"Unsupported format");
+ break;
+ }
+
+ R300_STATECHANGE(rmesa, us_out_fmt);
+ rmesa->hw.us_out_fmt.cmd[1] = hw_format;
+}
+#undef EASY_US_OUT_FMT
+
/**
* Called by Mesa after an internal state update.
*/
@@ -2284,6 +2329,10 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
}
+ if (new_state & _NEW_BUFFERS) {
+ r300SetupUsOutputFormat(ctx);
+ }
+
r300->radeon.NewGLState |= new_state;
}
@@ -2305,7 +2354,7 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode)
/**
* Initialize driver's state callback functions
*/
-void r300InitStateFuncs(struct dd_function_table *functions)
+void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
functions->UpdateState = r300InvalidateState;
@@ -2344,8 +2393,13 @@ void r300InitStateFuncs(struct dd_function_table *functions)
functions->ClipPlane = r300ClipPlane;
functions->Scissor = radeonScissor;
- functions->DrawBuffer = radeonDrawBuffer;
- functions->ReadBuffer = radeonReadBuffer;
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+
+ functions->CopyPixels = _mesa_meta_CopyPixels;
+ functions->DrawPixels = _mesa_meta_DrawPixels;
+ if (radeon->radeonScreen->kernel_mm)
+ functions->ReadPixels = radeonReadPixels;
}
void r300InitShaderFunctions(r300ContextPtr r300)
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
index d46bf9f1796..e70f84f4e4b 100644
--- a/src/mesa/drivers/dri/r300/r300_state.h
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -55,7 +55,7 @@ void r300UpdateDrawBuffer (GLcontext * ctx);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
-void r300InitStateFuncs (struct dd_function_table *functions);
+void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count);
void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten);
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 8dd85073954..baef206bc26 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -308,6 +308,45 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
return &t->base;
}
+unsigned r300IsFormatRenderable(gl_format mesa_format)
+{
+ switch (mesa_format)
+ {
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_ARGB4444_REV:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
+ case MESA_FORMAT_SL8:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ case MESA_FORMAT_Z16:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+unsigned r500IsFormatRenderable(gl_format mesa_format)
+{
+ if (mesa_format == MESA_FORMAT_S8_Z24) {
+ return 1;
+ } else {
+ return r300IsFormatRenderable(mesa_format);
+ }
+}
+
void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
{
/* Note: we only plug in the functions we implement in the driver
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index 9694e703b83..aca44cd7669 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -53,4 +53,7 @@ extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_tab
int32_t r300TranslateTexFormat(gl_format mesaFormat);
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
#endif /* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index cbe4cb83047..a1fe3780294 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -263,15 +263,25 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
- rc_copy_output(&compiler.Base,
- VERT_RESULT_HPOS,
- vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+ unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+ /* Set empty writemask for instructions writing to vp_wpos_attr
+ * before moving the wpos attr there.
+ * Such instructions will be removed by DCE.
+ */
+ rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
+ rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
}
if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
- rc_move_output(&compiler.Base,
- VERT_RESULT_FOGC,
- vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
+ unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+ /* Set empty writemask for instructions writing to vp_fog_attr
+ * before moving the fog attr there.
+ * Such instructions will be removed by DCE.
+ */
+ rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
+ rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
}
r3xx_compile_vertex_program(&compiler);
@@ -342,8 +352,6 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver
assert((code->length > 0) && (code->length % 4 == 0));
- R300_STATECHANGE( r300, vap_flush );
-
switch ((dest >> 8) & 0xf) {
case 0:
R300_STATECHANGE(r300, vpi);
@@ -381,10 +389,14 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
- R300_STATECHANGE(rmesa, vap_flush);
+ R300_STATECHANGE(rmesa, vap_cntl);
R300_STATECHANGE(rmesa, vpp);
param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
- bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+ if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
+ WARN_ONCE("Too many VP params, expect rendering errors\n");
+ }
+ /* Prevent the overflow (vpu.count is u8) */
+ bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
param_count /= 4;
r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
@@ -397,6 +409,6 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
(inst_count << R300_PVS_LAST_INST_SHIFT);
- rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
}
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000
index 00000000000..3b03803126f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_getimage.c b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
new file mode 120000
index 00000000000..d9836d7326e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_getimage.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.c b/src/mesa/drivers/dri/r300/radeon_tile.c
new file mode 120000
index 00000000000..d4bfe27da64
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.c
@@ -0,0 +1 @@
+../radeon/radeon_tile.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.h b/src/mesa/drivers/dri/r300/radeon_tile.h
new file mode 120000
index 00000000000..31074c581ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.h
@@ -0,0 +1 @@
+../radeon/radeon_tile.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.c b/src/mesa/drivers/dri/r300/server/radeon_dri.c
deleted file mode 120000
index d05847d650f..00000000000
--- a/src/mesa/drivers/dri/r300/server/radeon_dri.c
+++ /dev/null
@@ -1 +0,0 @@
-../../radeon/server/radeon_dri.c \ No newline at end of file