diff options
author | Younes Manton <[email protected]> | 2010-04-30 20:42:30 -0400 |
---|---|---|
committer | Younes Manton <[email protected]> | 2010-04-30 20:42:30 -0400 |
commit | a8ea1dacc63ac567498049e5756c247b9fec6cd9 (patch) | |
tree | 4031e2e2b6166bd926b43fa4bbb3aab773a30ee5 /src/mesa/drivers/dri/r300/compiler | |
parent | 404fb63b4649f58fce443615e49337d42b8ddece (diff) | |
parent | 35d960cc744c374ccaad48c3d80559b59c74e28a (diff) |
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts:
src/gallium/auxiliary/Makefile
src/gallium/auxiliary/SConscript
src/gallium/auxiliary/util/u_format.csv
src/gallium/auxiliary/vl/vl_compositor.c
src/gallium/auxiliary/vl/vl_compositor.h
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
src/gallium/drivers/identity/id_objects.c
src/gallium/drivers/identity/id_objects.h
src/gallium/drivers/identity/id_screen.c
src/gallium/drivers/nv40/Makefile
src/gallium/drivers/nv40/nv40_screen.c
src/gallium/drivers/softpipe/sp_texture.c
src/gallium/drivers/softpipe/sp_texture.h
src/gallium/drivers/softpipe/sp_video_context.c
src/gallium/drivers/softpipe/sp_video_context.h
src/gallium/include/pipe/p_format.h
src/gallium/include/pipe/p_screen.h
src/gallium/include/pipe/p_video_context.h
src/gallium/include/pipe/p_video_state.h
src/gallium/include/state_tracker/dri1_api.h
src/gallium/include/state_tracker/drm_api.h
src/gallium/state_trackers/dri/common/dri_context.c
src/gallium/state_trackers/xorg/xvmc/attributes.c
src/gallium/state_trackers/xorg/xvmc/block.c
src/gallium/state_trackers/xorg/xvmc/context.c
src/gallium/state_trackers/xorg/xvmc/subpicture.c
src/gallium/state_trackers/xorg/xvmc/surface.c
src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
src/gallium/state_trackers/xorg/xvmc/tests/Makefile
src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
src/gallium/winsys/drm/radeon/core/radeon_drm.c
src/gallium/winsys/g3dvl/vl_winsys.h
src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
src/gallium/winsys/sw/Makefile
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
27 files changed, 1125 insertions, 501 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index d83888d90a3..e432afc3d41 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -8,11 +8,13 @@ LIBNAME = r300compiler C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ + radeon_emulate_branches.c \ radeon_program.c \ radeon_program_print.c \ radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ + radeon_program_tex.c \ radeon_pair_translate.c \ radeon_pair_schedule.c \ radeon_pair_regalloc.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 46075a8aee9..28a3d39d961 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -17,6 +17,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_opcodes.c', 'radeon_program_alu.c', 'radeon_program_pair.c', + 'radeon_program_tex.c', 'radeon_pair_translate.c', 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h index ce23c319ad3..42344d0e3ba 100644 --- a/src/mesa/drivers/dri/r300/compiler/memory_pool.h +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -46,4 +46,35 @@ void memory_pool_init(struct memory_pool * pool); void memory_pool_destroy(struct memory_pool * pool); void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accomodate up to num new element. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k < Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + #endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 928c15e1e40..794db8335a2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -31,166 +31,6 @@ #include "../r300_reg.h" -static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) -{ - struct rc_src_register reg = { 0, }; - - reg.File = RC_FILE_CONSTANT; - reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = RC_SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - */ -int r300_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; - - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - } else { - inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - - return 1; - } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); - struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); - int pass, fail; - - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->U.I.DstReg = inst->U.I.DstReg; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = rc_find_free_temporary(c); - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; - else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - /* DstReg has been filled out above */ - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - } - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) { - struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index; - } - - /* Cannot write texture to output registers or with masks */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } - - return 1; -} - /* just some random things... */ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h index 418df36c936..8b755703be4 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -41,6 +41,4 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); -extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); - #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index cc552aee176..37dafa77106 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -353,7 +353,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi } } - if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) + if (code->pixsize >= compiler->max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); if (compiler->Base.Error) diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c2d5dc27b49..25bf373b6fd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -25,7 +25,9 @@ #include <stdio.h> #include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" #include "radeon_program_alu.h" +#include "radeon_program_tex.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -84,91 +86,96 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) } } +static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: %s\n", where); + rc_print_program(&c->Base.Program); + } +} + void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { rewrite_depth_out(c); + debug_program_log(c, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. */ + rc_emulate_branches(&c->Base); + + debug_program_log(c, "after emulate branches"); + if (c->is_r500) { struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, c }, { &r500_transform_IF, 0 }, { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 } }; - radeonLocalTransform(&c->Base, 5, transformations); + radeonLocalTransform(&c->Base, 4, transformations); + + debug_program_log(c, "after native rewrite part 1"); c->Base.SwizzleCaps = &r500_swizzle_caps; } else { struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, c }, { &radeonTransformALU, 0 }, { &radeonTransformTrigSimple, 0 } }; - radeonLocalTransform(&c->Base, 3, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + + debug_program_log(c, "after native rewrite part 1"); c->Base.SwizzleCaps = &r300_swizzle_caps; } - if (c->Base.Debug) { - fprintf(stderr, "Fragment Program: After native rewrite:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + /* Run the common transformations too. + * Remember, lowering comes last! */ + struct radeon_program_transformation common_transformations[] = { + { &radeonTransformTEX, c }, + }; + radeonLocalTransform(&c->Base, 1, common_transformations); + + common_transformations[0].function = &radeonTransformALU; + radeonLocalTransform(&c->Base, 1, common_transformations); + + if (c->Base.Error) + return; + + debug_program_log(c, "after native rewrite part 2"); rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Fragment Program: After deadcode:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after deadcode"); rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after dataflow passes:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after dataflow passes"); rc_pair_translate(c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair translate:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after pair translate"); rc_pair_schedule(c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair scheduling:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after pair scheduling"); - if (c->is_r500) - rc_pair_regalloc(c, 128); - else - rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + rc_pair_regalloc(c, c->max_temp_regs); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair register allocation:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after register allocation"); if (c->is_r500) { r500BuildFragmentProgramHwCode(c); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 1b2cb8dde7d..4a0b6c02efe 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -29,7 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" - +#include "radeon_emulate_branches.h" /* * Take an already-setup and valid source then swizzle it appropriately to @@ -566,6 +566,14 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; } +static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Vertex Program: %s\n", where); + rc_print_program(&c->Base.Program); + } +} + static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { .IsNative = &swizzle_is_native, @@ -579,6 +587,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) addArtificialOutputs(compiler); + debug_program_log(compiler, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. */ + rc_emulate_branches(&compiler->Base); + + debug_program_log(compiler, "after emulate branches"); + { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, @@ -586,11 +603,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) radeonLocalTransform(&compiler->Base, 1, transformations); } - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after native rewrite"); { /* Note: This pass has to be done seperately from ALU rewrite, @@ -603,29 +616,17 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) radeonLocalTransform(&compiler->Base, 1, transformations); } - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after source conflict resolve:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after source conflict resolve"); rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after deadcode:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after deadcode"); rc_dataflow_swizzles(&compiler->Base); allocate_temporary_registers(compiler); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after dataflow:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after dataflow"); translate_vertex_program(compiler); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index b0fb8e970b7..632f0bcf4f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -31,144 +31,6 @@ #include "../r300_reg.h" -static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) -{ - struct rc_src_register reg = { 0, }; - - reg.File = RC_FILE_CONSTANT; - reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = RC_SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - implement texture compare (shadow extensions) - * - extract non-native source / destination operands - */ -int r500_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; - - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - } else { - inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - - return 1; - } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); - struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); - int pass, fail; - - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->U.I.DstReg = inst->U.I.DstReg; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = rc_find_free_temporary(c); - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; - else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - /* DstReg has been filled out above */ - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - } - - /* Cannot write texture to output registers */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } - - return 1; -} - /** * Rewrite IF instructions to use the ALU result special register. */ @@ -433,19 +295,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) (inst >> 30)); fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3); + (inst >> 24) & 0x3, (inst >> 29) & 0x3); fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, (inst >> 31) & 0x1); fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0918cdf518b..4efbae7ba67 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -42,11 +42,6 @@ extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); extern struct rc_swizzle_caps r500_swizzle_caps; -extern int r500_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data); - extern int r500_transform_IF( struct radeon_compiler * c, struct rc_instruction * inst, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 710cae727a1..10c5e2349e9 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -190,6 +190,17 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r return 0; } +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} /** * Emit a paired ALU instruction. @@ -205,6 +216,14 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair int ip = ++code->inst_end; + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); @@ -252,8 +271,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); - code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); if (inst->WriteALUResult) { code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; @@ -329,21 +348,6 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst return 1; } -static void grow_branches(struct emit_state * s) -{ - unsigned int newreserved = s->BranchesReserved * 2; - struct branch_info * newbranches; - - if (!newreserved) - newreserved = 4; - - newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info)); - memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info)); - - s->Branches = newbranches; - s->BranchesReserved = newreserved; -} - static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) { if (s->Code->inst_end >= 511) { @@ -361,8 +365,8 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst return; } - if (s->CurrentBranchDepth >= s->BranchesReserved) - grow_branches(s); + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; branch->If = newip; @@ -469,9 +473,8 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi if (compiler->Base.Error) return; - assert(code->inst_end >= 0); - - if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= 511) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 6d979bbaecf..27274f07122 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -108,6 +108,18 @@ typedef enum { } rc_compare_func; /** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** * Stores state that influences the compilation of a fragment program. */ struct r300_fragment_program_external_state { @@ -127,11 +139,28 @@ struct r300_fragment_program_external_state { * this field specifies the compare function. * * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). - * - * Otherwise, this field is 0. * \sa rc_compare_func */ unsigned texture_compare_func : 3; + + /** + * If the sampler needs to fake NPOT, this field is set. + */ + unsigned fake_npot : 1; + + /** + * If the sampler will recieve non-normalized coords, + * this field is set. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 2; } unit[16]; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 272f9072d4a..1c8ba864a41 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -277,13 +277,13 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = tempregi; - inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; if (full_vtransform) { inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 6bfda0574f6..09794a52ad8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,8 +81,12 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig struct r300_fragment_program_compiler { struct radeon_compiler Base; struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Hardware specification. */ unsigned is_r500; + unsigned max_temp_regs; /* Register corresponding to the depthbuffer. */ unsigned OutputDepth; /* Registers corresponding to the four colorbuffers. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index cce9166e644..16e2f3a2181 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -160,3 +160,92 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * writes_pair(inst, cb, userdata); } } + + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. + */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 5aa4cb64f3d..62cda20eea6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,13 +36,17 @@ struct rc_swizzle_caps; /** - * Help analyze the register accesses of instructions. + * Help analyze and modify the register accesses of instructions. */ /*@{*/ typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index e0c66c4aeb0..e3c2c83c0cf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -80,19 +80,8 @@ static void or_updatemasks( static void push_branch(struct deadcode_state * s) { - if (s->BranchStackSize >= s->BranchStackReserved) { - unsigned int new_reserve = 2 * s->BranchStackReserved; - struct branchinfo * new_stack; - - if (!new_reserve) - new_reserve = 4; - - new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo)); - memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo)); - - s->BranchStack = new_stack; - s->BranchStackReserved = new_reserve; - } + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; branch->HaveElse = 0; @@ -162,7 +151,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction } unsigned int srcmasks[3]; - rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { unsigned int refmask = 0; @@ -250,7 +239,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f for(struct rc_instruction * inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next, ++ip) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); int dead = 1; if (!opcode->HasDstReg) { @@ -281,7 +270,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) usemask |= RC_MASK_W; - rc_compute_sources_for_writemask(opcode, usemask, srcmasks); + rc_compute_sources_for_writemask(inst, usemask, srcmasks); for(unsigned int src = 0; src < 3; ++src) { for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..d889612f4f4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,331 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. */ + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. + * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) + return; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. + */ +void rc_emulate_branches(struct radeon_compiler * c) +{ + struct emulate_branch_state s; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + struct rc_instruction * ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..e07279f0933 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler * c); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index c1c0181fac1..d593b3e81ae 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -26,6 +26,7 @@ */ #include "radeon_opcodes.h" +#include "radeon_program.h" #include "radeon_program_constants.h" @@ -59,6 +60,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1 }, { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, @@ -75,14 +83,14 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { { .Opcode = RC_OPCODE_DDX, .Name = "DDX", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_DDY, .Name = "DDY", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, @@ -371,10 +379,11 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { }; void rc_compute_sources_for_writemask( - const struct rc_opcode_info * opcode, + const struct rc_instruction *inst, unsigned int writemask, unsigned int *srcmasks) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); srcmasks[0] = 0; srcmasks[1] = 0; srcmasks[2] = 0; @@ -406,21 +415,37 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; - case RC_OPCODE_TEX: case RC_OPCODE_TXB: case RC_OPCODE_TXP: - srcmasks[0] |= RC_MASK_XYZW; + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } break; case RC_OPCODE_DST: - srcmasks[0] |= 0x6; - srcmasks[1] |= 0xa; + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; break; case RC_OPCODE_EXP: case RC_OPCODE_LOG: srcmasks[0] |= RC_MASK_XY; break; case RC_OPCODE_LIT: - srcmasks[0] |= 0xb; + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; break; default: break; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index a3c5b869546..87a2e23084c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -47,6 +47,9 @@ typedef enum { * dst.x = floor(src.x), where dst must be an address register */ RC_OPCODE_ARL, + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, @@ -227,8 +230,10 @@ static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) return &rc_opcodes[opcode]; } +struct rc_instruction; + void rc_compute_sources_for_writemask( - const struct rc_opcode_info * opcode, + const struct rc_instruction *inst, unsigned int writemask, unsigned int *srcmasks); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index b2fe7f76b2f..fdfee867014 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -196,9 +196,10 @@ static void compute_live_intervals(struct regalloc_state * s) } } -static void rewrite_register(struct regalloc_state * s, +static void remap_register(void * data, struct rc_instruction * inst, rc_register_file * file, unsigned int * index) { + struct regalloc_state * s = data; const struct register_info * reg; if (*file == RC_FILE_TEMPORARY) @@ -214,74 +215,6 @@ static void rewrite_register(struct regalloc_state * s, } } -static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - - if (opcode->HasDstReg) { - rc_register_file file = inst->DstReg.File; - unsigned int index = inst->DstReg.Index; - - rewrite_register(s, &file, &index); - - inst->DstReg.File = file; - inst->DstReg.Index = index; - } - - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - rc_register_file file = inst->SrcReg[src].File; - unsigned int index = inst->SrcReg[src].Index; - - rewrite_register(s, &file, &index); - - inst->SrcReg[src].File = file; - inst->SrcReg[src].Index = index; - } -} - -static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) -{ - if (inst->RGB.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->RGB.DestIndex; - - rewrite_register(s, &file, &index); - - inst->RGB.DestIndex = index; - } - - if (inst->Alpha.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->Alpha.DestIndex; - - rewrite_register(s, &file, &index); - - inst->Alpha.DestIndex = index; - } - - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - rc_register_file file = inst->RGB.Src[src].File; - unsigned int index = inst->RGB.Src[src].Index; - - rewrite_register(s, &file, &index); - - inst->RGB.Src[src].File = file; - inst->RGB.Src[src].Index = index; - } - - if (inst->Alpha.Src[src].Used) { - rc_register_file file = inst->Alpha.Src[src].File; - unsigned int index = inst->Alpha.Src[src].Index; - - rewrite_register(s, &file, &index); - - inst->Alpha.Src[src].File = file; - inst->Alpha.Src[src].Index = index; - } - } -} - static void do_regalloc(struct regalloc_state * s) { /* Simple and stupid greedy register allocation */ @@ -310,10 +243,7 @@ static void do_regalloc(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) - rewrite_normal_instruction(s, &inst->U.I); - else - rewrite_pair_instruction(s, &inst->U.P); + rc_remap_registers(inst, &remap_register, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fff5b0c2173..407a0a55ee2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -156,14 +156,8 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - int nargs = opcode->NumSrcRegs; int i; - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { - nargs++; - } - for(i = 0; i < opcode->NumSrcRegs; ++i) { int source; if (needrgb && !istranscendent) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index b5c08aea49e..05b874ba7cf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -175,6 +175,26 @@ static void transform_ABS(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + rc_remove_instruction(inst); +} + static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -458,7 +478,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -474,6 +494,7 @@ int radeonTransformALU( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -506,6 +527,35 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + int tempreg0 = rc_find_free_temporary(c); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -517,6 +567,8 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 7c0d6720b11..842012def02 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -114,12 +114,14 @@ typedef enum { } while(0) #define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) #define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) #define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) #define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) #define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) #define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) #define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) /** * \name Bitmasks for components of vectors. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c new file mode 100644 index 00000000000..b4ba0b3f870 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + return reg; +} + +static void lower_texture_rect(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_rect; + unsigned temp = rc_find_free_temporary(&compiler->Base); + + if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) { + inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_rect->U.I.Opcode = RC_OPCODE_MUL; + inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rect->U.I.DstReg.Index = temp; + inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_rect->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + } else { + inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; + else + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + } + + /* Texture wrap modes don't work on NPOT textures or texrects. + * + * The game plan is simple. We have two flags, fake_npot and + * non_normalized_coords, as well as a tex target. The RECT tex target + * will make the emitted code use non-scaled texcoords. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) { + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + + /* R300 cannot sample from rectangles. */ + if (!compiler->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* For NPOT fallback, we need normalized coordinates anyway. */ + if (compiler->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. */ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + } + + /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + (!compiler->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ |