diff options
author | Younes Manton <[email protected]> | 2010-04-30 20:42:30 -0400 |
---|---|---|
committer | Younes Manton <[email protected]> | 2010-04-30 20:42:30 -0400 |
commit | a8ea1dacc63ac567498049e5756c247b9fec6cd9 (patch) | |
tree | 4031e2e2b6166bd926b43fa4bbb3aab773a30ee5 /src/mesa/drivers/dri/r300 | |
parent | 404fb63b4649f58fce443615e49337d42b8ddece (diff) | |
parent | 35d960cc744c374ccaad48c3d80559b59c74e28a (diff) |
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts:
src/gallium/auxiliary/Makefile
src/gallium/auxiliary/SConscript
src/gallium/auxiliary/util/u_format.csv
src/gallium/auxiliary/vl/vl_compositor.c
src/gallium/auxiliary/vl/vl_compositor.h
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
src/gallium/drivers/identity/id_objects.c
src/gallium/drivers/identity/id_objects.h
src/gallium/drivers/identity/id_screen.c
src/gallium/drivers/nv40/Makefile
src/gallium/drivers/nv40/nv40_screen.c
src/gallium/drivers/softpipe/sp_texture.c
src/gallium/drivers/softpipe/sp_texture.h
src/gallium/drivers/softpipe/sp_video_context.c
src/gallium/drivers/softpipe/sp_video_context.h
src/gallium/include/pipe/p_format.h
src/gallium/include/pipe/p_screen.h
src/gallium/include/pipe/p_video_context.h
src/gallium/include/pipe/p_video_state.h
src/gallium/include/state_tracker/dri1_api.h
src/gallium/include/state_tracker/drm_api.h
src/gallium/state_trackers/dri/common/dri_context.c
src/gallium/state_trackers/xorg/xvmc/attributes.c
src/gallium/state_trackers/xorg/xvmc/block.c
src/gallium/state_trackers/xorg/xvmc/context.c
src/gallium/state_trackers/xorg/xvmc/subpicture.c
src/gallium/state_trackers/xorg/xvmc/surface.c
src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
src/gallium/state_trackers/xorg/xvmc/tests/Makefile
src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
src/gallium/winsys/drm/radeon/core/radeon_drm.c
src/gallium/winsys/g3dvl/vl_winsys.h
src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
src/gallium/winsys/sw/Makefile
Diffstat (limited to 'src/mesa/drivers/dri/r300')
46 files changed, 1391 insertions, 614 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 04459c2ddfa..2245998c952 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -7,8 +7,6 @@ CFLAGS += $(RADEON_CFLAGS) LIBNAME = r300_dri.so -MINIGLX_SOURCES = server/radeon_dri.c - ifeq ($(RADEON_LDFLAGS),) CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c endif @@ -33,10 +31,13 @@ RADEON_COMMON_SOURCES = \ radeon_fbo.c \ radeon_lock.c \ radeon_mipmap_tree.c \ - radeon_span.c \ + radeon_pixel_read.c \ radeon_queryobj.c \ + radeon_span.c \ radeon_texture.c \ - radeon_tex_copy.c + radeon_tex_copy.c \ + radeon_tex_getimage.c \ + radeon_tile.c DRIVER_SOURCES = \ radeon_screen.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index d83888d90a3..e432afc3d41 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -8,11 +8,13 @@ LIBNAME = r300compiler C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ + radeon_emulate_branches.c \ radeon_program.c \ radeon_program_print.c \ radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ + radeon_program_tex.c \ radeon_pair_translate.c \ radeon_pair_schedule.c \ radeon_pair_regalloc.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 46075a8aee9..28a3d39d961 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -17,6 +17,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_opcodes.c', 'radeon_program_alu.c', 'radeon_program_pair.c', + 'radeon_program_tex.c', 'radeon_pair_translate.c', 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h index ce23c319ad3..42344d0e3ba 100644 --- a/src/mesa/drivers/dri/r300/compiler/memory_pool.h +++ 
b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -46,4 +46,35 @@ void memory_pool_init(struct memory_pool * pool); void memory_pool_destroy(struct memory_pool * pool); void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accommodate up to num new elements. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k <= Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + #endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 928c15e1e40..794db8335a2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -31,166 +31,6 @@ #include "../r300_reg.h" -static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) -{ - struct rc_src_register reg = { 0, }; - - reg.File = RC_FILE_CONSTANT; - reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = RC_SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand 
swizzles - * - introduce a temporary register when write masks are needed - */ -int r300_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; - - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - } else { - inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - - return 1; - } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); - struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); - int pass, fail; - - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->U.I.DstReg = inst->U.I.DstReg; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = rc_find_free_temporary(c); - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - 
inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; - else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - /* DstReg has been filled out above */ - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - } - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. 
- */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) { - struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index; - } - - /* Cannot write texture to output registers or with masks */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } - - return 1; -} - /* just some random things... 
*/ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h index 418df36c936..8b755703be4 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -41,6 +41,4 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); -extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); - #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index cc552aee176..37dafa77106 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -353,7 +353,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi } } - if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) + if (code->pixsize >= compiler->max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); if (compiler->Base.Error) diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c2d5dc27b49..25bf373b6fd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -25,7 +25,9 @@ #include <stdio.h> #include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" #include "radeon_program_alu.h" +#include "radeon_program_tex.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -84,91 +86,96 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) } } +static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: %s\n", 
where); + rc_print_program(&c->Base.Program); + } +} + void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { rewrite_depth_out(c); + debug_program_log(c, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. */ + rc_emulate_branches(&c->Base); + + debug_program_log(c, "after emulate branches"); + if (c->is_r500) { struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, c }, { &r500_transform_IF, 0 }, { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 } }; - radeonLocalTransform(&c->Base, 5, transformations); + radeonLocalTransform(&c->Base, 4, transformations); + + debug_program_log(c, "after native rewrite part 1"); c->Base.SwizzleCaps = &r500_swizzle_caps; } else { struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, c }, { &radeonTransformALU, 0 }, { &radeonTransformTrigSimple, 0 } }; - radeonLocalTransform(&c->Base, 3, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + + debug_program_log(c, "after native rewrite part 1"); c->Base.SwizzleCaps = &r300_swizzle_caps; } - if (c->Base.Debug) { - fprintf(stderr, "Fragment Program: After native rewrite:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + /* Run the common transformations too. + * Remember, lowering comes last! 
*/ + struct radeon_program_transformation common_transformations[] = { + { &radeonTransformTEX, c }, + }; + radeonLocalTransform(&c->Base, 1, common_transformations); + + common_transformations[0].function = &radeonTransformALU; + radeonLocalTransform(&c->Base, 1, common_transformations); + + if (c->Base.Error) + return; + + debug_program_log(c, "after native rewrite part 2"); rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Fragment Program: After deadcode:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after deadcode"); rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after dataflow passes:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after dataflow passes"); rc_pair_translate(c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair translate:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after pair translate"); rc_pair_schedule(c); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair scheduling:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after pair scheduling"); - if (c->is_r500) - rc_pair_regalloc(c, 128); - else - rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + rc_pair_regalloc(c, c->max_temp_regs); if (c->Base.Error) return; - if (c->Base.Debug) { - fprintf(stderr, "Compiler: after pair register allocation:\n"); - rc_print_program(&c->Base.Program); - fflush(stderr); - } + debug_program_log(c, "after register allocation"); if (c->is_r500) { r500BuildFragmentProgramHwCode(c); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 1b2cb8dde7d..4a0b6c02efe 100644 --- 
a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -29,7 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" - +#include "radeon_emulate_branches.h" /* * Take an already-setup and valid source then swizzle it appropriately to @@ -566,6 +566,14 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; } +static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Vertex Program: %s\n", where); + rc_print_program(&c->Base.Program); + } +} + static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { .IsNative = &swizzle_is_native, @@ -579,6 +587,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) addArtificialOutputs(compiler); + debug_program_log(compiler, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. 
*/ + rc_emulate_branches(&compiler->Base); + + debug_program_log(compiler, "after emulate branches"); + { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, @@ -586,11 +603,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) radeonLocalTransform(&compiler->Base, 1, transformations); } - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after native rewrite"); { /* Note: This pass has to be done seperately from ALU rewrite, @@ -603,29 +616,17 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) radeonLocalTransform(&compiler->Base, 1, transformations); } - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after source conflict resolve:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after source conflict resolve"); rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after deadcode:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after deadcode"); rc_dataflow_swizzles(&compiler->Base); allocate_temporary_registers(compiler); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after dataflow:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + debug_program_log(compiler, "after dataflow"); translate_vertex_program(compiler); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index b0fb8e970b7..632f0bcf4f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -31,144 +31,6 @@ #include "../r300_reg.h" -static struct rc_src_register shadow_ambient(struct 
radeon_compiler * c, int tmu) -{ - struct rc_src_register reg = { 0, }; - - reg.File = RC_FILE_CONSTANT; - reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = RC_SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - implement texture compare (shadow extensions) - * - extract non-native source / destination operands - */ -int r500_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; - - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0].File = RC_FILE_NONE; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - } else { - inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); - } - - return 1; - } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); - struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); - int pass, fail; - - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = 
rc_find_free_temporary(c); - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->U.I.DstReg = inst->U.I.DstReg; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = rc_find_free_temporary(c); - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; - else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - /* DstReg has been filled out above */ - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->U.I.SrcReg[fail] = 
shadow_ambient(c, inst->U.I.TexSrcUnit); - } - } - - /* Cannot write texture to output registers */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } - - return 1; -} - /** * Rewrite IF instructions to use the ALU result special register. 
*/ @@ -433,19 +295,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) (inst >> 30)); fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3); + (inst >> 24) & 0x3, (inst >> 29) & 0x3); fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, (inst >> 31) & 0x1); fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0918cdf518b..4efbae7ba67 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -42,11 +42,6 @@ extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); extern struct rc_swizzle_caps r500_swizzle_caps; -extern int r500_transform_TEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data); - extern int r500_transform_IF( struct radeon_compiler * c, struct rc_instruction * inst, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 710cae727a1..10c5e2349e9 
100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -190,6 +190,17 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r return 0; } +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} /** * Emit a paired ALU instruction. @@ -205,6 +216,14 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair int ip = ++code->inst_end; + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); @@ -252,8 +271,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); - code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); if (inst->WriteALUResult) { code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; @@ -329,21 +348,6 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst return 1; } -static void grow_branches(struct emit_state * s) -{ - unsigned int newreserved = s->BranchesReserved * 2; - struct branch_info * newbranches; - - if (!newreserved) - newreserved = 4; - 
- newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info)); - memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info)); - - s->Branches = newbranches; - s->BranchesReserved = newreserved; -} - static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) { if (s->Code->inst_end >= 511) { @@ -361,8 +365,8 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst return; } - if (s->CurrentBranchDepth >= s->BranchesReserved) - grow_branches(s); + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; branch->If = newip; @@ -469,9 +473,8 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi if (compiler->Base.Error) return; - assert(code->inst_end >= 0); - - if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= 511) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 6d979bbaecf..27274f07122 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -108,6 +108,18 @@ typedef enum { } rc_compare_func; /** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** * Stores state that influences the compilation of a fragment program. 
*/ struct r300_fragment_program_external_state { @@ -127,11 +139,28 @@ struct r300_fragment_program_external_state { * this field specifies the compare function. * * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). - * - * Otherwise, this field is 0. * \sa rc_compare_func */ unsigned texture_compare_func : 3; + + /** + * If the sampler needs to fake NPOT, this field is set. + */ + unsigned fake_npot : 1; + + /** + * If the sampler will receive non-normalized coords, + * this field is set. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 2; } unit[16]; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 272f9072d4a..1c8ba864a41 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -277,13 +277,13 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->U.I.SrcReg[0].Index = tempregi; - inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; if (full_vtransform) { inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); diff --git 
a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 6bfda0574f6..09794a52ad8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,8 +81,12 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig struct r300_fragment_program_compiler { struct radeon_compiler Base; struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Hardware specification. */ unsigned is_r500; + unsigned max_temp_regs; /* Register corresponding to the depthbuffer. */ unsigned OutputDepth; /* Registers corresponding to the four colorbuffers. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index cce9166e644..16e2f3a2181 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -160,3 +160,92 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * writes_pair(inst, cb, userdata); } } + + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } +} + +static void 
remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. 
+ */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 5aa4cb64f3d..62cda20eea6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,13 +36,17 @@ struct rc_swizzle_caps; /** - * Help analyze the register accesses of instructions. + * Help analyze and modify the register accesses of instructions. */ /*@{*/ typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index e0c66c4aeb0..e3c2c83c0cf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -80,19 +80,8 @@ static void or_updatemasks( static void push_branch(struct deadcode_state * s) { - if (s->BranchStackSize >= s->BranchStackReserved) { - unsigned int new_reserve = 2 * s->BranchStackReserved; - struct branchinfo * new_stack; - - if (!new_reserve) - new_reserve = 4; - - new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo)); - memcpy(new_stack, 
s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo)); - - s->BranchStack = new_stack; - s->BranchStackReserved = new_reserve; - } + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; branch->HaveElse = 0; @@ -162,7 +151,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction } unsigned int srcmasks[3]; - rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { unsigned int refmask = 0; @@ -250,7 +239,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f for(struct rc_instruction * inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next, ++ip) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); int dead = 1; if (!opcode->HasDstReg) { @@ -281,7 +270,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) usemask |= RC_MASK_W; - rc_compute_sources_for_writemask(opcode, usemask, srcmasks); + rc_compute_sources_for_writemask(inst, usemask, srcmasks); for(unsigned int src = 0; src < 3; ++src) { for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 00000000000..d889612f4f4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,331 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. 
*/ + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * 
temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? 
elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. 
+ * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) + return; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. 
+ */ +void rc_emulate_branches(struct radeon_compiler * c) +{ + struct emulate_branch_state s; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + struct rc_instruction * ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 00000000000..e07279f0933 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler * c); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index c1c0181fac1..d593b3e81ae 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -26,6 +26,7 @@ */ #include "radeon_opcodes.h" +#include "radeon_program.h" #include "radeon_program_constants.h" @@ -59,6 +60,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1 }, { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, @@ -75,14 +83,14 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { { .Opcode = RC_OPCODE_DDX, .Name = "DDX", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_DDY, .Name = "DDY", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, @@ -371,10 +379,11 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { }; void rc_compute_sources_for_writemask( - const struct rc_opcode_info * opcode, + const struct rc_instruction *inst, unsigned int writemask, unsigned int *srcmasks) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); srcmasks[0] = 0; srcmasks[1] = 0; srcmasks[2] = 0; @@ -406,21 +415,37 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; - case RC_OPCODE_TEX: case RC_OPCODE_TXB: case 
RC_OPCODE_TXP: - srcmasks[0] |= RC_MASK_XYZW; + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } break; case RC_OPCODE_DST: - srcmasks[0] |= 0x6; - srcmasks[1] |= 0xa; + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; break; case RC_OPCODE_EXP: case RC_OPCODE_LOG: srcmasks[0] |= RC_MASK_XY; break; case RC_OPCODE_LIT: - srcmasks[0] |= 0xb; + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; break; default: break; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index a3c5b869546..87a2e23084c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -47,6 +47,9 @@ typedef enum { * dst.x = floor(src.x), where dst must be an address register */ RC_OPCODE_ARL, + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = src0.c < 0.0 ? 
src1.c : src2.c */ RC_OPCODE_CMP, @@ -227,8 +230,10 @@ static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) return &rc_opcodes[opcode]; } +struct rc_instruction; + void rc_compute_sources_for_writemask( - const struct rc_opcode_info * opcode, + const struct rc_instruction *inst, unsigned int writemask, unsigned int *srcmasks); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index b2fe7f76b2f..fdfee867014 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -196,9 +196,10 @@ static void compute_live_intervals(struct regalloc_state * s) } } -static void rewrite_register(struct regalloc_state * s, +static void remap_register(void * data, struct rc_instruction * inst, rc_register_file * file, unsigned int * index) { + struct regalloc_state * s = data; const struct register_info * reg; if (*file == RC_FILE_TEMPORARY) @@ -214,74 +215,6 @@ static void rewrite_register(struct regalloc_state * s, } } -static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - - if (opcode->HasDstReg) { - rc_register_file file = inst->DstReg.File; - unsigned int index = inst->DstReg.Index; - - rewrite_register(s, &file, &index); - - inst->DstReg.File = file; - inst->DstReg.Index = index; - } - - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - rc_register_file file = inst->SrcReg[src].File; - unsigned int index = inst->SrcReg[src].Index; - - rewrite_register(s, &file, &index); - - inst->SrcReg[src].File = file; - inst->SrcReg[src].Index = index; - } -} - -static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) -{ - if (inst->RGB.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = 
inst->RGB.DestIndex; - - rewrite_register(s, &file, &index); - - inst->RGB.DestIndex = index; - } - - if (inst->Alpha.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->Alpha.DestIndex; - - rewrite_register(s, &file, &index); - - inst->Alpha.DestIndex = index; - } - - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - rc_register_file file = inst->RGB.Src[src].File; - unsigned int index = inst->RGB.Src[src].Index; - - rewrite_register(s, &file, &index); - - inst->RGB.Src[src].File = file; - inst->RGB.Src[src].Index = index; - } - - if (inst->Alpha.Src[src].Used) { - rc_register_file file = inst->Alpha.Src[src].File; - unsigned int index = inst->Alpha.Src[src].Index; - - rewrite_register(s, &file, &index); - - inst->Alpha.Src[src].File = file; - inst->Alpha.Src[src].Index = index; - } - } -} - static void do_regalloc(struct regalloc_state * s) { /* Simple and stupid greedy register allocation */ @@ -310,10 +243,7 @@ static void do_regalloc(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) - rewrite_normal_instruction(s, &inst->U.I); - else - rewrite_pair_instruction(s, &inst->U.P); + rc_remap_registers(inst, &remap_register, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fff5b0c2173..407a0a55ee2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -156,14 +156,8 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - int nargs = opcode->NumSrcRegs; int i; - /* Special case for DDX/DDY (MDH/MDV). 
*/ - if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { - nargs++; - } - for(i = 0; i < opcode->NumSrcRegs; ++i) { int source; if (needrgb && !istranscendent) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index b5c08aea49e..05b874ba7cf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -175,6 +175,26 @@ static void transform_ABS(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + rc_remove_instruction(inst); +} + static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -458,7 +478,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. 
* * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -474,6 +494,7 @@ int radeonTransformALU( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -506,6 +527,35 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + int tempreg0 = rc_find_free_temporary(c); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. 
@@ -517,6 +567,8 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 7c0d6720b11..842012def02 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -114,12 +114,14 @@ typedef enum { } while(0) #define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) #define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) #define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) #define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) #define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) #define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) #define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) /** * \name Bitmasks for components of vectors. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c new file mode 100644 index 00000000000..b4ba0b3f870 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +/* Series of transformations to be done on textures. 
*/ + +static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + return reg; +} + +static void lower_texture_rect(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_rect; + unsigned temp = rc_find_free_temporary(&compiler->Base); + + if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) { + inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_rect->U.I.Opcode = RC_OPCODE_MUL; + inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rect->U.I.DstReg.Index = temp; + inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_rect->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->U.I.Opcode != 
RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + } else { + inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + 
inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0) */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; + else + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; + + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + } + + /* Texture wrap modes don't work on NPOT textures or texrects. + * + * The game plan is simple. We have two flags, fake_npot and + * non_normalized_coords, as well as a tex target. The RECT tex target + * will make the emitted code use non-scaled texcoords. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. 
We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) { + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + + /* R300 cannot sample from rectangles. */ + if (!compiler->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* For NPOT fallback, we need normalized coordinates anyway. */ + if (compiler->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. 
*/ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; 
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. 
*/ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + } + + /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + (!compiler->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h new file mode 100644 index 00000000000..a0105051ac4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 
Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 2bc761bc208..0865a456443 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -117,7 +117,9 @@ static void create_fragment_program(struct r300_context *r300) compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); compiler.OutputColor[0] = FRAG_RESULT_COLOR; compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.enable_shadow_ambient = GL_TRUE; compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); + compiler.max_temp_regs = (compiler.is_r500) ? 128 : 32; compiler.code = &r300->blit.fp_code; compiler.AllocateHwInputs = fp_allocate_hw_inputs; @@ -381,19 +383,16 @@ static GLboolean validate_buffers(struct r300_context *r300, struct radeon_bo *dst_bo) { int ret; - ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - src_bo, RADEON_GEM_DOMAIN_VRAM, 0); - if (ret) - return GL_FALSE; + + radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - first_elem(&r300->radeon.dma.reserved)->bo, - RADEON_GEM_DOMAIN_GTT, 0); + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); if (ret) return GL_FALSE; @@ -585,12 +584,6 @@ unsigned r300_blit(GLcontext *ctx, if (dst_pitch % 2 > 0) ++dst_pitch; - /* Rendering to small buffer doesn't work. - * Looks like a hw limitation. 
- */ - if (dst_pitch < 32) - return 0; - /* Need to clamp the region size to make sure * we don't read outside of the source buffer * or write outside of the destination buffer. diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 4787bafc66a..c40802aec6e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -77,12 +77,29 @@ static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) cnt = vpu_count(atom->cmd); if (r300->radeon.radeonScreen->kernel_mm) { - extra = 5; + extra = 3; } return cnt ? (cnt * 4) + extra : 0; } +static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + + if (r300->radeon.radeonScreen->kernel_mm) { + cnt = r300->selected_vp->code.constants.Count * 4; + extra = 3; + } else { + cnt = vpu_count(atom->cmd); + extra = 1; + } + + return cnt ? (cnt * 4) + extra : 0; +} + void r300_emit_vpu(struct r300_context *r300, uint32_t *data, unsigned len, @@ -90,8 +107,7 @@ void r300_emit_vpu(struct r300_context *r300, { BATCH_LOCALS(&r300->radeon); - BEGIN_BATCH_NO_AUTOSTATE(5 + len); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + BEGIN_BATCH_NO_AUTOSTATE(3 + len); OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR); OUT_BATCH_TABLE(data, len); @@ -102,15 +118,26 @@ static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw; + uint32_t addr; cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = atom->check(ctx, atom); r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr); } +static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr; + + 
cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + + r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr); +} + void r500_emit_fp(struct r300_context *r300, uint32_t *data, unsigned len, @@ -333,36 +360,37 @@ void r300_emit_cb_setup(struct r300_context *r300, assert(offset % 32 == 0); switch (format) { - case MESA_FORMAT_RGB565: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_RGB565; + case MESA_FORMAT_SL8: + case MESA_FORMAT_A8: + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + cbpitch |= R300_COLOR_FORMAT_I8; break; + case MESA_FORMAT_RGB565: case MESA_FORMAT_RGB565_REV: - assert(!_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_ARGB4444: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB4444; - break; case MESA_FORMAT_ARGB4444_REV: - assert(!_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_ARGB4444; break; + case MESA_FORMAT_RGBA5551: case MESA_FORMAT_ARGB1555: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB1555; - break; case MESA_FORMAT_ARGB1555_REV: - assert(!_mesa_little_endian()); cbpitch |= R300_COLOR_FORMAT_ARGB1555; break; + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + break; default: - if (cpp == 4) { - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - } else { - _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");; - } + _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()"); break; } @@ -778,24 +806,6 @@ void r300InitCmdBuf(r300ContextPtr r300) /* VPU only on TCL */ if (has_tcl) { int i; - if (r300->radeon.radeonScreen->kernel_mm) { - ALLOC_STATE(vap_flush, always, 10, 0); - /* flush processing vertices */ - r300->hw.vap_flush.cmd[0] = 
cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); - r300->hw.vap_flush.cmd[1] = 0; - r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1); - r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D; - r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1); - r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN; - r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); - r300->hw.vap_flush.cmd[7] = 0xffffff; - r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); - r300->hw.vap_flush.cmd[9] = 0; - } else { - ALLOC_STATE(vap_flush, never, 10, 0); - } - - ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); @@ -803,11 +813,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpi.emit = emit_vpu_state; if (is_r500) { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu_state; + r300->hw.vpp.emit = emit_vpp_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = @@ -824,11 +834,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpucp[i].emit = emit_vpu_state; } } else { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu_state; + r300->hw.vpp.emit = emit_vpp_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index df4cc11da42..4dce454c3a7 100644 --- 
a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_render.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" #include "radeon_buffer_objects.h" @@ -109,7 +110,6 @@ static const struct dri_extension card_extensions[] = { {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, - {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, @@ -227,6 +227,8 @@ static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) r300->radeon.Fallback |= bit; else r300->radeon.Fallback &= ~bit; + + r300SwitchFallback(ctx, R300_FALLBACK_RADEON_COMMON, mode); } static void r300_emit_query_finish(radeonContextPtr radeon) @@ -322,6 +324,12 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.check_blit = r300_check_blit; radeon->vtbl.blit = r300_blit; + + if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + radeon->vtbl.is_format_renderable = r500IsFormatRenderable; + } else { + radeon->vtbl.is_format_renderable = r300IsFormatRenderable; + } } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -456,6 +464,9 @@ static void r300InitGLExtensions(GLcontext *ctx) } if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex"); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + _mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil"); } static void r300InitIoctlFuncs(struct dd_function_table *functions) @@ -492,7 +503,7 @@ GLboolean r300CreateContext(const __GLcontextModes 
* glVisual, _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); - r300InitStateFuncs(&functions); + r300InitStateFuncs(&r300->radeon, &functions); r300InitTextureFuncs(&r300->radeon, &functions); r300InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 78ab43a99f9..df7115e7dae 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -355,7 +355,6 @@ struct r300_hw_state { struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ - struct radeon_state_atom vap_flush; struct radeon_state_atom vpi; /* vp instructions */ struct radeon_state_atom vpp; /* vp parameters */ struct radeon_state_atom vps; /* vertex point size (?) */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 61ea5e4d9a3..2b7c93a9575 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -219,7 +219,9 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog compiler.code = &fp->code; compiler.state = fp->state; + compiler.enable_shadow_ambient = GL_TRUE; compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.max_temp_regs = (compiler.is_r500) ? 
128 : 32; compiler.OutputDepth = FRAG_RESULT_DEPTH; memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); compiler.OutputColor[0] = FRAG_RESULT_COLOR; @@ -256,6 +258,19 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog fp->InputsRead = compiler.Base.Program.InputsRead; + /* Clear the fog/wpos_attr if code accessing these + * attributes has been removed during compilation + */ + if (fp->fog_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->fog_attr))) + fp->fog_attr = FRAG_ATTRIB_MAX; + } + + if (fp->wpos_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->wpos_attr))) + fp->wpos_attr = FRAG_ATTRIB_MAX; + } + rc_destroy(&compiler.Base); } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index d18ebab8ff2..ac93563ed9e 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -482,7 +482,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST_SHIFT 0 # define R300_PVS_XYZW_VALID_INST_SHIFT 10 # define R300_PVS_LAST_INST_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1760,7 +1760,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). 
* diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 95961314863..bb8f91491f5 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -386,6 +386,14 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); return; } + + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1); + OUT_BATCH(rmesa->radeon.tcl.aos[0].count); + END_BATCH(); + } + r300_emit_scissor(rmesa->radeon.glCtx); while (num_verts > 0) { int nr; @@ -400,8 +408,9 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) COMMIT_BATCH(); } -static const char *getFallbackString(uint32_t bit) +static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit) { + static char common_fallback_str[32]; switch (bit) { case R300_FALLBACK_VERTEX_PROGRAM : return "vertex program"; @@ -421,6 +430,9 @@ static const char *getFallbackString(uint32_t bit) return "render mode != GL_RENDER"; case R300_FALLBACK_FRAGMENT_PROGRAM: return "fragment program"; + case R300_FALLBACK_RADEON_COMMON: + snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback); + return common_fallback_str; case R300_FALLBACK_AOS_LIMIT: return "aos limit"; case R300_FALLBACK_INVALID_BUFFERS: @@ -440,7 +452,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) if (mode) { if ((fallback_warn & bit) == 0) { if (RADEON_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit)); + fprintf(stderr, "WARNING! 
Falling back to software for %s\n", getFallbackString(rmesa, bit)); fallback_warn |= bit; } rmesa->fallback |= bit; diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h index ec785474a67..581e9fa0ccd 100644 --- a/src/mesa/drivers/dri/r300/r300_render.h +++ b/src/mesa/drivers/dri/r300/r300_render.h @@ -41,6 +41,7 @@ #define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21) #define R300_FALLBACK_RENDER_MODE (1 << 22) #define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23) +#define R300_FALLBACK_RADEON_COMMON (1 << 29) #define R300_FALLBACK_AOS_LIMIT (1 << 30) #define R300_FALLBACK_INVALID_BUFFERS (1 << 31) #define R300_RASTER_FALLBACK_MASK 0xffff0000 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 9d1ff6e2ba2..fa33be49989 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -46,13 +46,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "main/api_arrayelt.h" +#include "drivers/common/meta.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "vbo/vbo.h" #include "tnl/tnl.h" -#include "tnl/t_vp_build.h" #include "r300_context.h" #include "r300_state.h" @@ -366,7 +366,6 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - R300_STATECHANGE( rmesa, vap_flush ); R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; @@ -590,7 +589,7 @@ static void r300SetDepthState(GLcontext * ctx) R500_STENCIL_REFMASK_FRONT_BACK); r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); - if (ctx->Depth.Test) { + if (ctx->Depth.Test && ctx->DrawBuffer->_DepthBuffer) { r300->hw.zs.cmd[R300_ZS_CNTL_0] |= 
R300_Z_ENABLE; if (ctx->Depth.Mask) r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; @@ -794,12 +793,14 @@ static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa R300_STATECHANGE(r300, ga_point_minmax); r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK; r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0); + r300PointSize(ctx, ctx->Point.Size); break; case GL_POINT_SIZE_MAX: R300_STATECHANGE(r300, ga_point_minmax); r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK; r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT; + r300PointSize(ctx, ctx->Point.Size); break; case GL_POINT_DISTANCE_ATTENUATION: break; @@ -1657,20 +1658,21 @@ void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (5 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || - (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) + 
(rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); } @@ -1762,8 +1764,6 @@ static void r300ResetHwState(r300ContextPtr r300) if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - radeon_firevertices(&r300->radeon); - r300ColorMask(ctx, ctx->Color.ColorMask[0][RCOMP], ctx->Color.ColorMask[0][GCOMP], @@ -1985,23 +1985,6 @@ void r300UpdateShaders(r300ContextPtr rmesa) if (rmesa->options.hw_tcl_enabled) { struct r300_vertex_program *vp; - if (rmesa->radeon.NewGLState) { - int i; - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = - TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - &rmesa->dummy_attrib[i]; - } - - _tnl_UpdateFixedFunctionProgram(ctx); - - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - rmesa->temp_attrib[i]; - } - } - vp = r300SelectAndTranslateVertexShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); @@ -2255,6 +2238,68 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) } } +#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \ + (R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \ + R500_C2_SEL_##c2 | R500_C3_SEL_##c3) +static void r300SetupUsOutputFormat(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t hw_format; + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon); + + if (!rrb) { + return; + } + + switch (rrb->base.Format) + { + case MESA_FORMAT_RGBA5551: + case MESA_FORMAT_RGBA8888: + hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R); + break; + case 
MESA_FORMAT_RGB565_REV: + case MESA_FORMAT_RGBA8888_REV: + hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A); + break; + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A); + break; + case MESA_FORMAT_ARGB4444_REV: + case MESA_FORMAT_ARGB1555_REV: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B); + break; + case MESA_FORMAT_SRGBA8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R); + break; + case MESA_FORMAT_SARGB8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A); + break; + case MESA_FORMAT_SL8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A); + break; + case MESA_FORMAT_A8: + hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A); + break; + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A); + break; + default: + assert(!"Unsupported format"); + break; + } + + R300_STATECHANGE(rmesa, us_out_fmt); + rmesa->hw.us_out_fmt.cmd[1] = hw_format; +} +#undef EASY_US_OUT_FMT + /** * Called by Mesa after an internal state update. 
*/ @@ -2284,6 +2329,10 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; } + if (new_state & _NEW_BUFFERS) { + r300SetupUsOutputFormat(ctx); + } + r300->radeon.NewGLState |= new_state; } @@ -2305,7 +2354,7 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode) /** * Initialize driver's state callback functions */ -void r300InitStateFuncs(struct dd_function_table *functions) +void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { functions->UpdateState = r300InvalidateState; @@ -2344,8 +2393,13 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->ClipPlane = r300ClipPlane; functions->Scissor = radeonScissor; - functions->DrawBuffer = radeonDrawBuffer; - functions->ReadBuffer = radeonReadBuffer; + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; + + functions->CopyPixels = _mesa_meta_CopyPixels; + functions->DrawPixels = _mesa_meta_DrawPixels; + if (radeon->radeonScreen->kernel_mm) + functions->ReadPixels = radeonReadPixels; } void r300InitShaderFunctions(r300ContextPtr r300) diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index d46bf9f1796..e70f84f4e4b 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -55,7 +55,7 @@ void r300UpdateDrawBuffer (GLcontext * ctx); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); -void r300InitStateFuncs (struct dd_function_table *functions); +void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions); void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count); void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c 
b/src/mesa/drivers/dri/r300/r300_tex.c index 8dd85073954..baef206bc26 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -308,6 +308,45 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, return &t->base; } +unsigned r300IsFormatRenderable(gl_format mesa_format) +{ + switch (mesa_format) + { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_RGBA5551: + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGB565_REV: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB4444_REV: + case MESA_FORMAT_ARGB1555_REV: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_SL8: + case MESA_FORMAT_A8: + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + case MESA_FORMAT_Z16: + return 1; + default: + return 0; + } +} + +unsigned r500IsFormatRenderable(gl_format mesa_format) +{ + if (mesa_format == MESA_FORMAT_S8_Z24) { + return 1; + } else { + return r300IsFormatRenderable(mesa_format); + } +} + void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index 9694e703b83..aca44cd7669 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -53,4 +53,7 @@ extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_tab int32_t r300TranslateTexFormat(gl_format mesaFormat); +unsigned r300IsFormatRenderable(gl_format mesaFormat); +unsigned r500IsFormatRenderable(gl_format mesaFormat); + #endif /* __r300_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index cbe4cb83047..a1fe3780294 100644 --- 
a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -263,15 +263,25 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { - rc_copy_output(&compiler.Base, - VERT_RESULT_HPOS, - vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); + unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_wpos_attr + * before moving the wpos attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0); + rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr); } if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { - rc_move_output(&compiler.Base, - VERT_RESULT_FOGC, - vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); + unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_fog_attr + * before moving the fog attr there. + * Such instructions will be removed by DCE. 
+ */ + rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0); + rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X); } r3xx_compile_vertex_program(&compiler); @@ -342,8 +352,6 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver assert((code->length > 0) && (code->length % 4 == 0)); - R300_STATECHANGE( r300, vap_flush ); - switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -381,10 +389,14 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - R300_STATECHANGE(rmesa, vap_flush); + R300_STATECHANGE(rmesa, vap_cntl); R300_STATECHANGE(rmesa, vpp); param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); - bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) { + WARN_ONCE("Too many VP params, expect rendering errors\n"); + } + /* Prevent the overflow (vpu.count is u8) */ + bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count)); param_count /= 4; r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); @@ -397,6 +409,6 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c new file mode 120000 index 00000000000..3b03803126f --- /dev/null 
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c @@ -0,0 +1 @@ +../radeon/radeon_pixel_read.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tex_getimage.c b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c new file mode 120000 index 00000000000..d9836d7326e --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c @@ -0,0 +1 @@ +../radeon/radeon_tex_getimage.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tile.c b/src/mesa/drivers/dri/r300/radeon_tile.c new file mode 120000 index 00000000000..d4bfe27da64 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tile.c @@ -0,0 +1 @@ +../radeon/radeon_tile.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tile.h b/src/mesa/drivers/dri/r300/radeon_tile.h new file mode 120000 index 00000000000..31074c581ea --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tile.h @@ -0,0 +1 @@ +../radeon/radeon_tile.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.c b/src/mesa/drivers/dri/r300/server/radeon_dri.c deleted file mode 120000 index d05847d650f..00000000000 --- a/src/mesa/drivers/dri/r300/server/radeon_dri.c +++ /dev/null @@ -1 +0,0 @@ -../../radeon/server/radeon_dri.c
\ No newline at end of file |