diff options
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 131 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_compiler.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 2 |
3 files changed, 98 insertions, 37 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 400408620ea..a0e081fe6f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -28,7 +28,6 @@ #include "radeon_program.h" #include "radeon_program_alu.h" -#include "shader/prog_optimize.h" #include "shader/prog_print.h" @@ -438,6 +437,100 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * return GL_TRUE; } +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct prog_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct prog_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index >= num_orig_temps) + num_orig_temps = inst->DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->SrcReg[i].Index; + inst->SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + /** * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. @@ -675,7 +768,7 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx) +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { GLboolean success; @@ -728,7 +821,7 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi radeonNqssaDce(compiler->program, &nqssadce, compiler); /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, compiler->program); + allocate_temporary_registers(compiler); if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after NQSSADCE:\n"); @@ -738,38 +831,6 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi } assert(compiler->program->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = compiler->program->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } - - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } - - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. - */ - compiler->code->num_temporaries = max + 1; - } success = translate_vertex_program(compiler); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f8e4b3c681a..b98b1c9e6b5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -74,6 +74,6 @@ struct r300_vertex_program_compiler { struct gl_program *program; }; -GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c, GLcontext * ctx); +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index ec4ba9ca7da..dfb2a9e3d85 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -125,7 +125,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program); } - if (!r3xx_compile_vertex_program(&compiler, ctx)) + if (!r3xx_compile_vertex_program(&compiler)) vp->error = GL_TRUE; rc_destroy(&compiler.Base); |