summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2010-09-03 20:26:43 +0200
committerMarek Olšák <[email protected]>2010-09-04 18:56:22 +0200
commit63943c8fcdc7dae4c059d364528b0a90b8c4041f (patch)
tree145a55d9004a6c1661916e76b476d0d7c91fb684
parent33360a707e16c3349fde9dd43fee8e38bae9e7f0 (diff)
r300/compiler: improve register allocation with indexable temporaries for VS
Register allocation can now reallocate temporaries right after the last indexed source operand, instead of being disabled for the whole shader.
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c63
1 files changed, 46 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 3e8a8236c05..1628078a327 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -665,12 +665,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
struct rc_instruction *inst;
struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
- char hwtemps[R300_VS_MAX_TEMPS];
+ char hwtemps[RC_REGISTER_MAX_INDEX];
struct temporary_allocation * ta;
unsigned int i, j;
+ struct rc_instruction *last_inst_src_reladdr = NULL;
memset(hwtemps, 0, sizeof(hwtemps));
+ rc_recompute_ips(c);
+
/* Pass 1: Count original temporaries. */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -690,7 +693,8 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
}
}
- /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
+ /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up.
+ * For src temporaries, save the last instruction which uses relative addressing. */
for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -701,7 +705,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[i].RelAddr) {
- return;
+ last_inst_src_reladdr = inst;
}
}
}
@@ -739,9 +743,26 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
}
for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead =
- end_loop ? end_loop : inst;
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ struct rc_instruction *last_read;
+
+ /* From "last_inst_src_reladdr", "end_loop", and "inst",
+ * select the instruction with the highest instruction index (IP).
+ * Note that "end_loop", if available, has always a higher index than "inst". */
+ if (last_inst_src_reladdr) {
+ if (end_loop) {
+ last_read = last_inst_src_reladdr->IP > end_loop->IP ?
+ last_inst_src_reladdr : end_loop;
+ } else {
+ last_read = last_inst_src_reladdr->IP > inst->IP ?
+ last_inst_src_reladdr : inst;
+ }
+ } else {
+ last_read = end_loop ? end_loop : inst;
+ }
+
+ ta[inst->U.I.SrcReg[i].Index].LastRead = last_read;
+ }
}
}
@@ -749,13 +770,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
- unsigned int orig = inst->U.I.SrcReg[i].Index;
- inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+ if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) {
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
- if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = 0;
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = 0;
+ }
}
}
@@ -764,16 +787,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
- for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
+ for(j = 0; j < c->max_temp_regs; ++j) {
if (!hwtemps[j])
break;
}
- if (j >= R300_VS_MAX_TEMPS) {
- fprintf(stderr, "Out of hw temporaries\n");
+ if (j >= c->max_temp_regs) {
+ rc_error(c, "Too many temporaries\n");
+ return;
} else {
ta[orig].Allocated = 1;
- ta[orig].HwTemp = j;
- hwtemps[j] = 1;
+ if (last_inst_src_reladdr &&
+ last_inst_src_reladdr->IP > inst->IP) {
+ ta[orig].HwTemp = orig;
+ } else {
+ ta[orig].HwTemp = j;
+ }
+ hwtemps[ta[orig].HwTemp] = 1;
}
}