summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Tom Stellard <[email protected]> 2010-08-30 08:59:30 -0700
committer Tom Stellard <[email protected]> 2010-09-10 18:18:09 -0700
commit a64b4a05af362fff52c9e52eb51cd92fe164afcc (patch)
tree de658e53aefbcc154756fb6e8ee81f7f788429d2 /src
parent 598e220f9581db750cf212c59275d25fda810fe1 (diff)
r300/compiler: Add peephole optimization for the 'add' presubtract operation
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 226
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c | 58
2 files changed, 200 insertions, 84 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index e01ba85aa56..3ff07d60396 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -38,6 +38,10 @@ struct peephole_state {
unsigned int WriteMask;
};
+typedef void (*rc_presub_replace_fn)(struct peephole_state *, /* state whose Inst is the ADD being folded into its readers */
+ struct rc_instruction *, /* reader instruction to rewrite */
+ unsigned int); /* index of the reader's source register to replace */
+
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
struct rc_src_register combine;
@@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
}
}
-/**
- * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
- * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
- * of the add instruction must have the constatnt 1 swizzle. This function
- * does not check const registers to see if their value is 1.0, so it should
- * be called after the constant_folding optimization.
- * @return
- * 0 if the ADD instruction is still part of the program.
- * 1 if the ADD instruction is no longer part of the program.
- */
-static int peephole_add_presub_inv(
+static int presub_helper(
struct radeon_compiler * c,
- struct rc_instruction * inst_add)
+ struct peephole_state * s,
+ rc_presubtract_op presub_opcode,
+ rc_presub_replace_fn presub_replace)
{
- unsigned int i, swz, mask;
+ struct rc_instruction * inst;
unsigned int can_remove = 0;
unsigned int cant_sub = 0;
- struct rc_instruction * inst;
- struct peephole_state s;
-
- if (inst_add->U.I.SaturateMode)
- return 0;
-
- mask = inst_add->U.I.DstReg.WriteMask;
-
- /* Check if src0 is 1. */
- /* XXX It would be nice to use is_src_uniform_constant here, but that
- * function only works if the register's file is RC_FILE_NONE */
- for(i = 0; i < 4; i++ ) {
- swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
- if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
- && swz != RC_SWIZZLE_ONE) {
- return 0;
- }
- }
- /* Check src1. */
- if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
- inst_add->U.I.DstReg.WriteMask
- || inst_add->U.I.SrcReg[1].Abs
- || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
- && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
- || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
-
- return 0;
- }
-
- /* Setup the peephole_state information. */
- s.Inst = inst_add;
- s.WriteMask = inst_add->U.I.DstReg.WriteMask;
-
- /* For all instructions that read inst_add->U.I.DstReg before it is
- * written again, use the 1 - src0 presubtact instead. */
- for(inst = inst_add->Next; inst != &c->Program.Instructions;
+ for(inst = s->Inst->Next; inst != &c->Program.Instructions;
inst = inst->Next) {
+ unsigned int i;
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
for(i = 0; i < info->NumSrcRegs; i++) {
- if(inst_add->U.I.DstReg.WriteMask !=
+ if(s->Inst->U.I.DstReg.WriteMask !=
src_reads_dst_mask(inst->U.I.SrcReg[i],
- inst_add->U.I.DstReg)) {
+ s->Inst->U.I.DstReg)) {
continue;
}
if (cant_sub) {
@@ -601,47 +563,173 @@ static int peephole_add_presub_inv(
* instruction, unless the two prsubtract operations
* are the same and read from the same registers. */
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
+ if (inst->U.I.PreSub.Opcode != presub_opcode
|| inst->U.I.PreSub.SrcReg[0].File !=
- inst_add->U.I.SrcReg[1].File
+ s->Inst->U.I.SrcReg[1].File
|| inst->U.I.PreSub.SrcReg[0].Index !=
- inst_add->U.I.SrcReg[1].Index) {
+ s->Inst->U.I.SrcReg[1].Index) {
can_remove = 0;
break;
}
}
- /* We must be careful not to modify inst_add, since it
- * is possible it will remain part of the program. */
- inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
- inst->U.I.PreSub.SrcReg[0].Negate = 0;
- inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
- inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
- inst->U.I.PreSub.SrcReg[0]);
-
- inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
- inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
+ presub_replace(s, inst, i);
can_remove = 1;
}
if(!can_remove)
break;
- rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, s);
/* If all components of inst_add's destination register have
* been written to by subsequent instructions, the original
* value of the destination register is no longer valid and
* we can't keep doing substitutions. */
- if (!s.WriteMask){
+ if (!s->WriteMask){
break;
}
/* Make this instruction doesn't write to the presubtract source. */
if (inst->U.I.DstReg.WriteMask &
- src_reads_dst_mask(inst_add->U.I.SrcReg[1],
+ src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
inst->U.I.DstReg)
|| info->IsFlowControl) {
cant_sub = 1;
}
}
- if(can_remove) {
+ return can_remove;
+}
+
+static void presub_replace_add(struct peephole_state *s,
+ struct rc_instruction * inst,
+ unsigned int src_index)
+{ /* Make source src_index of inst read the 'add' presubtract of the two sources of s->Inst. */
+ inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; /* copy both ADD operands; s->Inst itself is not modified */
+ inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+ inst->U.I.PreSub.SrcReg[0].Negate = 0; /* Negate is dropped on both copies, so callers must only pass non-negated operands */
+ inst->U.I.PreSub.SrcReg[1].Negate = 0;
+ inst->U.I.PreSub.Opcode = RC_PRESUB_ADD;
+ inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], /* NOTE(review): presumably composes the reader's swizzle/modifiers with presub src0 - confirm in chain_srcregs */
+ inst->U.I.PreSub.SrcReg[0]);
+ inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; /* retarget the reader's source at the presubtract result */
+ inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD;
+}
+
+static int peephole_add_presub_add(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add)
+{ /* Try to fold the ADD inst_add into its readers as an 'add' presubtract. Returns 1 if inst_add was removed from the program, 0 if it is still part of it. */
+ struct rc_src_register * src0 = NULL;
+ struct rc_src_register * src1 = NULL;
+ unsigned int i;
+ struct peephole_state s;
+
+ if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) /* the ADD itself already uses a presubtract operation */
+ return 0;
+
+ if (inst_add->U.I.SaturateMode)
+ return 0;
+
+ if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+ return 0;
+
+ /* Neither source may use Abs. src0 becomes the source negated on every written channel (if any), src1 the other one. */
+ for (i = 0; i < 2; i++) {
+ if (inst_add->U.I.SrcReg[i].Abs)
+ return 0;
+ if ((inst_add->U.I.SrcReg[i].Negate
+ & inst_add->U.I.DstReg.WriteMask) ==
+ inst_add->U.I.DstReg.WriteMask) {
+ src0 = &inst_add->U.I.SrcReg[i];
+ } else if (!src1) {
+ src1 = &inst_add->U.I.SrcReg[i];
+ } else {
+ src0 = &inst_add->U.I.SrcReg[i];
+ }
+ }
+
+ if (!src1) /* both sources are negated on every written channel */
+ return 0;
+
+ /* XXX Only do add for now. src1 must be checked too: it may be negated on only some channels, and presub_replace_add() clears Negate on both operands. */
+ if (src0->Negate || src1->Negate)
+ return 0;
+
+ s.Inst = inst_add;
+ s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+ if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+ rc_remove_instruction(inst_add); /* every reader now uses the presubtract, so the ADD is dead */
+ return 1;
+ }
+ return 0;
+}
+
+static void presub_replace_inv(struct peephole_state * s,
+ struct rc_instruction * inst,
+ unsigned int src_index)
+{ /* Make source src_index of inst read the '1 - src' presubtract of the second source of s->Inst. */
+ /* We must be careful not to modify s->Inst, since it
+ * is possible it will remain part of the program.
+ * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
+ inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; /* the operand the ADD subtracts from 1 */
+ inst->U.I.PreSub.SrcReg[0].Negate = 0; /* drop the ADD's Negate on the copy only */
+ inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+ inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], /* NOTE(review): presumably composes the reader's swizzle/modifiers with presub src0 - confirm in chain_srcregs */
+ inst->U.I.PreSub.SrcReg[0]);
+
+ inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; /* retarget the reader's source at the presubtract result */
+ inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
+ * of the add instruction must have the constant 1 swizzle. This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 0 if the ADD instruction is still part of the program.
+ * 1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add)
+{
+ unsigned int i, swz, mask;
+ struct peephole_state s;
+
+ if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+ return 0;
+
+ if (inst_add->U.I.SaturateMode)
+ return 0;
+
+ mask = inst_add->U.I.DstReg.WriteMask;
+
+ /* Check if src0 is 1. */
+ /* XXX It would be nice to use is_src_uniform_constant here, but that
+ * function only works if the register's file is RC_FILE_NONE */
+ for(i = 0; i < 4; i++ ) {
+ swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+ if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+ && swz != RC_SWIZZLE_ONE) {
+ return 0;
+ }
+ }
+
+ /* Check src1. */
+ if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+ inst_add->U.I.DstReg.WriteMask
+ || inst_add->U.I.SrcReg[1].Abs
+ || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+ && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+ || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+ return 0;
+ }
+
+ /* Setup the peephole_state information. */
+ s.Inst = inst_add;
+ s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+
+ if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
if (c->has_presub) {
if(peephole_add_presub_inv(c, inst))
return 1;
+ if(peephole_add_presub_add(c, inst))
+ return 1;
}
break;
default:
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 32c54fd74bc..5269d659851 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -300,6 +300,7 @@ static int destructive_merge_instructions(
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
+ unsigned int one_way = 0;
struct radeon_pair_instruction_source srcp =
alpha->RGB.Src[srcp_src];
struct radeon_pair_instruction_source temp;
@@ -307,14 +308,27 @@ static int destructive_merge_instructions(
* 3rd arg of 0 means this is not an alpha source. */
free_source = rc_pair_alloc_source(rgb, 1, 0,
srcp.File, srcp.Index);
- /* If free_source == srcp_src, then either the
- * presubtract source is already in the correct place. */
- if (free_source == srcp_src)
- continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;
+
+ temp = rgb->RGB.Src[srcp_src]; /* remember the source currently occupying the presubtract slot */
+ rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
+ /* srcp needs src0 and src1 to be the same */
+ if (free_source < srcp_src) {
+ if (!temp.Used)
+ continue;
+ free_source = rc_pair_alloc_source(rgb, 1, 0, /* find a new slot for the displaced source (temp), as the alpha path does; srcp already has its slot */
+ temp.File, temp.Index);
+ one_way = 1;
+ } else {
+ rgb->RGB.Src[free_source] = temp;
+ }
+ /* If free_source == srcp_src, then the presubtract
+ * source is already in the correct place. */
+ if (free_source == srcp_src)
+ continue;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -331,12 +345,11 @@ static int destructive_merge_instructions(
/* We need to do this just in case register
* is one of the sources already, but in the
* wrong spot. */
- else if(rgb->RGB.Arg[arg].Source == free_source)
+ else if(rgb->RGB.Arg[arg].Source == free_source
+ && !one_way) {
rgb->RGB.Arg[arg].Source = srcp_src;
+ }
}
- temp = rgb->RGB.Src[srcp_src];
- rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
- rgb->RGB.Src[free_source] = temp;
}
}
@@ -352,6 +365,7 @@ static int destructive_merge_instructions(
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
+ unsigned int one_way = 0;
struct radeon_pair_instruction_source srcp =
alpha->Alpha.Src[srcp_src];
struct radeon_pair_instruction_source temp;
@@ -359,14 +373,27 @@ static int destructive_merge_instructions(
* 3rd arg of 1 means this is an alpha source. */
free_source = rc_pair_alloc_source(rgb, 0, 1,
srcp.File, srcp.Index);
- /* If free_source == srcp_src, then either the
- * presubtract source is already in the correct place. */
- if (free_source == srcp_src)
- continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;
+
+ temp = rgb->Alpha.Src[srcp_src];
+ rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
+ /* srcp needs src0 and src1 to be the same. */
+ if (free_source < srcp_src) {
+ if (!temp.Used)
+ continue;
+ free_source = rc_pair_alloc_source(rgb, 0, 1,
+ temp.File, temp.Index);
+ one_way = 1;
+ } else {
+ rgb->Alpha.Src[free_source] = temp;
+ }
+ /* If free_source == srcp_src, then the presubtract
+ * source is already in the correct place. */
+ if (free_source == srcp_src)
+ continue;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -380,12 +407,11 @@ static int destructive_merge_instructions(
}
if (rgb->RGB.Arg[arg].Source == srcp_src)
rgb->RGB.Arg[arg].Source = free_source;
- else if (rgb->RGB.Arg[arg].Source == free_source)
+ else if (rgb->RGB.Arg[arg].Source == free_source
+ && !one_way) {
rgb->RGB.Arg[arg].Source = srcp_src;
+ }
}
- temp = rgb->Alpha.Src[srcp_src];
- rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
- rgb->Alpha.Src[free_source] = temp;
}
}