summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2011-05-14 21:47:26 -0700
committerTom Stellard <[email protected]>2011-05-14 22:35:28 -0700
commit6d539579add0a9d3017f441d9fad5d4cd3ae7bb9 (patch)
treebdb1cc77fd7d0e13188ed358a744d72e86f8c5ea /src/mesa/drivers/dri/r300/compiler
parent8a4136f6246dc25f5b4327386a0d7972c90cae82 (diff)
r300/compiler: Use ALU Result for IF conditionals
This saves one instruction per IF.
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c152
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_variable.c61
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_variable.h5
6 files changed, 212 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index e2441e97d87..bb6c010e8e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,8 +109,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ 0, 0 }
};
- struct radeon_program_transformation native_rewrite_r500[] = {
+ struct radeon_program_transformation rewrite_if[] = {
{ &r500_transform_IF, 0 },
+ {0, 0}
+ };
+
+ struct radeon_program_transformation native_rewrite_r500[] = {
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 },
@@ -135,6 +139,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"saturate output writes", 1, sat_out, rc_local_transform, saturate_output},
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
+ {"transform IF", 1, is_r500, rc_local_transform, rewrite_if},
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use},
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 5e0be6b8881..cf99f5e4538 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,6 +30,8 @@
#include <stdio.h>
#include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
#include "../r300_reg.h"
/**
@@ -37,27 +39,143 @@
*/
int r500_transform_IF(
struct radeon_compiler * c,
- struct rc_instruction * inst,
- void* data)
+ struct rc_instruction * inst_if,
+ void *data)
{
- struct rc_instruction * inst_mov;
+ struct rc_variable * writer;
+ struct rc_list * writer_list, * list_ptr;
+ struct rc_list * var_list = rc_get_variables(c);
+ unsigned int generic_if = 0;
+ unsigned int alu_chan;
- if (inst->U.I.Opcode != RC_OPCODE_IF)
+ if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
return 0;
+ }
+
+ writer_list = rc_variable_list_get_writers(
+ var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+ if (!writer_list) {
+ generic_if = 1;
+ } else {
+
+ /* Make sure it is safe for the writers to write to
+ * ALU Result */
+ for (list_ptr = writer_list; list_ptr;
+ list_ptr = list_ptr->Next) {
+ struct rc_instruction * inst;
+ writer = list_ptr->Item;
+ /* We are going to modify the destination register
+ * of writer, so if it has a reader other than
+ * inst_if (aka ReaderCount > 1) we must fall back to
+ * our generic IF.
+ * If the writer has a lower IP than inst_if, this
+ * means that inst_if is above the writer in a loop.
+ * I'm not sure why this would ever happen, but
+ * if it does we want to make sure we fall back
+ * to our generic IF. */
+ if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+ generic_if = 1;
+ break;
+ }
+
+ /* The ALU Result is not preserved across IF
+ * instructions, so if there is another IF
+ * instruction between writer and inst_if, then
+ * we need to fall back to generic IF. */
+ for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+ if (info->IsFlowControl) {
+ generic_if = 1;
+ break;
+ }
+ }
+ if (generic_if) {
+ break;
+ }
+ }
+ }
+
+ if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+ alu_chan = RC_ALURESULT_X;
+ } else {
+ alu_chan = RC_ALURESULT_W;
+ }
+ if (generic_if) {
+ struct rc_instruction * inst_mov =
+ rc_insert_new_instruction(c, inst_if->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.WriteMask = 0;
+ inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+ inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+ inst_mov->U.I.WriteALUResult = alu_chan;
+ inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+ if (alu_chan == RC_ALURESULT_X) {
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+ inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+ } else {
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+ inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+ }
+ } else {
+ rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+ unsigned int reverse_srcs = 0;
+ unsigned int preserve_opcode = 0;
+ for (list_ptr = writer_list; list_ptr;
+ list_ptr = list_ptr->Next) {
+ writer = list_ptr->Item;
+ switch(writer->Inst->U.I.Opcode) {
+ case RC_OPCODE_SEQ:
+ compare_func = RC_COMPARE_FUNC_EQUAL;
+ break;
+ case RC_OPCODE_SNE:
+ compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+ break;
+ case RC_OPCODE_SLE:
+ reverse_srcs = 1;
+ /* Fall through */
+ case RC_OPCODE_SGE:
+ compare_func = RC_COMPARE_FUNC_GEQUAL;
+ break;
+ case RC_OPCODE_SGT:
+ reverse_srcs = 1;
+ /* Fall through */
+ case RC_OPCODE_SLT:
+ compare_func = RC_COMPARE_FUNC_LESS;
+ break;
+ default:
+ compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+ preserve_opcode = 1;
+ break;
+ }
+ if (!preserve_opcode) {
+ writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+ }
+ writer->Inst->U.I.DstReg.WriteMask = 0;
+ writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+ writer->Inst->U.I.WriteALUResult = alu_chan;
+ writer->Inst->U.I.ALUResultCompare = compare_func;
+ if (reverse_srcs) {
+ struct rc_src_register temp_src;
+ temp_src = writer->Inst->U.I.SrcReg[0];
+ writer->Inst->U.I.SrcReg[0] =
+ writer->Inst->U.I.SrcReg[1];
+ writer->Inst->U.I.SrcReg[1] = temp_src;
+ }
+ }
+ }
- inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg.WriteMask = 0;
- inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
- inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
- inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
- RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
-
- inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
- inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
- inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
- inst->U.I.SrcReg[0].Negate = 0;
+ inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+ inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+ inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+ RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+ inst_if->U.I.SrcReg[0].Negate = 0;
return 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 1e665e27641..6aa448cc6f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -44,7 +44,7 @@ extern struct rc_swizzle_caps r500_swizzle_caps;
extern int r500_transform_IF(
struct radeon_compiler * c,
- struct rc_instruction * inst,
+ struct rc_instruction * inst_if,
void* data);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 53ab5fbbbd9..ac73608839e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -517,8 +517,11 @@ static int is_presub_candidate(
assert(inst->U.I.Opcode == RC_OPCODE_ADD);
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+ || inst->U.I.SaturateMode
+ || inst->U.I.WriteALUResult) {
return 0;
+ }
/* If both sources use a constant swizzle, then we can't convert it to
* a presubtract operation. In fact for the ADD and SUB presubtract
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
index 5b2295dc7ce..33181bdcc88 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
@@ -469,6 +469,67 @@ struct rc_list * rc_variable_readers_union(struct rc_variable * var)
return list;
}
+static unsigned int reader_equals_src(
+ struct rc_reader reader,
+ unsigned int src_type,
+ void * src)
+{
+ if (reader.Inst->Type != src_type) {
+ return 0;
+ }
+ if (src_type == RC_INSTRUCTION_NORMAL) {
+ return reader.U.I.Src == src;
+ } else {
+ return reader.U.P.Src == src;
+ }
+}
+
+static unsigned int variable_writes_src(
+ struct rc_variable * var,
+ unsigned int src_type,
+ void * src)
+{
+ unsigned int i;
+ for (i = 0; i < var->ReaderCount; i++) {
+ if (reader_equals_src(var->Readers[i], src_type, src)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+ struct rc_list * var_list,
+ unsigned int src_type,
+ void * src)
+{
+ struct rc_list * list_ptr;
+ struct rc_list * writer_list = NULL;
+ for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+ struct rc_variable * var = list_ptr->Item;
+ if (variable_writes_src(var, src_type, src)) {
+ struct rc_variable * friend;
+ rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+ for (friend = var->Friend; friend;
+ friend = friend->Friend) {
+ if (variable_writes_src(friend, src_type, src)) {
+ rc_list_add(&writer_list,
+ rc_list(&var->C->Pool, friend));
+ }
+ }
+ /* Once we have indentifed the variable and its
+ * friends that write this source, we can stop
+ * stop searching, because we know know of the
+ * other variables in the list will write this source.
+ * If they did they would be friends of var.
+ */
+ break;
+ }
+ }
+ return writer_list;
+}
+
void rc_variable_print(struct rc_variable * var)
{
unsigned int i;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
index b8fbcaa4029..9427bee18a7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
@@ -79,6 +79,11 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var);
struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+struct rc_list * rc_variable_list_get_writers(
+ struct rc_list * var_list,
+ unsigned int src_type,
+ void * src);
+
void rc_variable_print(struct rc_variable * var);
#endif /* RADEON_VARIABLE_H */