summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2010-05-27 17:14:51 -0700
committerMarek Olšák <[email protected]>2010-06-11 22:06:58 +0200
commit622fd4d061678027d5de2c84d1c07370830c4264 (patch)
tree15eb6911bed9f7be4f66b0245962c213c9b47b9a /src/mesa
parent108264e859b4f435e9608472dc2e388aa200183c (diff)
r300/compiler: Implement simple loop emulation
The loop emulation unrolls loops as many times as possible while still keeping the shader program below the maximum instruction limit. At this point, there are no checks for constant conditionals. This is only enabled for fragment shaders.
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c183
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h6
6 files changed, 230 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 34d22b45591..ff3801dc676 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
radeon_emulate_branches.c \
+ radeon_emulate_loops.c \
radeon_program.c \
radeon_program_print.c \
radeon_opcodes.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7f3b88ed759..38312658d65 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -26,6 +26,7 @@
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "r300_fragprog.h"
@@ -103,6 +104,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+
+ if(c->Base.is_r500){
+ rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ }
+ else{
+ rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ }
+ debug_program_log(c, "after emulate loops");
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 00000000000..b05ba08e13d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+
+/** Working state shared by the functions of the loop emulation pass. */
+struct emulate_loop_state {
+ /* Compiler whose program and memory pool the pass operates on. */
+ struct radeon_compiler * C;
+ /* Array of loops recorded by transform_loop(), one entry per BGNLOOP. */
+ struct loop_info * Loops;
+ /* Number of entries of Loops currently in use. */
+ unsigned int LoopCount;
+ /* Passed to memory_pool_array_reserve() as the reserved size of Loops;
+  * presumably the allocated capacity -- confirm against memory_pool.h. */
+ unsigned int LoopReserved;
+};
+
+/** The pair of marker instructions that delimit one loop. */
+struct loop_info {
+ /* The BGNLOOP instruction the loop was recorded from. */
+ struct rc_instruction * BeginLoop;
+ /* The matching ENDLOOP instruction; zero until transform_loop() finds it. */
+ struct rc_instruction * EndLoop;
+};
+
+/**
+ * Count the instructions that lie strictly between a loop's BeginLoop and
+ * EndLoop markers (the markers themselves are not counted).
+ *
+ * @param loop Loop whose body is measured.
+ * @return Number of instructions in the loop body.
+ */
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+	unsigned int total;
+	struct rc_instruction * cur;
+
+	for(total = 0, cur = loop->BeginLoop->Next;
+	    cur != loop->EndLoop;
+	    cur = cur->Next){
+		total++;
+	}
+	return total;
+}
+
+/**
+ * Work out how many copies of a loop body fit into the instruction budget
+ * when that budget is shared evenly between all recorded loops.
+ *
+ * @param loop Loop whose body size is measured.
+ * @param loop_count Number of loops sharing the budget.
+ * @param max_instructions Total instruction budget.
+ * @return max_instructions / (loop_count * body size), or 0 when there are
+ * no loops or the body is empty (nothing to replicate).
+ */
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+		unsigned int loop_count, unsigned int max_instructions)
+{
+	unsigned int icount = loop_count_instructions(loop);
+
+	/* An empty body or a zero loop count would make the divisor zero. */
+	if(loop_count == 0 || icount == 0){
+		return 0;
+	}
+	return max_instructions / (loop_count * icount);
+}
+
+/**
+ * Unroll one loop in place: drop its BeginLoop/EndLoop markers and append
+ * (iterations - 1) further copies of the body directly after the original
+ * body. With iterations of 0 or 1 only the markers are removed.
+ *
+ * @param s Pass state; s->C is used to allocate the copied instructions.
+ * @param loop Loop to unroll.
+ * @param iterations Total number of body instances wanted, including the
+ * one that already exists.
+ */
+static void loop_unroll(struct emulate_loop_state * s,
+			struct loop_info *loop, unsigned int iterations)
+{
+	/* Capture the body bounds before the markers are unlinked. */
+	struct rc_instruction * body_head = loop->BeginLoop->Next;
+	struct rc_instruction * body_tail = loop->EndLoop->Prev;
+	struct rc_instruction * tail = body_tail;
+	unsigned int pass = 1;
+
+	rc_remove_instruction(loop->BeginLoop);
+	rc_remove_instruction(loop->EndLoop);
+
+	while(pass < iterations){
+		struct rc_instruction * src = body_head;
+		/* body_tail->Next is re-read on purpose: once the first copy
+		 * has been inserted it marks the end of the original body. */
+		while(src != body_tail->Next){
+			struct rc_instruction * copy = rc_alloc_instruction(s->C);
+			memcpy(copy, src, sizeof(*copy));
+			rc_insert_instruction(tail, copy);
+			tail = copy;
+			src = src->Next;
+		}
+		pass++;
+	}
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP;                         -> BGNLOOP;
+ * SGE temp[0], temp[1], temp[2];   -> SLT temp[0], temp[1], temp[2];
+ * IF temp[0];                      -> IF temp[0];
+ * BRK;                             ->
+ * ENDIF;                           -> <Loop Body>
+ * <Loop Body>                      -> ENDIF;
+ * ENDLOOP;                         -> ENDLOOP
+ *
+ * The loop is also recorded in s->Loops. Nested loops are transformed
+ * recursively and are recorded after the loop that contains them.
+ *
+ * @param s Pass state that receives the recorded loop.
+ * @param inst Pointer to a BGNLOOP instruction.
+ * @return The ENDLOOP instruction that closes this loop, so that a caller
+ * advancing with ->Next resumes at the first instruction after the loop.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+						struct rc_instruction * inst)
+{
+	/* The loop is addressed by index rather than by pointer: a nested
+	 * call reserves more space in s->Loops, and the reserve macro (not
+	 * visible here) may presumably replace the array, which would leave
+	 * a held pointer stale. */
+	unsigned int loop_idx;
+	struct rc_instruction * ptr;
+	/* ENDIF that followed the BRK; must outlive individual iterations of
+	 * the scan below because it is re-inserted when ENDLOOP is reached. */
+	struct rc_instruction * endif = NULL;
+	struct rc_instruction * end_loop = NULL;
+
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+	loop_idx = s->LoopCount++;
+	memset(&s->Loops[loop_idx], 0, sizeof(struct loop_info));
+	s->Loops[loop_idx].BeginLoop = inst;
+
+	/* Reverse the SGE instruction.
+	 * NOTE(review): the instruction after BGNLOOP is assumed to be the
+	 * SGE of the loop condition; this is not checked. */
+	ptr = inst->Next;
+	ptr->U.I.Opcode = RC_OPCODE_SLT;
+
+	while(!end_loop){
+		/* Decide where to continue before the list is modified. */
+		struct rc_instruction * next = ptr->Next;
+
+		/* Only normal instructions carry a U.I.Opcode. */
+		if(ptr->Type == RC_INSTRUCTION_NORMAL){
+			switch(ptr->U.I.Opcode){
+			case RC_OPCODE_BGNLOOP:
+				/* Nested loop: continue after its ENDLOOP. */
+				next = transform_loop(s, ptr)->Next;
+				break;
+			case RC_OPCODE_BRK:
+				/* The BRK instruction should always be followed
+				 * by an ENDIF. This ENDIF is re-inserted in
+				 * front of the ENDLOOP instruction below. */
+				endif = ptr->Next;
+				next = endif->Next;
+				rc_remove_instruction(ptr);
+				rc_remove_instruction(endif);
+				break;
+			case RC_OPCODE_ENDLOOP:
+				/* Insert the ENDIF before ENDLOOP, if a
+				 * BRK/ENDIF pair was found in this loop. */
+				if(endif){
+					rc_insert_instruction(ptr->Prev, endif);
+				}
+				end_loop = ptr;
+				break;
+			default:
+				break;
+			}
+		}
+		ptr = next;
+	}
+
+	s->Loops[loop_idx].EndLoop = end_loop;
+	return end_loop;
+}
+
+/**
+ * Walk the whole program and hand every BGNLOOP instruction found at the
+ * top level to transform_loop(); the walk then continues from the
+ * instruction that transform_loop() returns.
+ *
+ * @param s Pass state; s->C->Program.Instructions is the list walked.
+ */
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+	struct rc_instruction * const list_end = &s->C->Program.Instructions;
+	struct rc_instruction * cur;
+
+	for(cur = list_end->Next; cur != list_end; cur = cur->Next){
+		if(cur->Type != RC_INSTRUCTION_NORMAL){
+			continue;
+		}
+		if(cur->U.I.Opcode != RC_OPCODE_BGNLOOP){
+			continue;
+		}
+		cur = transform_loop(s, cur);
+	}
+}
+
+/**
+ * Unroll every recorded loop, giving each an equal share of the
+ * instruction budget.
+ *
+ * @param s Pass state holding the recorded loops.
+ * @param max_instructions Total instruction budget passed on to
+ * loop_calc_iterations().
+ */
+static void rc_unroll_loops(struct emulate_loop_state *s,
+				unsigned int max_instructions)
+{
+	unsigned int remaining = s->LoopCount;
+
+	/* Walk the list of loops from the back so that nested loops are
+	 * unrolled before the loops that contain them. */
+	while(remaining > 0){
+		struct loop_info * loop = &s->Loops[--remaining];
+		unsigned int iterations = loop_calc_iterations(loop,
+					s->LoopCount, max_instructions);
+		loop_unroll(s, loop, iterations);
+	}
+}
+
+/**
+ * Entry point of the loop emulation pass: first rewrites the loops of the
+ * program into a form that can be unrolled, then unrolls them.
+ *
+ * @param c Compiler whose program is rewritten.
+ * @param max_instructions Instruction budget used to size the unrolling.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+	struct emulate_loop_state state;
+
+	memset(&state, 0, sizeof(state));
+	state.C = c;
+
+	/* The two steps below may have to move to r3xx_(vert|frag)prog.c
+	 * so that the optimization passes can run between them; that would
+	 * increase the number of unrolls possible for each loop.
+	 */
+	rc_transform_loops(&state);
+
+	rc_unroll_loops(&state, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 00000000000..ddcf1c0fabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+/* NOTE(review): not referenced by radeon_emulate_loops.c as added in this
+ * patch; presumably meant as an upper bound on unroll iterations -- confirm. */
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+/**
+ * Emulate loops by unrolling them: every BGNLOOP/ENDLOOP loop of the
+ * program is rewritten and its body replicated.
+ *
+ * @param c Compiler whose program is rewritten.
+ * @param max_instructions Instruction budget; it is divided between the
+ * loops to decide how often each body is replicated.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index d593b3e81ae..1dc16855dc1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0
},
{
+ .Opcode = RC_OPCODE_BGNLOOP,
+ .Name = "BGNLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BRK,
+ .Name = "BRK",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDLOOP,
+ .Name = "ENDLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0,
+ },
+ {
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 87a2e23084c..91c82ac0890 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -180,6 +180,12 @@ typedef enum {
/** branch instruction: has no effect */
RC_OPCODE_ENDIF,
+
+ RC_OPCODE_BGNLOOP,
+
+ RC_OPCODE_BRK,
+
+ RC_OPCODE_ENDLOOP,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated