aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorTom Stellard <[email protected]>2010-08-05 10:19:00 -0700
committerTom Stellard <[email protected]>2010-08-10 13:17:25 -0700
commitc298bab60ea63882f34825a35cbc60f662783e64 (patch)
tree2a579f6da8146bd34fd9e5ec9090c76604d46dbb /src/mesa/drivers
parent5c7b7da798b47126d9cc887b0df4b2c24ad9176b (diff)
r300/compiler: Implement hardware assisted loops for vertex shaders.
Single loops work, but nested loops do not.
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c115
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c21
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h19
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h3
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h21
8 files changed, 170 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index c6246a81a24..d2fa816894c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state, -1);
debug_program_log(c, "after transform loops");
rc_emulate_branches(&c->Base);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index e940fedec20..7c2ba2fc092 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,6 +32,11 @@
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+struct loop {
+ int BgnLoop;
+
+};
+
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -337,6 +342,10 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
{
struct rc_instruction *rci;
+ struct loop * loops;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -385,6 +394,68 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l = &loops[current_loop_depth - 1];
+ unsigned int act_addr = l->BgnLoop - 1;
+ unsigned int last_addr = (compiler->code->length / 4) - 1;
+ unsigned int ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
@@ -406,6 +477,7 @@ struct temporary_allocation {
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
@@ -440,10 +512,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ ta[inst->U.I.SrcReg[i].Index].LastRead =
+ end_loop ? end_loop : inst;
}
}
@@ -640,17 +737,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
debug_program_log(compiler, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
+ if (compiler->Base.is_r500)
+ rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ else
+ rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
- if (compiler->Base.is_r500){
- rc_transform_loops(&compiler->Base, &loop_state);
- rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
- } else {
- rc_transform_loops(&compiler->Base, &loop_state);
- rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
- }
debug_program_log(compiler, "after emulate loops");
rc_emulate_branches(&compiler->Base);
@@ -717,6 +808,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
+ r300_vertex_program_dump(compiler);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 5800f1a78e1..66e352d05dc 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -20,6 +20,7 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "radeon_compiler.h"
#include "radeon_code.h"
#include <stdio.h>
@@ -160,8 +161,9 @@ static void r300_vs_src_dump(uint32_t src)
r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c)
{
+ struct r300_vertex_program_code * vs = c->code;
unsigned instrcount = vs->length / 4;
unsigned i;
@@ -177,4 +179,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
r300_vs_src_dump(vs->body.d[offset+1+src]);
}
}
+
+ fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+ for(i = 0; i < vs->num_fc_ops; i++) {
+ switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+ case 0: fprintf(stderr, "NOP"); break;
+ case 1: fprintf(stderr, "JUMP"); break;
+ case 2: fprintf(stderr, "LOOP"); break;
+ case 3: fprintf(stderr, "JSR"); break;
+ }
+ if (c->Base.is_r500) {
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ vs->fc_op_addrs.r500[i].uw,
+ vs->fc_op_addrs.r500[i].lw);
+ } else {
+ fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index e14a3520dd3..896246d2035 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -243,6 +243,12 @@ struct rX00_fragment_program_code {
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -263,9 +269,18 @@ struct r300_vertex_program_code {
uint32_t InputsRead;
uint32_t OutputsWritten;
-};
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+ unsigned int num_fc_ops;
+ uint32_t fc_ops;
+ union {
+ uint32_t r300[R300_VS_MAX_FC_OPS];
+ struct {
+ uint32_t lw;
+ uint32_t uw;
+ } r500[R300_VS_MAX_FC_OPS];
+ } fc_op_addrs;
+ int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
#endif /* RADEON_CODE_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f15905d79d4..bbd57cca63f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -113,5 +113,6 @@ struct r300_vertex_program_compiler {
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 2a3306f9065..32d4b45dd6d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -423,7 +423,8 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
* @param inst A pointer to a BGNLOOP instruction.
* @return 1 for success, 0 for failure
*/
-int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst)
+static int transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
{
struct loop_info * loop;
@@ -435,7 +436,7 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst)
if (!build_loop_info(s->C, loop, inst))
return 0;
- if(try_unroll_loop(s->C, loop, -1)){
+ if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
return 1;
}
@@ -472,12 +473,13 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst)
}
void rc_transform_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+ struct emulate_loop_state * s, int prog_inst_limit)
{
struct rc_instruction * ptr;
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
+ s->prog_inst_limit = prog_inst_limit;
for(ptr = s->C->Program.Instructions.Next;
ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 86d91ef14bd..bba1f68e308 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -21,10 +21,11 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
+ int prog_inst_limit;
};
void rc_transform_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+ struct emulate_loop_state * s, int prog_inst_limit);
void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f25264b6f2d..f7705b0f6fe 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct