aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
diff options
context:
space:
mode:
author Tom Stellard <[email protected]> 2011-09-20 21:05:55 -0700
committer Tom Stellard <[email protected]> 2012-04-13 22:24:16 -0400
commit b2df031a959f36743527b9abc89913ce4f895de3 (patch)
tree adf844d0e5fd6708f78c2c4422f367cb4c9df75d /src/gallium/drivers/r300/compiler/radeon_vert_fc.c
parent 4a269a8dc0170c75ff22af3910786228727ea41e (diff)
r300/compiler: Fix nested flow control in r500 vertex shaders
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_vert_fc.c')
-rw-r--r-- src/gallium/drivers/r300/compiler/radeon_vert_fc.c 274
1 files changed, 274 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
new file mode 100644
index 00000000000..3568b238299
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
@@ -0,0 +1,274 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+
+struct vert_fc_state { /* Bookkeeping shared by the lowering helpers while rc_vert_fc() walks the program. */
+ struct radeon_compiler *C; /* Compiler instance whose program is being rewritten. */
+ unsigned BranchDepth; /* Number of IFs seen whose ENDIF has not been reached yet. */
+ unsigned LoopDepth; /* Number of BGNLOOPs seen whose ENDLOOP has not been reached yet. */
+ unsigned LoopsReserved; /* Only read by the depth check in lower_bgnloop(); nothing in this file changes it after it is zeroed. */
+ int PredStack[R500_PVS_MAX_LOOP_DEPTH]; /* PredicateReg values saved by lower_bgnloop(), indexed by LoopDepth at that moment. */
+ int PredicateReg; /* Index of the temporary currently used as predicate register; -1 until one is reserved. */
+ unsigned InCFBreak; /* Set by lower_if() when the IF is directly followed by BRK; cleared again at ENDIF. */
+};
+
+/* Make a source operand read the current predicate register.
+ *
+ * Only Index, File and Swizzle of src are overwritten; any other field the
+ * operand already carries is left as it was. */
+static void build_pred_src(
+	struct rc_src_register * src,
+	struct vert_fc_state * fc_state)
+{
+	/* Only the last swizzle slot is populated, and it selects W. */
+	const unsigned int w_only = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
+		RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
+
+	src->Index = fc_state->PredicateReg;
+	src->File = RC_FILE_TEMPORARY;
+	src->Swizzle = w_only;
+}
+
+/* Make a destination operand write the W channel of the current predicate
+ * register.
+ *
+ * Only Index, File and WriteMask of dst are overwritten; in particular the
+ * Pred field of the destination is not touched here. */
+static void build_pred_dst(
+	struct rc_dst_register * dst,
+	struct vert_fc_state * fc_state)
+{
+	dst->Index = fc_state->PredicateReg;
+	dst->File = RC_FILE_TEMPORARY;
+	dst->WriteMask = RC_MASK_W;
+}
+
+/* Write callback handed to rc_for_all_writes_mask().
+ *
+ * userdata is an array of unsigned write masks indexed by register number.
+ * A write is accumulated into that array only when it targets
+ * RC_FILE_TEMPORARY and its index is below R300_VS_MAX_TEMPS; every other
+ * write is ignored. inst is not used. */
+static void mark_write(void * userdata, struct rc_instruction * inst,
+	rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned int * const seen = userdata;
+
+	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
+		seen[index] |= mask;
+	}
+}
+
+/* Pick a temporary register that no instruction currently writes and store
+ * its index in fc_state->PredicateReg.
+ *
+ * The whole program is rescanned on every call, so registers written by
+ * instructions this pass has already emitted count as taken.
+ *
+ * Returns 1 on success. When every candidate below max_temp_regs has some
+ * component written, an error is raised through rc_error() and -1 is
+ * returned; PredicateReg is left unchanged in that case. */
+static int reserve_predicate_reg(struct vert_fc_state * fc_state)
+{
+	struct radeon_compiler * const c = fc_state->C;
+	unsigned int used[RC_REGISTER_MAX_INDEX] = { 0 };
+	struct rc_instruction * cur;
+	int reg;
+
+	/* Collect, per temporary, the union of all components written. */
+	for (cur = c->Program.Instructions.Next;
+	     cur != &c->Program.Instructions;
+	     cur = cur->Next) {
+		rc_for_all_writes_mask(cur, mark_write, used);
+	}
+
+	/* Most of the control flow instructions touch only the W component
+	 * of the predicate register, but according to the docs
+	 * ME_PRED_SET_CLR and ME_PRED_SET_RESTORE write every component.
+	 * A register is therefore only acceptable when none of its
+	 * components is written anywhere. */
+	for (reg = 0; reg < c->max_temp_regs; reg++) {
+		if (used[reg] == 0) {
+			fc_state->PredicateReg = reg;
+			return 1;
+		}
+	}
+
+	rc_error(c, "No free temporary to use for"
+			" predicate stack counter.\n");
+	return -1;
+}
+
+/* Emit the predicate setup that has to run before a BGNLOOP.
+ *
+ * Exactly one instruction is inserted in front of inst; the BGNLOOP itself
+ * is not modified:
+ *  - outside of any IF or loop, an ME_PRED_SEQ with a constant-zero source
+ *    that initializes the predicate register (reserving one first when
+ *    none has been reserved yet);
+ *  - otherwise the current predicate register is saved in PredStack, a
+ *    fresh register is reserved for this loop, and an ADD of the old
+ *    predicate with constant zero copies the value across.
+ *
+ * Problems are reported through rc_error(). When the nesting limit is hit
+ * nothing is inserted at all. The caller is responsible for bumping
+ * LoopDepth afterwards. */
+static void lower_bgnloop(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	struct rc_instruction * new_inst;
+
+	/* LoopsReserved is never advanced by this pass, so testing it alone
+	 * cannot reject anything. LoopDepth is what indexes PredStack below
+	 * (which holds R500_PVS_MAX_LOOP_DEPTH entries), so it has to be
+	 * bounded here as well. */
+	if ((!fc_state->C->is_r500
+		&& (fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH
+		    || fc_state->LoopDepth >= R300_VS_MAX_LOOP_DEPTH))
+	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH
+	     || fc_state->LoopDepth >= R500_PVS_MAX_LOOP_DEPTH) {
+		rc_error(fc_state->C, "Loops are nested too deep.");
+		return;
+	}
+
+	/* Only touch the program once the loop is known to be acceptable. */
+	new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);
+
+	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
+		if (fc_state->PredicateReg == -1) {
+			if (reserve_predicate_reg(fc_state) == -1) {
+				return;
+			}
+		}
+
+		/* Initialize the predicate bit to true. */
+		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
+		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+		new_inst->U.I.SrcReg[0].Index = 0;
+		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+	} else {
+		/* Remember the enclosing predicate register so that
+		 * lower_endloop() can switch back to it. */
+		fc_state->PredStack[fc_state->LoopDepth] =
+			fc_state->PredicateReg;
+
+		/* Copy the current predicate value to this loop's
+		 * predicate register. */
+
+		/* Use the old predicate value for src0. This must happen
+		 * before a new register is reserved, because
+		 * build_pred_src() reads fc_state->PredicateReg. */
+		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+
+		/* Reserve this loop's predicate register */
+		if (reserve_predicate_reg(fc_state) == -1) {
+			return;
+		}
+
+		/* Copy the old predicate value to the new register */
+		new_inst->U.I.Opcode = RC_OPCODE_ADD;
+		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+		new_inst->U.I.SrcReg[1].Index = 0;
+		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
+		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
+	}
+
+}
+
+/* Rewrite a BRK instruction in place.
+ *
+ * The destination always becomes the W channel of the current predicate
+ * register. With LoopDepth equal to 1 the instruction turns into an RCP of
+ * a constant-zero source with DstReg.Pred set to RC_PRED_INV; at any other
+ * depth it turns into ME_PRED_SET_CLR with DstReg.Pred set to
+ * RC_PRED_SET. */
+static void lower_brk(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	/* build_pred_dst() leaves Opcode and Pred alone, so it can run
+	 * before they are chosen. */
+	build_pred_dst(&inst->U.I.DstReg, fc_state);
+
+	if (fc_state->LoopDepth != 1) {
+		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
+		inst->U.I.DstReg.Pred = RC_PRED_SET;
+		return;
+	}
+
+	inst->U.I.Opcode = RC_OPCODE_RCP;
+	inst->U.I.DstReg.Pred = RC_PRED_INV;
+	inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+	inst->U.I.SrcReg[0].Index = 0;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+}
+
+/* Insert an ME_PRED_SET_RESTORE right after an ENDLOOP and switch back to
+ * the predicate register that was active before the loop.
+ *
+ * The new instruction writes the loop's own predicate register and reads
+ * the enclosing one, which is taken from PredStack[LoopDepth - 1]. The
+ * caller must not have decremented LoopDepth yet.
+ *
+ * Exactly one instruction is always inserted, even on error, so callers can
+ * rely on inst->Next being the new instruction. When LoopDepth does not
+ * name a valid PredStack slot (ENDLOOP without BGNLOOP, or nesting beyond
+ * the stack size) an error is raised through rc_error() and PredicateReg
+ * is left unchanged instead of reading outside the array. */
+static void lower_endloop(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	struct rc_instruction * new_inst =
+		rc_insert_new_instruction(fc_state->C, inst);
+
+	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
+	/* The destination must be built while PredicateReg still refers to
+	 * the loop's own register. */
+	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+
+	if (fc_state->LoopDepth == 0
+	    || fc_state->LoopDepth > R500_PVS_MAX_LOOP_DEPTH) {
+		rc_error(fc_state->C, "ENDLOOP has no saved predicate"
+				" register to restore.\n");
+	} else {
+		/* Restore the previous predicate register. */
+		fc_state->PredicateReg =
+			fc_state->PredStack[fc_state->LoopDepth - 1];
+	}
+
+	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+}
+
+/* Rewrite an IF instruction in place into a predicate-setting instruction.
+ *
+ * A predicate register is reserved first when none exists yet; if that
+ * fails the instruction is left untouched. When the instruction following
+ * the IF is a BRK, InCFBreak is raised.
+ *
+ * Opcode selection:
+ *  - outside of any IF or loop, or at LoopDepth 1 with InCFBreak set:
+ *    ME_PRED_SEQ with DstReg.Pred = RC_PRED_SET when InCFBreak is set,
+ *    ME_PRED_SNEQ otherwise;
+ *  - in every other case: VE_PRED_SNEQ_PUSH, with the original condition
+ *    moved to src1 and the current predicate register placed in src0.
+ *
+ * The destination always becomes the W channel of the predicate register.
+ * The caller is responsible for bumping BranchDepth afterwards. */
+static void lower_if(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	int sets_pred_directly;
+
+	/* Reserve a temporary to act as predicate stack counter unless one
+	 * is already available. */
+	if (fc_state->PredicateReg == -1) {
+		/* Inside a loop the predicate register should have been
+		 * defined already. */
+		assert(fc_state->LoopDepth == 0);
+
+		if (reserve_predicate_reg(fc_state) == -1) {
+			return;
+		}
+	}
+
+	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
+		fc_state->InCFBreak = 1;
+	}
+
+	sets_pred_directly =
+		(fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0)
+		|| (fc_state->InCFBreak && fc_state->LoopDepth == 1);
+
+	if (!sets_pred_directly) {
+		/* VE_PRED_SNEQ_PUSH needs the branch condition to be in
+		 * the w component of src1. */
+		unsigned cond_swz =
+			rc_get_scalar_src_swz(inst->U.I.SrcReg[0].Swizzle);
+
+		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
+		inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(
+			RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+			RC_SWIZZLE_UNUSED, cond_swz);
+		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
+	} else if (fc_state->InCFBreak) {
+		inst->U.I.Opcode = RC_ME_PRED_SEQ;
+		inst->U.I.DstReg.Pred = RC_PRED_SET;
+	} else {
+		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
+	}
+
+	build_pred_dst(&inst->U.I.DstReg, fc_state);
+}
+
+/* Compiler pass: lower the structured flow control opcodes of a vertex
+ * program (BGNLOOP, BRK, ENDLOOP, IF, ELSE, ENDIF) to predicate
+ * instructions.
+ *
+ * The program is walked once from first to last instruction while a
+ * vert_fc_state tracks the current IF and loop nesting and the active
+ * predicate register. Every instruction that is not one of the opcodes
+ * above and sits inside at least one IF or loop gets DstReg.Pred set to
+ * RC_PRED_SET.
+ *
+ * c is the compiler whose program is rewritten in place; user is unused. */
+void rc_vert_fc(struct radeon_compiler *c, void *user)
+{
+	struct rc_instruction * inst;
+	struct vert_fc_state fc_state;
+
+	memset(&fc_state, 0, sizeof(fc_state));
+	/* -1 means no predicate register has been reserved yet. */
+	fc_state.PredicateReg = -1;
+	fc_state.C = c;
+
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+
+		switch (inst->U.I.Opcode) {
+
+		case RC_OPCODE_BGNLOOP:
+			lower_bgnloop(inst, &fc_state);
+			fc_state.LoopDepth++;
+			break;
+
+		case RC_OPCODE_BRK:
+			lower_brk(inst, &fc_state);
+			break;
+
+		case RC_OPCODE_ENDLOOP:
+			/* The outermost loop that is not inside any IF needs
+			 * no restore. */
+			if (fc_state.BranchDepth != 0
+					|| fc_state.LoopDepth != 1) {
+				lower_endloop(inst, &fc_state);
+				/* Step over the PRED_RESTORE that was just
+				 * inserted after this ENDLOOP. This must
+				 * only happen when it really was inserted:
+				 * otherwise the instruction following the
+				 * ENDLOOP would never be examined, or, when
+				 * the ENDLOOP is the last instruction, the
+				 * walk would step onto the list head and
+				 * start over. */
+				inst = inst->Next;
+			}
+			fc_state.LoopDepth--;
+			break;
+
+		case RC_OPCODE_IF:
+			lower_if(inst, &fc_state);
+			fc_state.BranchDepth++;
+			break;
+
+		case RC_OPCODE_ELSE:
+			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
+			build_pred_dst(&inst->U.I.DstReg, &fc_state);
+			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+			break;
+
+		case RC_OPCODE_ENDIF:
+			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
+				/* Remove the ENDIF entirely. Step back first
+				 * so that the loop increment continues with
+				 * the instruction that followed it. */
+				struct rc_instruction * to_delete = inst;
+				inst = inst->Prev;
+				rc_remove_instruction(to_delete);
+			} else {
+				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
+				build_pred_dst(&inst->U.I.DstReg, &fc_state);
+				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+			}
+			fc_state.InCFBreak = 0;
+			fc_state.BranchDepth--;
+			break;
+
+		default:
+			/* Ordinary instructions inside flow control are made
+			 * conditional on the predicate. */
+			if (fc_state.BranchDepth || fc_state.LoopDepth) {
+				inst->U.I.DstReg.Pred = RC_PRED_SET;
+			}
+			break;
+		}
+	}
+}