summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Brian Paul <[email protected]> 2009-01-04 18:40:48 -0700
committer: Brian Paul <[email protected]> 2009-01-04 18:40:48 -0700
commit: 8f6e2f8620b73c706c21ce4c58bad894d08809a7 (patch)
tree: dc756c752bf338ccae1aafc5dc7f2672e36db10f
parent: 38b526999c91a82691a8a5f3c2d0b6f071758dfc (diff)
cell: initial codegen support for fragment shader loops
Basic for/while loops work now. Only one level of loop nesting is supported at this time (same for if/else). The progs/glsl/mandelbrot demo works, but the colors are too dim.
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fp.c 219
1 files changed, 193 insertions, 26 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 3075017b55d..b503bf56af6 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,7 +84,17 @@ struct codegen
/** Current IF/ELSE/ENDIF nesting level */
int if_nesting;
- /** Index of execution mask register */
+ /** Current BGNLOOP/ENDLOOP nesting level */
+ int loop_nesting;
+ /** Location of start of current loop */
+ int loop_start;
+
+ /** Index of if/conditional mask register */
+ int cond_mask_reg;
+ /** Index of loop mask register */
+ int loop_mask_reg;
+
+ /** Index of master execution mask register */
int exec_mask_reg;
/** KIL mask: indicates which fragments have been killed */
@@ -171,10 +181,10 @@ get_address_reg(struct codegen *gen)
/**
- * Return index of the pixel execution mask.
+ * Return index of the master execution mask.
- * The register is allocated an initialized upon the first call.
+ * The register is allocated and initialized upon the first call.
*
- * The pixel execution mask controls which pixels in a quad are
+ * The master execution mask controls which pixels in a quad are
* modified, according to surrounding conditionals, loops, etc.
*/
static int
@@ -183,19 +193,40 @@ get_exec_mask_reg(struct codegen *gen)
if (gen->exec_mask_reg <= 0) {
gen->exec_mask_reg = spe_allocate_available_register(gen->f);
- spe_indent(gen->f, 4);
- spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
-
- /* exec_mask = {~0, ~0, ~0, ~0} */
+ /* XXX this may not be needed */
+ spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
spe_load_int(gen->f, gen->exec_mask_reg, ~0);
-
- spe_indent(gen->f, -4);
}
return gen->exec_mask_reg;
}
+/**
+ * Return index of the conditional (if/else) execution mask register.
+ * The register is allocated on the first call, i.e. while
+ * gen->cond_mask_reg is still <= 0; later calls return the stored index.
+ * No code is emitted here to initialize the register: emit_IF() loads
+ * ~0 into it before using it.
+ *
+ * NOTE(review): the allocation result is not checked here, although
+ * emit_immediate() treats a negative result from
+ * spe_allocate_available_register() as failure -- confirm that callers
+ * cope with that.
+ */
+static int
+get_cond_mask_reg(struct codegen *gen)
+{
+ if (gen->cond_mask_reg <= 0) { /* <= 0: no register assigned yet */
+ gen->cond_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->cond_mask_reg;
+}
+
+
+/**
+ * Return index of the loop execution mask register.
+ * The register is allocated on the first call, i.e. while
+ * gen->loop_mask_reg is still <= 0; later calls return the stored index.
+ * No code is emitted here to initialize the register: emit_BGNLOOP()
+ * loads ~0 into it at the start of a loop.
+ *
+ * NOTE(review): the allocation result is not checked here, although
+ * emit_immediate() treats a negative result from
+ * spe_allocate_available_register() as failure -- confirm that callers
+ * cope with that.
+ */
+static int
+get_loop_mask_reg(struct codegen *gen)
+{
+ if (gen->loop_mask_reg <= 0) { /* <= 0: no register assigned yet */
+ gen->loop_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->loop_mask_reg;
+}
+
+
+
static boolean
is_register_src(struct codegen *gen, int channel,
const struct tgsi_full_src_register *src)
@@ -354,7 +385,7 @@ get_dst_reg(struct codegen *gen,
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0)
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0)
reg = get_itemp(gen);
else
reg = gen->temp_regs[dest->DstRegister.Index][channel];
@@ -399,7 +430,7 @@ store_dest_reg(struct codegen *gen,
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0) {
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
int exec_reg = get_exec_mask_reg(gen);
/* Mix d with new value according to exec mask:
@@ -416,7 +447,7 @@ store_dest_reg(struct codegen *gen,
{
/* offset is measured in quadwords, not bytes */
int offset = dest->DstRegister.Index * 4 + channel;
- if (gen->if_nesting > 0) {
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
int exec_reg = get_exec_mask_reg(gen);
int curval_reg = get_itemp(gen);
/* First read the current value from memory:
@@ -1011,8 +1042,6 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
bool complement = FALSE;
- one_reg = get_const_one_reg(gen);
-
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_SGT:
spe_comment(gen->f, -4, "SGT:");
@@ -1039,6 +1068,8 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
;
}
+ one_reg = get_const_one_reg(gen);
+
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
@@ -1515,7 +1546,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
}
- if (gen->if_nesting) {
+ if (gen->if_nesting || gen->loop_nesting) {
/* may have been a conditional kil */
spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
}
@@ -1573,15 +1604,56 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
+/**
+ * Emit code to update the execution mask.
+ * This needs to be done whenever the execution status of a conditional
+ * or loop is changed.
+ *
+ * The master mask is rebuilt from the current nesting counters in gen:
+ *   if_nesting > 0 and loop_nesting > 0:  exec_mask = cond_mask & loop_mask
+ *   if_nesting > 0 only:                  exec_mask = cond_mask
+ *   loop_nesting > 0 only:                exec_mask = loop_mask
+ *   neither:                              exec_mask = ~0
+ *
+ * The cond/loop register indexes are read straight from gen rather than
+ * through get_cond_mask_reg()/get_loop_mask_reg(), so nothing is
+ * allocated here; the asserts check that a register was already assigned
+ * in the branches that use it.
+ */
+static void
+emit_update_exec_mask(struct codegen *gen)
+{
+ const int exec_reg = get_exec_mask_reg(gen); /* allocates and loads ~0 on first call */
+ const int cond_reg = gen->cond_mask_reg;
+ const int loop_reg = gen->loop_mask_reg;
+
+ spe_comment(gen->f, 0, "Update master execution mask");
+
+ if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
+ /* exec_mask = cond_mask & loop_mask */
+ assert(cond_reg > 0);
+ assert(loop_reg > 0);
+ spe_and(gen->f, exec_reg, cond_reg, loop_reg);
+ }
+ else if (gen->if_nesting > 0) {
+ assert(cond_reg > 0);
+ spe_move(gen->f, exec_reg, cond_reg); /* exec_mask = cond_mask */
+ }
+ else if (gen->loop_nesting > 0) {
+ assert(loop_reg > 0);
+ spe_move(gen->f, exec_reg, loop_reg); /* exec_mask = loop_mask */
+ }
+ else {
+ spe_load_int(gen->f, exec_reg, ~0x0); /* outside any IF or loop: all bits set */
+ }
+}
+
+
static boolean
emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
const int channel = 0;
- const int exec_reg = get_exec_mask_reg(gen);
+ int cond_reg;
spe_comment(gen->f, -4, "IF:");
- /* update execution mask with the predicate register */
+ cond_reg = get_cond_mask_reg(gen);
+
+ /* XXX push cond exec mask */
+
+ spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
+ spe_load_int(gen->f, cond_reg, ~0);
+
+ /* update conditional execution mask with the predicate register */
int tmp_reg = get_itemp(gen);
int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
@@ -1589,11 +1661,14 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
/* tmp = !tmp */
spe_complement(gen->f, tmp_reg, tmp_reg);
- /* exec_mask = exec_mask & tmp */
- spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
+ /* cond_mask = cond_mask & tmp */
+ spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
gen->if_nesting++;
+ /* update the master execution mask */
+ emit_update_exec_mask(gen);
+
free_itemps(gen);
return TRUE;
@@ -1603,12 +1678,13 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- const int exec_reg = get_exec_mask_reg(gen);
+ const int cond_reg = get_cond_mask_reg(gen);
spe_comment(gen->f, -4, "ELSE:");
- /* exec_mask = !exec_mask */
- spe_complement(gen->f, exec_reg, exec_reg);
+ spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
+ spe_complement(gen->f, cond_reg, cond_reg);
+ emit_update_exec_mask(gen);
return TRUE;
}
@@ -1617,15 +1693,93 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
+ spe_comment(gen->f, -4, "ENDIF:");
+
+ /* XXX todo: pop cond exec mask */
+
+ gen->if_nesting--;
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+/**
+ * Emit code for the start of a loop (BGNLOOP).
+ * Loads ~0 into the loop mask register, increments gen->loop_nesting and
+ * records the current code size in gen->loop_start so that
+ * emit_ENDLOOP() can compute the backward branch offset.  The position is
+ * recorded after the mask load, so the branch target lies past the
+ * initialization.
+ *
+ * gen->loop_start is a single value and the loop mask is not pushed
+ * (see the XXX note below), so the state of an enclosing loop would be
+ * overwritten by a nested BGNLOOP.
+ *
+ * \param gen   code generation state; loop_nesting and loop_start are updated
+ * \param inst  the TGSI instruction (not referenced in this function)
+ * \return TRUE always
+ *
+ * NOTE(review): exec_reg is assigned but not used afterwards; the call to
+ * get_exec_mask_reg() still allocates and initializes the master mask on
+ * its first invocation -- confirm whether that side effect is intended.
+ */
+static boolean
+emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int exec_reg, loop_reg;
+
+ spe_comment(gen->f, -4, "BGNLOOP:");
+
+ exec_reg = get_exec_mask_reg(gen);
+ loop_reg = get_loop_mask_reg(gen);
+
+ /* XXX push loop_exec mask */
+
+ spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
+ spe_load_int(gen->f, loop_reg, ~0x0);
+
+ gen->loop_nesting++;
+ gen->loop_start = spe_code_size(gen->f); /* in bytes; branch target used by emit_ENDLOOP() */
+
+ return TRUE;
+}
+
+/**
+ * Emit code for the end of a loop (ENDLOOP).
+ * Reduces the loop mask into a temporary, emits a conditional branch back
+ * to the position recorded in gen->loop_start by emit_BGNLOOP() that is
+ * taken while the reduced value is non-zero, then decrements
+ * gen->loop_nesting and rebuilds the master execution mask for the code
+ * following the loop.
+ *
+ * \param gen   code generation state; loop_nesting is decremented
+ * \param inst  the TGSI instruction (not referenced in this function)
+ * \return TRUE always
+ *
+ * NOTE(review): tmp_reg comes from get_itemp() but free_itemps() is not
+ * called here, unlike in emit_IF() -- confirm that the temporary is
+ * released elsewhere.
+ */
+static boolean
+emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int loop_reg = get_loop_mask_reg(gen);
+ const int tmp_reg = get_itemp(gen);
+ int offset;
+
+ spe_comment(gen->f, -4, "ENDLOOP:");
+
+ /* tmp_reg = loop[0] | loop[1] | loop[2] | loop[3]  (OR across the loop mask) */
+ spe_orx(gen->f, tmp_reg, loop_reg);
+
+ offset = gen->loop_start - spe_code_size(gen->f); /* in bytes; expected to be negative (backward branch) */
+
+ /* branch back to top of loop if tmp_reg != 0 */
+ spe_brnz(gen->f, tmp_reg, offset / 4); /* NOTE(review): / 4 presumably converts bytes to 4-byte instruction units -- confirm against spe_brnz() */
+
+ /* XXX pop loop_exec mask */
+
+ gen->loop_nesting--;
+
+ emit_update_exec_mask(gen); /* recompute master mask now that loop_nesting has dropped */
+
+ return TRUE;
+}
+
+
+static boolean
+emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
const int exec_reg = get_exec_mask_reg(gen);
+ const int loop_reg = get_loop_mask_reg(gen);
- spe_comment(gen->f, -4, "ENDIF:");
+ spe_comment(gen->f, -4, "BREAK:");
- /* XXX todo: pop execution mask */
+ assert(gen->loop_nesting > 0);
- spe_load_int(gen->f, exec_reg, ~0x0);
+ spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
+ spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ spe_comment(gen->f, -4, "CONT:");
+
+ assert(gen->loop_nesting > 0);
- gen->if_nesting--;
return TRUE;
}
@@ -1766,6 +1920,15 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_ENDIF:
return emit_ENDIF(gen, inst);
+ case TGSI_OPCODE_BGNLOOP2:
+ return emit_BGNLOOP(gen, inst);
+ case TGSI_OPCODE_ENDLOOP2:
+ return emit_ENDLOOP(gen, inst);
+ case TGSI_OPCODE_BRK:
+ return emit_BRK(gen, inst);
+ case TGSI_OPCODE_CONT:
+ return emit_CONT(gen, inst);
+
case TGSI_OPCODE_DDX:
return emit_DDX_DDY(gen, inst, TRUE);
case TGSI_OPCODE_DDY:
@@ -1807,11 +1970,15 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
}
else {
+ char str[100];
int reg = spe_allocate_available_register(gen->f);
if (reg < 0)
return FALSE;
+ sprintf(str, "init $%d = %f", reg, val);
+ spe_comment(gen->f, 0, str);
+
/* update immediate map */
gen->imm_regs[gen->num_imm][ch] = reg;