summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c76
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h11
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c141
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c30
4 files changed, 182 insertions, 76 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index c442b1f6aa2..9274bc5e3cd 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -174,9 +174,12 @@ reg_name(int reg)
return "$lr";
default:
{
- static char buf[10];
- sprintf(buf, "$%d", reg);
- return buf;
+ /* cycle through four buffers to handle multiple calls per printf */
+ static char buf[4][10];
+ static int b = 0;
+ b = (b + 1) % 4;
+ sprintf(buf[b], "$%d", reg);
+ return buf[b];
}
}
}
@@ -269,15 +272,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
assert(p->num_inst <= p->max_inst);
if (p->print) {
indent(p);
- if (strcmp(name, "spe_lqd") == 0 ||
- strcmp(name, "spe_stqd") == 0) {
- printf("%s\t%s, %d(%s)\n",
- rem_prefix(name), reg_name(rT), imm, reg_name(rA));
- }
- else {
- printf("%s\t%s, %s, 0x%x\n",
- rem_prefix(name), reg_name(rT), reg_name(rA), imm);
- }
+ printf("%s\t%s, %s, 0x%x\n",
+ rem_prefix(name), reg_name(rT), reg_name(rA), imm);
}
}
@@ -379,6 +375,7 @@ void _name (struct spe_function *p, int imm) \
#include "rtasm_ppc_spe.h"
+
/**
* Initialize an spe_function.
* \param code_size size of instruction buffer to allocate, in bytes.
@@ -513,6 +510,20 @@ void spe_release_register_set(struct spe_function *p)
}
+unsigned
+spe_get_registers_used(const struct spe_function *p, ubyte used[])
+{
+ unsigned i, num = 0;
+ /* only count registers in the range available to callers */
+ for (i = 2; i < 80; i++) {
+ if (p->regs[i]) {
+ used[num++] = i;
+ }
+ }
+ return num;
+}
+
+
void
spe_print_code(struct spe_function *p, boolean enable)
{
@@ -540,6 +551,46 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s)
/**
+ * Load quad word.
+ * NOTE: imm is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ p->print = FALSE;
+ assert(offset % 4 == 0);
+ emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
+}
+
+
+/**
+ * Store quad word.
+ * NOTE: imm is in bytes and the least significant 4 bits must be zero!
+ */
+void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset)
+{
+ const boolean pSave = p->print;
+
+ p->print = FALSE;
+ assert(offset % 4 == 0);
+ emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
+ p->print = pSave;
+
+ if (p->print) {
+ indent(p);
+ printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
+ }
+}
+
+
+/**
* For branch instructions:
* \param d if 1, disable interupts if branch is taken
* \param e if 1, enable interupts if branch is taken
@@ -764,6 +815,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui)
spe_release_register(p, tmp_reg);
}
+
/**
* This function is constructed identically to spe_and_uint() above.
* Changes to one should be made in the other.
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index cd2e2454097..47dadb343c2 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -89,6 +89,9 @@ extern void spe_release_register(struct spe_function *p, int reg);
extern void spe_allocate_register_set(struct spe_function *p);
extern void spe_release_register_set(struct spe_function *p);
+extern unsigned
+spe_get_registers_used(const struct spe_function *p, ubyte used[]);
+
extern void spe_print_code(struct spe_function *p, boolean enable);
extern void spe_indent(struct spe_function *p, int spaces);
extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
@@ -128,11 +131,9 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
/* Memory load / store instructions
*/
-EMIT_RI10(spe_lqd, 0x034);
EMIT_RR (spe_lqx, 0x1c4);
EMIT_RI16(spe_lqa, 0x061);
EMIT_RI16(spe_lqr, 0x067);
-EMIT_RI10(spe_stqd, 0x024);
EMIT_RR (spe_stqx, 0x144);
EMIT_RI16(spe_stqa, 0x041);
EMIT_RI16(spe_stqr, 0x047);
@@ -290,6 +291,12 @@ EMIT_RI16(spe_brz, 0x040);
EMIT_RI16(spe_brhnz, 0x046);
EMIT_RI16(spe_brhz, 0x044);
+extern void
+spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);
+
+extern void
+spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);
+
extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e);
extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA,
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 3065869d043..640ebcadbb3 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,6 +84,8 @@ struct codegen
/** Index of execution mask register */
int exec_mask_reg;
+ int frame_size; /**< Stack frame size, in words */
+
struct spe_function *f;
boolean error;
};
@@ -208,7 +210,7 @@ get_src_reg(struct codegen *gen,
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
}
break;
case TGSI_FILE_IMMEDIATE:
@@ -221,7 +223,7 @@ get_src_reg(struct codegen *gen,
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->constants_reg, offset);
+ spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
}
break;
default:
@@ -325,6 +327,7 @@ store_dest_reg(struct codegen *gen,
}
else {
/* we're not inside a condition or loop: do nothing special */
+
}
break;
case TGSI_FILE_OUTPUT:
@@ -337,17 +340,17 @@ store_dest_reg(struct codegen *gen,
/* First read the current value from memory:
* Load: curval = memory[(machine_reg) + offset]
*/
- spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset);
+ spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
/* Mix curval with newvalue according to exec mask:
* d[i] = mask_reg[i] ? value_reg : d_reg
*/
spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
/* Store: memory[(machine_reg) + offset] = curval */
- spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset);
+ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
}
else {
/* Store: memory[(machine_reg) + offset] = reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
}
}
break;
@@ -357,6 +360,41 @@ store_dest_reg(struct codegen *gen,
}
+
+static void
+emit_prologue(struct codegen *gen)
+{
+ gen->frame_size = 256+128; /* XXX temporary */
+
+ spe_comment(gen->f, -4, "Function prologue:");
+
+ /* save $lr on stack # stqd $lr,16($sp) */
+ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* save stack pointer # stqd $sp,-frameSize($sp) */
+ spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+
+ /* adjust stack pointer # ai $sp,$sp,-frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+}
+
+
+static void
+emit_epilogue(struct codegen *gen)
+{
+ spe_comment(gen->f, -4, "Function epilogue:");
+
+ /* restore stack pointer # ai $sp,$sp,frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
+
+ /* restore $lr # lqd $lr,16($sp) */
+ spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* return from function call */
+ spe_bi(gen->f, SPE_REG_RA, 0, 0);
+}
+
+
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
@@ -588,6 +626,7 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
int tmp_reg = get_itemp(gen);
+
/* t = x0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
@@ -603,7 +642,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -623,6 +664,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
int tmp_reg = get_itemp(gen);
+
/* t = x0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
@@ -643,6 +685,8 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -683,6 +727,8 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -1112,9 +1158,6 @@ emit_function_call(struct codegen *gen,
uint addr;
int ch;
- /* XXX temporary value */
- const int frameSize = 64; /* stack frame (activation record) size */
-
assert(num_args <= 3);
/* lookup function address */
@@ -1136,48 +1179,45 @@ emit_function_call(struct codegen *gen,
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int s_regs[3];
- uint a;
+ int s_regs[3], d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint a, i, numUsed;
+
for (a = 0; a < num_args; a++) {
s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
}
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* Basically:
- * save registers on stack
- * move parameters to registers 3, 4, 5...
- * call function
- * save return value (reg 3)
- * restore registers from stack
- */
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 32);
- /* XXX hack: load first function param */
- spe_move(gen->f, 3, s_regs[0]);
-
- /* save $lr on stack # stqd $lr,16($sp) */
- spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
- /* save stack pointer # stqd $sp,-frameSize($sp) */
- spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
-
- /* XXX save registers to stack here */
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
- /* adjust stack pointer # ai $sp,$sp,-frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
+ /* setup function arguments */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
/* branch to function, save return addr */
spe_brasl(gen->f, SPE_REG_RA, addr);
- /* restore stack pointer # ai $sp,$sp,frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize);
-
- /* XXX restore registers from stack here */
-
- /* restore $lr # lqd $lr,16($sp) */
- spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
- /* XXX hack: save function's return value */
+ /* save function's return value */
spe_move(gen->f, d_reg, 3);
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
@@ -1202,10 +1242,11 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
/* d = (s1 > s2) ? s1 : s2 */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
- spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
+ spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg);
+ spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg);
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
@@ -1230,10 +1271,11 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
/* d = (s2 > s1) ? s1 : s2 */
- spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
- spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
+ spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg);
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
@@ -1346,8 +1388,7 @@ static boolean
emit_END(struct codegen *gen)
{
spe_comment(gen->f, -4, "END:");
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
+ emit_epilogue(gen);
return true;
}
@@ -1420,6 +1461,10 @@ emit_instruction(struct codegen *gen,
return emit_function_call(gen, inst, "spu_sin", 1);
case TGSI_OPCODE_POW:
return emit_function_call(gen, inst, "spu_pow", 2);
+ case TGSI_OPCODE_EXPBASE2:
+ return emit_function_call(gen, inst, "spu_exp2", 1);
+ case TGSI_OPCODE_LOGBASE2:
+ return emit_function_call(gen, inst, "spu_log2", 1);
case TGSI_OPCODE_IF:
return emit_IF(gen, inst);
@@ -1532,6 +1577,7 @@ emit_declaration(struct cell_context *cell,
}
+
/**
* Translate TGSI shader code to SPE instructions. This is done when
* the state tracker gives us a new shader (via pipe->create_fs_state()).
@@ -1571,12 +1617,14 @@ cell_gen_fragment_program(struct cell_context *cell,
tgsi_parse_init(&parse, tokens);
+ emit_prologue(&gen);
+
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
- if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
+ if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
gen.error = true;
break;
@@ -1595,7 +1643,6 @@ cell_gen_fragment_program(struct cell_context *cell,
}
}
-
if (gen.error) {
/* terminate the SPE code */
return emit_END(&gen);
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 566df7f59e3..18969005b02 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p,
int col3;
- spe_lqd(p, shuf_hi, shuf_ptr, 3);
- spe_lqd(p, shuf_lo, shuf_ptr, 4);
+ spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
+ spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
spe_shufb(p, t1, row0, row2, shuf_hi);
spe_shufb(p, t2, row0, row2, shuf_lo);
@@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p,
*/
switch (count) {
case 4:
- spe_stqd(p, col3, dest_ptr, 3);
+ spe_stqd(p, col3, dest_ptr, 3 * 16);
case 3:
- spe_stqd(p, col2, dest_ptr, 2);
+ spe_stqd(p, col2, dest_ptr, 2 * 16);
case 2:
- spe_stqd(p, col1, dest_ptr, 1);
+ spe_stqd(p, col1, dest_ptr, 1 * 16);
case 1:
- spe_stqd(p, col0, dest_ptr, 0);
+ spe_stqd(p, col0, dest_ptr, 0 * 16);
}
@@ -166,17 +166,17 @@ emit_fetch(struct spe_function *p,
float scale_signed = 0.0;
float scale_unsigned = 0.0;
- spe_lqd(p, v0, in_ptr, 0 + offset[0]);
- spe_lqd(p, v1, in_ptr, 1 + offset[0]);
- spe_lqd(p, v2, in_ptr, 2 + offset[0]);
- spe_lqd(p, v3, in_ptr, 3 + offset[0]);
+ spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
+ spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
+ spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
+ spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
offset[0] += 4;
switch (bytes) {
case 1:
scale_signed = 1.0f / 127.0f;
scale_unsigned = 1.0f / 255.0f;
- spe_lqd(p, tmp, shuf_ptr, 1);
+ spe_lqd(p, tmp, shuf_ptr, 1 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -185,7 +185,7 @@ emit_fetch(struct spe_function *p,
case 2:
scale_signed = 1.0f / 32767.0f;
scale_unsigned = 1.0f / 65535.0f;
- spe_lqd(p, tmp, shuf_ptr, 2);
+ spe_lqd(p, tmp, shuf_ptr, 2 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -241,11 +241,11 @@ emit_fetch(struct spe_function *p,
switch (count) {
case 1:
- spe_stqd(p, float_zero, out_ptr, 1);
+ spe_stqd(p, float_zero, out_ptr, 1 * 16);
case 2:
- spe_stqd(p, float_zero, out_ptr, 2);
+ spe_stqd(p, float_zero, out_ptr, 2 * 16);
case 3:
- spe_stqd(p, float_one, out_ptr, 3);
+ spe_stqd(p, float_one, out_ptr, 3 * 16);
}
if (float_zero != -1) {