diff options
author | Brian Paul <[email protected]> | 2008-10-08 20:44:32 -0600 |
---|---|---|
committer | Brian Paul <[email protected]> | 2008-10-08 20:44:32 -0600 |
commit | d48a92e88040470f93e2186f8eb23e4797a09860 (patch) | |
tree | 7fbfd01dc04b42e42b4457eae507693ef8dc5f3b /src/gallium/drivers/cell | |
parent | a4e477433f485a39b5de448d0a9cb6f4bf9bb90f (diff) |
cell: implement function calls from shader code. fslight demo runs now.
Used for SIN, COS, EXP2, LOG2, POW instructions. TEX next.
Fixed some bugs in MIN, MAX, DP3, DP4, DPH instructions.
In rtasm code:
Special-case spe_lqd(), spe_stqd() functions so they take byte offsets but
low-order 4 bits are shifted out. This makes things consistant with SPU
assembly language conventions.
Added spe_get_registers_used() function.
Diffstat (limited to 'src/gallium/drivers/cell')
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fp.c | 141 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 30 |
2 files changed, 109 insertions, 62 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3065869d043..640ebcadbb3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -84,6 +84,8 @@ struct codegen /** Index of execution mask register */ int exec_mask_reg; + int frame_size; /**< Stack frame size, in words */ + struct spe_function *f; boolean error; }; @@ -208,7 +210,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset); + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); } break; case TGSI_FILE_IMMEDIATE: @@ -221,7 +223,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset); + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; default: @@ -325,6 +327,7 @@ store_dest_reg(struct codegen *gen, } else { /* we're not inside a condition or loop: do nothing special */ + } break; case TGSI_FILE_OUTPUT: @@ -337,17 +340,17 @@ store_dest_reg(struct codegen *gen, /* First read the current value from memory: * Load: curval = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); /* Mix curval with newvalue according to exec mask: * d[i] = mask_reg[i] ? value_reg : d_reg */ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); } else { /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); } } break; @@ -357,6 +360,41 @@ store_dest_reg(struct codegen *gen, } + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 256+128; /* XXX temporary */ + + spe_comment(gen->f, -4, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); +} + + +static void +emit_epilogue(struct codegen *gen) +{ + spe_comment(gen->f, -4, "Function epilogue:"); + + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { @@ -588,6 +626,7 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -603,7 +642,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } } @@ -623,6 +664,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -643,6 +685,8 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -683,6 +727,8 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -1112,9 +1158,6 @@ emit_function_call(struct codegen *gen, uint addr; int ch; - /* XXX temporary value */ - const int frameSize = 64; /* stack frame (activation record) size */ - assert(num_args <= 3); /* lookup function address */ @@ -1136,48 +1179,45 @@ emit_function_call(struct codegen *gen, for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int s_regs[3]; - uint a; + int s_regs[3], d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint a, i, numUsed; + for (a = 0; a < num_args; a++) { s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); } + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* Basically: - * save registers on stack - * move parameters to registers 3, 4, 5... - * call function - * save return value (reg 3) - * restore registers from stack - */ + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 32); - /* XXX hack: load first function param */ - spe_move(gen->f, 3, s_regs[0]); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); - - /* XXX save registers to stack here */ + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } /* branch to function, save return addr */ spe_brasl(gen->f, SPE_REG_RA, addr); - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize); - - /* XXX restore registers from stack here */ - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* XXX hack: save function's return value */ + /* save function's return value */ spe_move(gen->f, d_reg, 3); + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -1202,10 +1242,11 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s1 > s2) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1230,10 +1271,11 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s2 > s1) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1346,8 +1388,7 @@ static boolean emit_END(struct codegen *gen) { spe_comment(gen->f, -4, "END:"); - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); + emit_epilogue(gen); return true; } @@ -1420,6 +1461,10 @@ emit_instruction(struct codegen *gen, return emit_function_call(gen, inst, "spu_sin", 1); case TGSI_OPCODE_POW: return emit_function_call(gen, inst, "spu_pow", 2); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1); case TGSI_OPCODE_IF: return emit_IF(gen, inst); @@ -1532,6 +1577,7 @@ emit_declaration(struct cell_context *cell, } + /** * Translate TGSI shader code to SPE instructions. This is done when * the state tracker gives us a new shader (via pipe->create_fs_state()). @@ -1571,12 +1617,14 @@ cell_gen_fragment_program(struct cell_context *cell, tgsi_parse_init(&parse, tokens); + emit_prologue(&gen); + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) gen.error = true; break; @@ -1595,7 +1643,6 @@ cell_gen_fragment_program(struct cell_context *cell, } } - if (gen.error) { /* terminate the SPE code */ return emit_END(&gen); diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 566df7f59e3..18969005b02 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p, int col3; - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); spe_shufb(p, t1, row0, row2, shuf_hi); spe_shufb(p, t2, row0, row2, shuf_lo); @@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p, */ switch (count) { case 4: - spe_stqd(p, col3, dest_ptr, 3); + spe_stqd(p, col3, dest_ptr, 3 * 16); case 3: - spe_stqd(p, col2, dest_ptr, 2); + spe_stqd(p, col2, dest_ptr, 2 * 16); case 2: - spe_stqd(p, col1, dest_ptr, 1); + spe_stqd(p, col1, dest_ptr, 1 * 16); case 1: - spe_stqd(p, col0, dest_ptr, 0); + spe_stqd(p, col0, dest_ptr, 0 * 16); } @@ -166,17 +166,17 @@ emit_fetch(struct spe_function *p, float scale_signed = 0.0; float scale_unsigned = 0.0; - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); offset[0] += 4; switch (bytes) { case 1: scale_signed = 1.0f / 127.0f; scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); + spe_lqd(p, tmp, shuf_ptr, 1 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -185,7 +185,7 @@ emit_fetch(struct spe_function *p, case 2: scale_signed = 1.0f / 32767.0f; scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); + spe_lqd(p, tmp, shuf_ptr, 2 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -241,11 +241,11 @@ emit_fetch(struct spe_function *p, switch (count) { case 1: - spe_stqd(p, float_zero, out_ptr, 1); + spe_stqd(p, float_zero, out_ptr, 1 * 16); case 2: - spe_stqd(p, float_zero, out_ptr, 2); + spe_stqd(p, float_zero, out_ptr, 2 * 16); case 3: - spe_stqd(p, float_one, out_ptr, 3); + spe_stqd(p, float_one, out_ptr, 3 * 16); } if (float_zero != -1) { |