diff options
Diffstat (limited to 'src/gallium/drivers/r300/compiler/r3xx_vertprog.c')
-rw-r--r-- | src/gallium/drivers/r300/compiler/r3xx_vertprog.c | 217 |
1 files changed, 47 insertions, 170 deletions
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c index a8d8ebc2dc8..94733d7367f 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c @@ -28,17 +28,13 @@ #include "radeon_compiler_util.h" #include "radeon_dataflow.h" +#include "radeon_program.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" #include "radeon_remove_constants.h" -struct loop { - int BgnLoop; - -}; - /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. @@ -359,140 +355,13 @@ static void ei_pow(struct r300_vertex_program_code *vp, inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } -static void mark_write(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - unsigned int * writemasks = userdata; - - if (file != RC_FILE_TEMPORARY) - return; - - if (index >= R300_VS_MAX_TEMPS) - return; - - writemasks[index] |= mask; -} - -static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) -{ - return PVS_SRC_OPERAND(compiler->PredicateIndex, - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_W), - t_src_class(RC_FILE_TEMPORARY), - 0); -} - -static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, - unsigned int hw_opcode, int is_math) -{ - return PVS_OP_DST_OPERAND(hw_opcode, - is_math, - 0, - compiler->PredicateIndex, - RC_MASK_W, - t_dst_class(RC_FILE_TEMPORARY)); - -} - -static void ei_if(struct r300_vertex_program_compiler * compiler, - struct rc_instruction *rci, - unsigned int * inst, - unsigned int branch_depth) -{ - unsigned int predicate_opcode; - int is_math = 0; - - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode IF not supported\n"); - return; - } - - /* Reserve a temporary to use as our predicate stack counter, if we - * don't already have one. */ - if (!compiler->PredicateMask) { - unsigned int writemasks[RC_REGISTER_MAX_INDEX]; - struct rc_instruction * inst; - unsigned int i; - memset(writemasks, 0, sizeof(writemasks)); - for(inst = compiler->Base.Program.Instructions.Next; - inst != &compiler->Base.Program.Instructions; - inst = inst->Next) { - rc_for_all_writes_mask(inst, mark_write, writemasks); - } - for(i = 0; i < compiler->Base.max_temp_regs; i++) { - unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; - /* Only the W component can be used fo the predicate - * stack counter. */ - if (mask & RC_MASK_W) { - compiler->PredicateMask = RC_MASK_W; - compiler->PredicateIndex = i; - break; - } - } - if (i == compiler->Base.max_temp_regs) { - rc_error(&compiler->Base, "No free temporary to use for" - " predicate stack counter.\n"); - return; - } - } - predicate_opcode = - branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; - - rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); - if (branch_depth == 0) { - is_math = 1; - predicate_opcode = ME_PRED_SET_NEQ; - inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); - inst[2] = 0; - } else { - predicate_opcode = VE_PRED_SET_NEQ_PUSH; - inst[1] = t_pred_src(compiler); - inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); - } - - inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); - inst[3] = 0; - -} - -static void ei_else(struct r300_vertex_program_compiler * compiler, - unsigned int * inst) -{ - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode ELSE not supported\n"); - return; - } - inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); - inst[1] = t_pred_src(compiler); - inst[2] = 0; - inst[3] = 0; -} - -static void ei_endif(struct r300_vertex_program_compiler *compiler, - unsigned int * inst) -{ - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); - return; - } - inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); - inst[1] = t_pred_src(compiler); - inst[2] = 0; - inst[3] = 0; -} - static void translate_vertex_program(struct radeon_compiler *c, void *user) { struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; struct rc_instruction *rci; - struct loop * loops = NULL; - int current_loop_depth = 0; - int loops_reserved = 0; - - unsigned int branch_depth = 0; + unsigned loops[R500_PVS_MAX_LOOP_DEPTH]; + unsigned loop_depth = 0; compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -532,12 +401,9 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case RC_OPCODE_ELSE: ei_else(compiler, inst); break; - case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; @@ -556,37 +422,27 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; case RC_OPCODE_BGNLOOP: { - struct loop * l; - if ((!compiler->Base.is_r500 - && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) - || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + && loop_depth >= R300_VS_MAX_LOOP_DEPTH) + || loop_depth >= R500_PVS_MAX_LOOP_DEPTH) { rc_error(&compiler->Base, "Loops are nested too deep."); return; } - memory_pool_array_reserve(&compiler->Base.Pool, - struct loop, loops, current_loop_depth, - loops_reserved, 1); - l = &loops[current_loop_depth++]; - memset(l , 0, sizeof(struct loop)); - l->BgnLoop = (compiler->code->length / 4); - continue; + loops[loop_depth++] = ((compiler->code->length)/ 4) + 1; + break; } case RC_OPCODE_ENDLOOP: { - struct loop * l; unsigned int act_addr; unsigned int last_addr; unsigned int ret_addr; - assert(loops); - l = &loops[current_loop_depth - 1]; - act_addr = l->BgnLoop - 1; + ret_addr = loops[--loop_depth]; + act_addr = ret_addr - 1; last_addr = (compiler->code->length / 4) - 1; - ret_addr = l->BgnLoop; - if (loops_reserved >= R300_VS_MAX_FC_OPS) { + if (loop_depth >= R300_VS_MAX_FC_OPS) { rc_error(&compiler->Base, "Too many flow control instructions."); return; @@ -595,7 +451,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) compiler->code->fc_op_addrs.r500 [compiler->code->num_fc_ops].lw = R500_PVS_FC_ACT_ADRS(act_addr) - | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff) ; compiler->code->fc_op_addrs.r500 [compiler->code->num_fc_ops].uw = @@ -618,26 +474,51 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( compiler->code->num_fc_ops); compiler->code->num_fc_ops++; - current_loop_depth--; - continue; + + break; } + case RC_ME_PRED_SET_CLR: + ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst); + break; + + case RC_ME_PRED_SET_INV: + ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst); + break; + + case RC_ME_PRED_SET_POP: + ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst); + break; + + case RC_ME_PRED_SET_RESTORE: + ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst); + break; + + case RC_ME_PRED_SEQ: + ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst); + break; + + case RC_ME_PRED_SNEQ: + ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst); + break; + + case RC_VE_PRED_SNEQ_PUSH: + ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH, + vpi, inst); + break; + default: rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); return; } - /* Non-flow control instructions that are inside an if statement - * need to pay attention to the predicate bit. */ - if (branch_depth - && vpi->Opcode != RC_OPCODE_IF - && vpi->Opcode != RC_OPCODE_ELSE - && vpi->Opcode != RC_OPCODE_ENDIF) { - + if (vpi->DstReg.Pred != RC_PRED_DISABLED) { inst[0] |= (PVS_DST_PRED_ENABLE_MASK << PVS_DST_PRED_ENABLE_SHIFT); - inst[0] |= (PVS_DST_PRED_SENSE_MASK + if (vpi->DstReg.Pred == RC_PRED_SET) { + inst[0] |= (PVS_DST_PRED_SENSE_MASK << PVS_DST_PRED_SENSE_SHIFT); + } } /* Update the number of temporaries. */ @@ -650,10 +531,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) vpi->SrcReg[i].Index >= compiler->code->num_temporaries) compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; - if (compiler->PredicateMask) - if (compiler->PredicateIndex >= compiler->code->num_temporaries) - compiler->code->num_temporaries = compiler->PredicateIndex + 1; - if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { rc_error(&compiler->Base, "Too many temporaries.\n"); return; @@ -1018,7 +895,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) struct radeon_compiler_pass vs_list[] = { /* NAME DUMP PREDICATE FUNCTION PARAM */ {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, - {"transform loops", 1, 1, rc_transform_loops, NULL}, {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, @@ -1030,6 +906,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, |