From dc9705f30deabb789735966e3d2278e3c9e00c8a Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 24 Jul 2018 08:58:24 -0400 Subject: freedreno: a2xx: ir2 update this patch brings a number of changes to ir2: -ir2 now generates CF clauses as necessary during assembly. this simplifies fd2_program/fd2_compiler and is necessary to implement optimization passes -ir2 now has separate vector/scalar instructions. this will make it easier to implement scheduling of scalar+vector instructions together. dst_reg is also now separate from src registers instead of a single list -ir2 now implements register allocation. this makes it possible to compile shaders which have more than 64 TGSI registers -ir2 now implements the following optimizations: removal of IN/OUT MOV instructions generated by TGSI and removal of unused instructions when some exports are disabled -ir2 now allows full 8-bit index for constants -ir2_alloc no longer allocates 4 times too many bytes Signed-off-by: Jonathan Marek Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a2xx/fd2_compiler.c | 210 ++----- src/gallium/drivers/freedreno/a2xx/fd2_program.c | 75 +-- src/gallium/drivers/freedreno/a2xx/instr-a2xx.h | 28 +- src/gallium/drivers/freedreno/a2xx/ir-a2xx.c | 734 +++++++++++++--------- src/gallium/drivers/freedreno/a2xx/ir-a2xx.h | 113 ++-- 5 files changed, 615 insertions(+), 545 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c index 3ad47f98508..12f9a1ce0ab 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c @@ -93,9 +93,6 @@ struct fd2_compile_context { unsigned position, psize; uint64_t need_sync; - - /* current exec CF instruction */ - struct ir2_cf *cf; }; static int @@ -130,7 +127,6 @@ compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, ctx->prog = prog; ctx->so = so; - ctx->cf = NULL; ctx->pred_depth = 0; ret = 
tgsi_parse_init(&ctx->parser, so->tokens); @@ -236,15 +232,6 @@ compile_free(struct fd2_compile_context *ctx) tgsi_parse_free(&ctx->parser); } -static struct ir2_cf * -next_exec_cf(struct fd2_compile_context *ctx) -{ - struct ir2_cf *cf = ctx->cf; - if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) - ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC); - return cf; -} - static void compile_vtx_fetch(struct fd2_compile_context *ctx) { @@ -252,13 +239,13 @@ compile_vtx_fetch(struct fd2_compile_context *ctx) int i; for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { struct ir2_instruction *instr = ir2_instr_create( - next_exec_cf(ctx), IR2_FETCH); + ctx->so->ir, IR2_FETCH); instr->fetch.opc = VTX_FETCH; ctx->need_sync |= 1 << (i+1); - ir2_reg_create(instr, i+1, "xyzw", 0); - ir2_reg_create(instr, 0, "x", 0); + ir2_dst_create(instr, i+1, "xyzw", 0); + ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); if (i == 0) instr->sync = true; @@ -266,7 +253,6 @@ compile_vtx_fetch(struct fd2_compile_context *ctx) vfetch_instrs[i] = instr; } ctx->so->num_vfetch_instrs = i; - ctx->cf = NULL; } /* @@ -312,7 +298,7 @@ get_temp_gpr(struct fd2_compile_context *ctx, int idx) return num; } -static struct ir2_register * +static struct ir2_dst_register * add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, const struct tgsi_dst_register *dst) { @@ -351,10 +337,10 @@ add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 
'w' : '_'; swiz[4] = '\0'; - return ir2_reg_create(alu, num, swiz, flags); + return ir2_dst_create(alu, num, swiz, flags); } -static struct ir2_register * +static struct ir2_src_register * add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, const struct tgsi_src_register *src) { @@ -373,6 +359,7 @@ add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, if (ctx->type == PIPE_SHADER_VERTEX) { num = src->Index + 1; } else { + flags |= IR2_REG_INPUT; num = export_linkage(ctx, ctx->input_export_idx[src->Index]); } @@ -415,7 +402,7 @@ static void add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { if (inst->Instruction.Saturate) { - alu->alu.vector_clamp = true; + alu->alu_vector.clamp = true; } } @@ -423,7 +410,7 @@ static void add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) { if (inst->Instruction.Saturate) { - alu->alu.scalar_clamp = true; + alu->alu_scalar.clamp = true; } } @@ -461,27 +448,12 @@ add_regs_vector_3(struct fd2_compile_context *ctx, assert(inst->Instruction.NumDstRegs == 1); add_dst_reg(ctx, alu, &inst->Dst[0].Register); - /* maybe should re-arrange the syntax some day, but - * in assembler/disassembler and what ir.c expects - * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 - */ - add_src_reg(ctx, alu, &inst->Src[2].Register); add_src_reg(ctx, alu, &inst->Src[0].Register); add_src_reg(ctx, alu, &inst->Src[1].Register); + add_src_reg(ctx, alu, &inst->Src[2].Register); add_vector_clamp(inst, alu); } -static void -add_regs_dummy_vector(struct ir2_instruction *alu) -{ - /* create dummy, non-written vector dst/src regs - * for unused vector instr slot: - */ - ir2_reg_create(alu, 0, "____", 0); /* vector dst */ - ir2_reg_create(alu, 0, NULL, 0); /* vector src1 */ - ir2_reg_create(alu, 0, NULL, 0); /* vector src2 */ -} - static void add_regs_scalar_1(struct fd2_compile_context *ctx, struct tgsi_full_instruction *inst, struct ir2_instruction *alu) @@ -489,8 
+461,6 @@ add_regs_scalar_1(struct fd2_compile_context *ctx, assert(inst->Instruction.NumSrcRegs == 1); assert(inst->Instruction.NumDstRegs == 1); - add_regs_dummy_vector(alu); - add_dst_reg(ctx, alu, &inst->Dst[0].Register); add_src_reg(ctx, alu, &inst->Src[0].Register); add_scalar_clamp(inst, alu); @@ -567,19 +537,13 @@ push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) struct ir2_instruction *alu; struct tgsi_dst_register pred_dst; - /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by - * themselves: - */ - ctx->cf = NULL; - if (ctx->pred_depth == 0) { /* assign predicate register: */ ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; get_predicate(ctx, &pred_dst, NULL); - alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); - add_regs_dummy_vector(alu); + alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, src); } else { @@ -587,7 +551,7 @@ push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) get_predicate(ctx, &pred_dst, &pred_src); - alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, &pred_src); add_src_reg(ctx, alu, src); @@ -600,18 +564,11 @@ push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) /* save previous pred state to restore in pop_predicate(): */ ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; - - ctx->cf = NULL; } static void pop_predicate(struct fd2_compile_context *ctx) { - /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by - * themselves: - */ - ctx->cf = NULL; - /* restore previous predicate state: */ ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; @@ -622,8 +579,7 @@ pop_predicate(struct fd2_compile_context *ctx) get_predicate(ctx, &pred_dst, &pred_src); - alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); - 
add_regs_dummy_vector(alu); + alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs); add_dst_reg(ctx, alu, &pred_dst); add_src_reg(ctx, alu, &pred_src); alu->pred = IR2_PRED_NONE; @@ -631,8 +587,6 @@ pop_predicate(struct fd2_compile_context *ctx) /* predicate register no longer needed: */ ctx->pred_reg = -1; } - - ctx->cf = NULL; } static void @@ -693,12 +647,11 @@ translate_pow(struct fd2_compile_context *ctx, get_internal_temp(ctx, &tmp_dst, &tmp_src); - alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); - add_regs_dummy_vector(alu); + alu = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP); add_dst_reg(ctx, alu, &tmp_dst); add_src_reg(ctx, alu, &inst->Src[0].Register); - alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_dst_reg(ctx, alu, &tmp_dst); add_src_reg(ctx, alu, &tmp_src); add_src_reg(ctx, alu, &inst->Src[1].Register); @@ -725,8 +678,7 @@ translate_pow(struct fd2_compile_context *ctx, break; } - alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); - add_regs_dummy_vector(alu); + alu = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); add_dst_reg(ctx, alu, &inst->Dst[0].Register); add_src_reg(ctx, alu, &tmp_src); add_scalar_clamp(inst, alu); @@ -737,7 +689,7 @@ translate_tex(struct fd2_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { struct ir2_instruction *instr; - struct ir2_register *reg; + struct ir2_src_register *reg; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; const struct tgsi_src_register *coord; @@ -766,19 +718,18 @@ translate_tex(struct fd2_compile_context *ctx, * * dst = texture_sample(unit, coord, bias) */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); - /* MAXv: */ + instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; add_src_reg(ctx, instr, &inst->Src[0].Register); add_src_reg(ctx, instr, &inst->Src[0].Register); - /* RECIP_IEEE: */ + instr = 
ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = swiz[inst->Src[0].Register.SwizzleW]; - instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; add_src_reg(ctx, instr, &inst->Src[0].Register); @@ -788,7 +739,7 @@ translate_tex(struct fd2_compile_context *ctx, coord = &inst->Src[0].Register; } - instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH); + instr = ir2_instr_create(ctx->so->ir, IR2_FETCH); instr->fetch.opc = TEX_FETCH; instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT); @@ -807,7 +758,7 @@ translate_tex(struct fd2_compile_context *ctx, reg->swizzle[2] = reg->swizzle[0]; /* dst register needs to be marked for sync: */ - ctx->need_sync |= 1 << instr->regs[0]->num; + ctx->need_sync |= 1 << instr->dst_reg.num; /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. 
*/ instr->sync = true; @@ -818,7 +769,7 @@ translate_tex(struct fd2_compile_context *ctx, * the texture to a temp and the use ALU instruction to move * to output */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src); @@ -869,22 +820,18 @@ translate_sge_slt_seq_sne(struct fd2_compile_context *ctx, get_internal_temp(ctx, &tmp_dst, &tmp_src); - instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); add_dst_reg(ctx, instr, &tmp_dst); add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; add_src_reg(ctx, instr, &inst->Src[1].Register); - instr = ir2_instr_create_alu(next_exec_cf(ctx), vopc, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, vopc); add_dst_reg(ctx, instr, &inst->Dst[0].Register); - /* maybe should re-arrange the syntax some day, but - * in assembler/disassembler and what ir.c expects - * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 - */ - get_immediate(ctx, &tmp_const, fui(c0)); - add_src_reg(ctx, instr, &tmp_const); add_src_reg(ctx, instr, &tmp_src); get_immediate(ctx, &tmp_const, fui(c1)); add_src_reg(ctx, instr, &tmp_const); + get_immediate(ctx, &tmp_const, fui(c0)); + add_src_reg(ctx, instr, &tmp_const); } /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ @@ -904,25 +851,25 @@ translate_lrp(struct fd2_compile_context *ctx, get_immediate(ctx, &tmp_const, fui(1.0)); /* tmp1 = (a * b) */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_dst_reg(ctx, instr, &tmp_dst1); add_src_reg(ctx, instr, &inst->Src[0].Register); add_src_reg(ctx, instr, &inst->Src[1].Register); /* tmp2 = (1 - a) */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); add_dst_reg(ctx, instr, &tmp_dst2); add_src_reg(ctx, instr, &tmp_const); 
add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; /* tmp2 = tmp2 * c */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_dst_reg(ctx, instr, &tmp_dst2); add_src_reg(ctx, instr, &tmp_src2); add_src_reg(ctx, instr, &inst->Src[2].Register); /* dst = tmp1 + tmp2 */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src1); add_src_reg(ctx, instr, &tmp_src2); @@ -956,33 +903,28 @@ translate_trig(struct fd2_compile_context *ctx, tmp_src.SwizzleX = tmp_src.SwizzleY = tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; - /* maybe should re-arrange the syntax some day, but - * in assembler/disassembler and what ir.c expects - * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 - */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); add_dst_reg(ctx, instr, &tmp_dst); - get_immediate(ctx, &tmp_const, fui(0.5)); - add_src_reg(ctx, instr, &tmp_const); add_src_reg(ctx, instr, &inst->Src[0].Register); get_immediate(ctx, &tmp_const, fui(0.159155)); add_src_reg(ctx, instr, &tmp_const); + get_immediate(ctx, &tmp_const, fui(0.5)); + add_src_reg(ctx, instr, &tmp_const); - instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); add_dst_reg(ctx, instr, &tmp_dst); add_src_reg(ctx, instr, &tmp_src); add_src_reg(ctx, instr, &tmp_src); - instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); add_dst_reg(ctx, instr, &tmp_dst); - get_immediate(ctx, &tmp_const, fui(-3.141593)); - add_src_reg(ctx, instr, &tmp_const); add_src_reg(ctx, instr, &tmp_src); get_immediate(ctx, &tmp_const, fui(6.283185)); add_src_reg(ctx, instr, &tmp_const); + get_immediate(ctx, &tmp_const, fui(-3.141593)); + 
add_src_reg(ctx, instr, &tmp_const); - instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op); - add_regs_dummy_vector(instr); + instr = ir2_instr_create_alu_s(ctx->so->ir, op); add_dst_reg(ctx, instr, &inst->Dst[0].Register); add_src_reg(ctx, instr, &tmp_src); } @@ -996,12 +938,12 @@ translate_dp2(struct fd2_compile_context *ctx, struct ir2_instruction *instr; /* DP2ADD c,a,b -> dot2(a,b) + c */ /* for c we use the constant 0.0 */ - instr = ir2_instr_create_alu(next_exec_cf(ctx), DOT2ADDv, ~0); - get_immediate(ctx, &tmp_const, fui(0.0f)); + instr = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv); add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_const); add_src_reg(ctx, instr, &inst->Src[0].Register); add_src_reg(ctx, instr, &inst->Src[1].Register); + get_immediate(ctx, &tmp_const, fui(0.0f)); + add_src_reg(ctx, instr, &tmp_const); add_vector_clamp(inst, instr); } @@ -1015,80 +957,53 @@ translate_instruction(struct fd2_compile_context *ctx, { unsigned opc = inst->Instruction.Opcode; struct ir2_instruction *instr; - static struct ir2_cf *cf; if (opc == TGSI_OPCODE_END) return; - if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { - unsigned num = inst->Dst[0].Register.Index; - /* seems like we need to ensure that position vs param/pixel - * exports don't end up in the same EXEC clause.. easy way - * to do this is force a new EXEC clause on first appearance - * of an position or param/pixel export. 
- */ - if ((num == ctx->position) || (num == ctx->psize)) { - if (ctx->num_position > 0) { - ctx->cf = NULL; - ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION, - ctx->num_position - 1); - ctx->num_position = 0; - } - } else { - if (ctx->num_param > 0) { - ctx->cf = NULL; - ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, - ctx->num_param - 1); - ctx->num_param = 0; - } - } - } - - cf = next_exec_cf(ctx); - /* TODO turn this into a table: */ switch (opc) { case TGSI_OPCODE_MOV: - instr = ir2_instr_create_alu(cf, MAXv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_RCP: - instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE); + instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case TGSI_OPCODE_RSQ: - instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE); + instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case TGSI_OPCODE_SQRT: - instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE); + instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case TGSI_OPCODE_MUL: - instr = ir2_instr_create_alu(cf, MULv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_ADD: - instr = ir2_instr_create_alu(cf, ADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_DP2: translate_dp2(ctx, inst, opc); break; case TGSI_OPCODE_DP3: - instr = ir2_instr_create_alu(cf, DOT3v, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_DP4: - instr = ir2_instr_create_alu(cf, DOT4v, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_MIN: - instr = ir2_instr_create_alu(cf, MINv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, 
MINv); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_MAX: - instr = ir2_instr_create_alu(cf, MAXv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); add_regs_vector_2(ctx, inst, instr); break; case TGSI_OPCODE_SLT: @@ -1098,22 +1013,22 @@ translate_instruction(struct fd2_compile_context *ctx, translate_sge_slt_seq_sne(ctx, inst, opc); break; case TGSI_OPCODE_MAD: - instr = ir2_instr_create_alu(cf, MULADDv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); add_regs_vector_3(ctx, inst, instr); break; case TGSI_OPCODE_LRP: translate_lrp(ctx, inst, opc); break; case TGSI_OPCODE_FRC: - instr = ir2_instr_create_alu(cf, FRACv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_FLR: - instr = ir2_instr_create_alu(cf, FLOORv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv); add_regs_vector_1(ctx, inst, instr); break; case TGSI_OPCODE_EX2: - instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE); + instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); add_regs_scalar_1(ctx, inst, instr); break; case TGSI_OPCODE_POW: @@ -1128,10 +1043,9 @@ translate_instruction(struct fd2_compile_context *ctx, translate_tex(ctx, inst, opc); break; case TGSI_OPCODE_CMP: - instr = ir2_instr_create_alu(cf, CNDGTEv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv); add_regs_vector_3(ctx, inst, instr); - // TODO this should be src0 if regs where in sane order.. 
- instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */ + instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */ break; case TGSI_OPCODE_IF: push_predicate(ctx, &inst->Src[0].Register); @@ -1139,16 +1053,12 @@ translate_instruction(struct fd2_compile_context *ctx, break; case TGSI_OPCODE_ELSE: ctx->so->ir->pred = IR2_PRED_NE; - /* not sure if this is required in all cases, but blob compiler - * won't combine EQ and NE in same CF: - */ - ctx->cf = NULL; break; case TGSI_OPCODE_ENDIF: pop_predicate(ctx); break; case TGSI_OPCODE_F2I: - instr = ir2_instr_create_alu(cf, TRUNCv, ~0); + instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv); add_regs_vector_1(ctx, inst, instr); break; default: @@ -1179,8 +1089,6 @@ compile_instructions(struct fd2_compile_context *ctx) break; } } - - ctx->cf->cf_type = EXEC_END; } int diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/src/gallium/drivers/freedreno/a2xx/fd2_program.c index 834a7c7fcd7..34622eaba0e 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c @@ -199,7 +199,7 @@ patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, instr->fetch.offset = elem->src_offset; for (j = 0; j < 4; j++) - instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]]; + instr->dst_reg.swizzle[j] = "xyzw01__"[desc->swizzle[j]]; assert(instr->fetch.fmt != ~0); @@ -210,7 +210,7 @@ patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, instr->fetch.const_idx, instr->fetch.const_idx_sel, elem->instance_divisor, - instr->regs[0]->swizzle, + instr->dst_reg.swizzle, instr->fetch.stride, instr->fetch.offset); } @@ -307,7 +307,6 @@ static struct fd2_shader_stateobj * create_blit_fp(void) { struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_cf *cf; struct ir2_instruction *instr; if (!so) @@ -315,18 +314,13 @@ create_blit_fp(void) so->ir = ir2_shader_create(); - cf = ir2_cf_create(so->ir, EXEC); - - instr = 
ir2_instr_create_tex_fetch(cf, 0); - ir2_reg_create(instr, 0, "xyzw", 0); - ir2_reg_create(instr, 0, "xyx", 0); + instr = ir2_instr_create_tex_fetch(so->ir, 0); + ir2_dst_create(instr, 0, "xyzw", 0); + ir2_reg_create(instr, 0, "xyx", IR2_REG_INPUT); instr->sync = true; - cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir2_cf_create(so->ir, EXEC_END); - - instr = ir2_instr_create_alu(cf, MAXv, ~0); - ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + instr = ir2_instr_create_alu_v(so->ir, MAXv); + ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); ir2_reg_create(instr, 0, NULL, 0); ir2_reg_create(instr, 0, NULL, 0); @@ -349,7 +343,6 @@ static struct fd2_shader_stateobj * create_blit_vp(void) { struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_cf *cf; struct ir2_instruction *instr; if (!so) @@ -357,31 +350,23 @@ create_blit_vp(void) so->ir = ir2_shader_create(); - cf = ir2_cf_create(so->ir, EXEC); - - instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8); + instr = ir2_instr_create_vtx_fetch(so->ir, 26, 1, FMT_32_32_FLOAT, false, 8); instr->fetch.is_normalized = true; - ir2_reg_create(instr, 1, "xy01", 0); - ir2_reg_create(instr, 0, "x", 0); + ir2_dst_create(instr, 1, "xy01", 0); + ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); + instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); instr->fetch.is_normalized = true; - ir2_reg_create(instr, 2, "xyz1", 0); - ir2_reg_create(instr, 0, "x", 0); - - cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0); - cf = ir2_cf_create(so->ir, EXEC); + ir2_dst_create(instr, 2, "xyz1", 0); + ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - instr = ir2_instr_create_alu(cf, MAXv, ~0); - ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT); + instr = ir2_instr_create_alu_v(so->ir, MAXv); + ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); ir2_reg_create(instr, 2, NULL, 0); 
ir2_reg_create(instr, 2, NULL, 0); - cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir2_cf_create(so->ir, EXEC_END); - - instr = ir2_instr_create_alu(cf, MAXv, ~0); - ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + instr = ir2_instr_create_alu_v(so->ir, MAXv); + ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); ir2_reg_create(instr, 1, NULL, 0); ir2_reg_create(instr, 1, NULL, 0); @@ -397,7 +382,6 @@ static struct fd2_shader_stateobj * create_solid_fp(void) { struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_cf *cf; struct ir2_instruction *instr; if (!so) @@ -405,11 +389,8 @@ create_solid_fp(void) so->ir = ir2_shader_create(); - cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir2_cf_create(so->ir, EXEC_END); - - instr = ir2_instr_create_alu(cf, MAXv, ~0); - ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT); + instr = ir2_instr_create_alu_v(so->ir, MAXv); + ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); @@ -430,7 +411,6 @@ static struct fd2_shader_stateobj * create_solid_vp(void) { struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_cf *cf; struct ir2_instruction *instr; if (!so) @@ -438,22 +418,15 @@ create_solid_vp(void) so->ir = ir2_shader_create(); - cf = ir2_cf_create(so->ir, EXEC); - - instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); - ir2_reg_create(instr, 1, "xyz1", 0); - ir2_reg_create(instr, 0, "x", 0); - - cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0); - cf = ir2_cf_create(so->ir, EXEC); + instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); + ir2_dst_create(instr, 1, "xyz1", 0); + ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - instr = ir2_instr_create_alu(cf, MAXv, ~0); - ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT); + instr = ir2_instr_create_alu_v(so->ir, MAXv); + ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); 
ir2_reg_create(instr, 1, NULL, 0); ir2_reg_create(instr, 1, NULL, 0); - cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); - cf = ir2_cf_create(so->ir, EXEC_END); return assemble(so); } diff --git a/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h b/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h index ac972ed35a1..5a9f93ec794 100644 --- a/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h +++ b/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h @@ -147,15 +147,25 @@ typedef struct PACKED { uint8_t const_1_rel_abs : 1; uint8_t const_0_rel_abs : 1; /* dword2: */ - uint8_t src3_reg : 6; - uint8_t src3_reg_select : 1; - uint8_t src3_reg_abs : 1; - uint8_t src2_reg : 6; - uint8_t src2_reg_select : 1; - uint8_t src2_reg_abs : 1; - uint8_t src1_reg : 6; - uint8_t src1_reg_select : 1; - uint8_t src1_reg_abs : 1; + union { + struct { + uint8_t src3_reg : 6; + uint8_t src3_reg_select : 1; + uint8_t src3_reg_abs : 1; + uint8_t src2_reg : 6; + uint8_t src2_reg_select : 1; + uint8_t src2_reg_abs : 1; + uint8_t src1_reg : 6; + uint8_t src1_reg_select : 1; + uint8_t src1_reg_abs : 1; + }; + /* constants have full 8-bit index */ + struct { + uint8_t src3_reg_const : 8; + uint8_t src2_reg_const : 8; + uint8_t src1_reg_const : 8; + }; + }; instr_vector_opc_t vector_opc : 5; uint8_t src3_sel : 1; uint8_t src2_sel : 1; diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c index 42a9ab494e6..af9811864ff 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c +++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c @@ -35,19 +35,13 @@ #define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) #define ERROR_MSG(f, ...) 
DBG("ERROR: "f, ##__VA_ARGS__) -#define REG_MASK 0x3f - -static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr); - static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, uint32_t idx, struct ir2_shader_info *info); -static void reg_update_stats(struct ir2_register *reg, - struct ir2_shader_info *info, bool dest); -static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n); -static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg); -static uint32_t reg_alu_dst_swiz(struct ir2_register *reg); -static uint32_t reg_alu_src_swiz(struct ir2_register *reg); +static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n); +static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg); +static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg); +static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg); /* simple allocator to carve allocations out of an up-front allocated heap, * so that we can free everything easily in one shot. @@ -55,7 +49,7 @@ static uint32_t reg_alu_src_swiz(struct ir2_register *reg); static void * ir2_alloc(struct ir2_shader *shader, int sz) { void *ptr = &shader->heap[shader->heap_idx]; - shader->heap_idx += align(sz, 4); + shader->heap_idx += align(sz, 4) / 4; return ptr; } @@ -74,7 +68,9 @@ static char * ir2_strdup(struct ir2_shader *shader, const char *str) struct ir2_shader * ir2_shader_create(void) { DEBUG_MSG(""); - return calloc(1, sizeof(struct ir2_shader)); + struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader)); + shader->max_reg = -1; + return shader; } void ir2_shader_destroy(struct ir2_shader *shader) @@ -83,189 +79,344 @@ void ir2_shader_destroy(struct ir2_shader *shader) free(shader); } -/* resolve addr/cnt/sequence fields in the individual CF's */ -static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info) +/* check if an instruction is a simple MOV + */ +static struct ir2_instruction * simple_mov(struct ir2_instruction *instr, + bool 
output) { - uint32_t addr; - unsigned i; - int j; - - addr = shader->cfs_count / 2; - for (i = 0; i < shader->cfs_count; i++) { - struct ir2_cf *cf = shader->cfs[i]; - if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { - uint32_t sequence = 0; - - if (cf->exec.addr && (cf->exec.addr != addr)) - WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); - if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) - WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); - - for (j = cf->exec.instrs_count - 1; j >= 0; j--) { - struct ir2_instruction *instr = cf->exec.instrs[j]; - sequence <<= 2; - if (instr->instr_type == IR2_FETCH) - sequence |= 0x1; - if (instr->sync) - sequence |= 0x2; - } + struct ir2_src_register *src_reg = instr->src_reg; + struct ir2_dst_register *dst_reg = &instr->dst_reg; + struct ir2_register *reg; + unsigned i; + + /* MAXv used for MOV */ + if (instr->instr_type != IR2_ALU_VECTOR || + instr->alu_vector.opc != MAXv) + return NULL; + + /* non identical srcs */ + if (src_reg[0].num != src_reg[1].num) + return NULL; + + /* flags */ + int flags = IR2_REG_NEGATE | IR2_REG_ABS; + if (output) + flags |= IR2_REG_INPUT | IR2_REG_CONST; + if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags)) + return NULL; + + /* clamping */ + if (instr->alu_vector.clamp) + return NULL; + + /* swizzling */ + for (i = 0; i < 4; i++) { + char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i]; + if (swiz == '_') + continue; + + if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] || + swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i]) + return NULL; + } + + if (output) + reg = &instr->shader->reg[src_reg[0].num]; + else + reg = &instr->shader->reg[dst_reg->num]; + + assert(reg->write_idx >= 0); + if (reg->write_idx != reg->write_idx2) + return NULL; + + if (!output) + return instr; + + instr = instr->shader->instr[reg->write_idx]; + return instr->instr_type != IR2_ALU_VECTOR ? 
NULL : instr; +} - cf->exec.addr = addr; - cf->exec.cnt = cf->exec.instrs_count; - cf->exec.sequence = sequence; +static int src_to_reg(struct ir2_instruction *instr, + struct ir2_src_register *reg) +{ + if (reg->flags & IR2_REG_CONST) + return reg->num; - addr += cf->exec.instrs_count; - } - } + return instr->shader->reg[reg->num].reg; +} + +static int dst_to_reg(struct ir2_instruction *instr, + struct ir2_dst_register *reg) +{ + if (reg->flags & IR2_REG_EXPORT) + return reg->num; - info->sizedwords = 3 * addr; + return instr->shader->reg[reg->num].reg; +} - return 0; +static bool mask_get(uint32_t *mask, unsigned index) +{ + return !!(mask[index / 32] & 1 << index % 32); } -void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info) +static void mask_set(uint32_t *mask, struct ir2_register *reg, int index) { - uint32_t i, j; - uint32_t *ptr, *dwords = NULL; - uint32_t idx = 0; - int ret; - - info->sizedwords = 0; - info->max_reg = -1; - info->max_input_reg = 0; - info->regs_written = 0; - - /* we need an even # of CF's.. 
insert a NOP if needed */ - if (shader->cfs_count != align(shader->cfs_count, 2)) - ir2_cf_create(shader, NOP); - - /* first pass, resolve sizes and addresses: */ - ret = shader_resolve(shader, info); - if (ret) { - ERROR_MSG("resolve failed: %d", ret); - goto fail; + if (reg) { + unsigned i; + for (i = 0; i < ARRAY_SIZE(reg->regmask); i++) + mask[i] |= reg->regmask[i]; } + if (index >= 0) + mask[index / 32] |= 1 << index % 32; +} - ptr = dwords = calloc(4, info->sizedwords); +static bool sets_pred(struct ir2_instruction *instr) +{ + return instr->instr_type == IR2_ALU_SCALAR && + instr->alu_scalar.opc >= PRED_SETEs && + instr->alu_scalar.opc <= PRED_SET_RESTOREs; +} - /* second pass, emit CF program in pairs: */ - for (i = 0; i < shader->cfs_count; i += 2) { - instr_cf_t *cfs = (instr_cf_t *)ptr; - ret = cf_emit(shader->cfs[i], &cfs[0]); - if (ret) { - ERROR_MSG("CF emit failed: %d\n", ret); - goto fail; + + +void* ir2_shader_assemble(struct ir2_shader *shader, + struct ir2_shader_info *info) +{ + /* NOTES + * blob compiler seems to always puts PRED_* instrs in a CF by + * themselves, and wont combine EQ/NE in the same CF + * (not doing this - doesn't seem to make a difference) + * + * TODO: implement scheduling for combining vector+scalar instructions + * -some vector instructions can be replaced by scalar + */ + + /* first step: + * 1. remove "NOP" MOV instructions generated by TGSI for input/output: + * 2. track information for register allocation, and to remove + * the dead code when some exports are not needed + * 3. 
add additional instructions for a20x hw binning if needed + * NOTE: modifies the shader instrs + * this step could be done as instructions are added by compiler instead + */ + + /* mask of exports that must be generated + * used to avoid calculating ps exports with hw binning + */ + uint64_t export = ~0ull; + /* bitmask of variables required for exports defined by "export" */ + uint32_t export_mask[REG_MASK/32+1] = {}; + + unsigned idx, reg_idx; + unsigned max_input = 0; + int export_size = -1; + + for (idx = 0; idx < shader->instr_count; idx++) { + struct ir2_instruction *instr = shader->instr[idx], *prev; + struct ir2_dst_register dst_reg = instr->dst_reg; + + if (dst_reg.flags & IR2_REG_EXPORT) { + if (dst_reg.num < 32) + export_size++; + + if ((prev = simple_mov(instr, true))) { + /* copy instruction but keep dst */ + *instr = *prev; + instr->dst_reg = dst_reg; + } } - ret = cf_emit(shader->cfs[i+1], &cfs[1]); - if (ret) { - ERROR_MSG("CF emit failed: %d\n", ret); - goto fail; + + for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) { + struct ir2_src_register *src_reg = &instr->src_reg[reg_idx]; + struct ir2_register *reg; + int num; + + if (src_reg->flags & IR2_REG_CONST) + continue; + + num = src_reg->num; + reg = &shader->reg[num]; + reg->read_idx = idx; + + if (src_reg->flags & IR2_REG_INPUT) { + max_input = MAX2(max_input, num); + } else { + /* bypass simple mov used to set src_reg */ + assert(reg->write_idx >= 0); + prev = shader->instr[reg->write_idx]; + if (simple_mov(prev, false)) { + *src_reg = prev->src_reg[0]; + /* process same src_reg again */ + reg_idx -= 1; + continue; + } + } + + /* update dependencies */ + uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ? 
+ export_mask : shader->reg[dst_reg.num].regmask; + mask_set(mask, reg, num); + if (sets_pred(instr)) + mask_set(export_mask, reg, num); } - ptr += 3; - assert((ptr - dwords) <= info->sizedwords); } - /* third pass, emit ALU/FETCH: */ - for (i = 0; i < shader->cfs_count; i++) { - struct ir2_cf *cf = shader->cfs[i]; - if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { - for (j = 0; j < cf->exec.instrs_count; j++) { - ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); - if (ret) { - ERROR_MSG("instruction emit failed: %d", ret); - goto fail; - } - ptr += 3; - assert((ptr - dwords) <= info->sizedwords); + /* second step: + * emit instructions (with CFs) + RA + */ + instr_cf_t cfs[128], *cf = cfs; + uint32_t alufetch[3*256], *af = alufetch; + + /* RA is done on write, so inputs must be allocated here */ + for (reg_idx = 0; reg_idx <= max_input; reg_idx++) + shader->reg[reg_idx].reg = reg_idx; + info->max_reg = max_input; + + /* CF instr state */ + instr_cf_exec_t exec = { .opc = EXEC }; + instr_cf_alloc_t alloc = { .opc = ALLOC }; + bool need_alloc = 0; + bool pos_export = 0; + + export_size = MAX2(export_size, 0); + + for (idx = 0; idx < shader->instr_count; idx++) { + struct ir2_instruction *instr = shader->instr[idx]; + struct ir2_dst_register *dst_reg = &instr->dst_reg; + unsigned num = dst_reg->num; + struct ir2_register *reg; + + /* a2xx only has 64 registers, so we can use a single 64-bit mask */ + uint64_t regmask = 0ull; + + /* compute the current regmask */ + for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) { + reg = &shader->reg[reg_idx]; + if ((int) idx > reg->write_idx && idx < reg->read_idx) + regmask |= (1ull << reg->reg); + } + + if (dst_reg->flags & IR2_REG_EXPORT) { + /* skip if export is not needed */ + if (!(export & (1ull << num))) + continue; + + /* ALLOC CF: + * want to alloc all < 32 at once + * 32/33 and 62/63 come in pairs + * XXX assuming all 3 types are never interleaved + */ + if (num < 32) { + alloc.size = 
export_size; + alloc.buffer_select = SQ_PARAMETER_PIXEL; + need_alloc = export_size >= 0; + export_size = -1; + } else if (num == 32 || num == 33) { + alloc.size = 0; + alloc.buffer_select = SQ_MEMORY; + need_alloc = num != 33; + } else { + alloc.size = 0; + alloc.buffer_select = SQ_POSITION; + need_alloc = !pos_export; + pos_export = true; } + + } else { + /* skip if dst register not needed to compute exports */ + if (!mask_get(export_mask, num)) + continue; + + /* RA on first write */ + reg = &shader->reg[num]; + if (reg->write_idx == idx) { + reg->reg = ffsll(~regmask) - 1; + info->max_reg = MAX2(info->max_reg, reg->reg); + } + } + + if (exec.count == 6 || (exec.count && need_alloc)) { + *cf++ = *(instr_cf_t*) &exec; + exec.address += exec.count; + exec.serialize = 0; + exec.count = 0; } + + if (need_alloc) { + *cf++ = *(instr_cf_t*) &alloc; + need_alloc = false; + } + + int ret = instr_emit(instr, af, idx, info); af += 3; + assert(!ret); + + if (instr->instr_type == IR2_FETCH) + exec.serialize |= 0x1 << exec.count * 2; + if (instr->sync) + exec.serialize |= 0x2 << exec.count * 2; + exec.count += 1; } - return dwords; -fail: - free(dwords); - return NULL; -} + exec.opc = !export_size ? 
EXEC : EXEC_END; + *cf++ = *(instr_cf_t*) &exec; + exec.address += exec.count; + exec.serialize = 0; + exec.count = 0; + /* GPU will hang without at least one pixel alloc */ + if (!export_size) { + alloc.size = 0; + alloc.buffer_select = SQ_PARAMETER_PIXEL; + *cf++ = *(instr_cf_t*) &alloc; -struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type) -{ - struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf)); - DEBUG_MSG("%d", cf_type); - cf->shader = shader; - cf->cf_type = cf_type; - assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); - shader->cfs[shader->cfs_count++] = cf; - return cf; -} + exec.opc = EXEC_END; + *cf++ = *(instr_cf_t*) &exec; + } + unsigned num_cfs = cf - cfs; -/* - * CF instructions: - */ + /* insert nop to get an even # of CFs */ + if (num_cfs % 2) { + *cf++ = (instr_cf_t) { .opc = NOP }; + num_cfs++; + } -static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr) -{ - memset(instr, 0, sizeof(*instr)); - - instr->opc = cf->cf_type; - - switch (cf->cf_type) { - case NOP: - break; - case EXEC: - case EXEC_END: - assert(cf->exec.addr <= 0x1ff); - assert(cf->exec.cnt <= 0x6); - assert(cf->exec.sequence <= 0xfff); - instr->exec.address = cf->exec.addr; - instr->exec.count = cf->exec.cnt; - instr->exec.serialize = cf->exec.sequence; - break; - case ALLOC: - assert(cf->alloc.size <= 0xf); - instr->alloc.size = cf->alloc.size; - switch (cf->alloc.type) { - case SQ_POSITION: - case SQ_PARAMETER_PIXEL: - instr->alloc.buffer_select = cf->alloc.type; + /* offset cf addrs */ + for (idx = 0; idx < num_cfs; idx++) { + switch (cfs[idx].opc) { + case EXEC: + case EXEC_END: + cfs[idx].exec.address += num_cfs / 2; break; default: - ERROR_MSG("invalid alloc type: %d", cf->alloc.type); - return -1; + break; + /* XXX and any other address using cf that gets implemented */ } - break; - case COND_EXEC: - case COND_EXEC_END: - case COND_PRED_EXEC: - case COND_PRED_EXEC_END: - case LOOP_START: - case LOOP_END: - case COND_CALL: - case RETURN: 
- case COND_JMP: - case COND_EXEC_PRED_CLEAN: - case COND_EXEC_PRED_CLEAN_END: - case MARK_VS_FETCH_DONE: - ERROR_MSG("TODO"); - return -1; } - return 0; + /* concatenate cfs+alufetchs */ + uint32_t cfdwords = num_cfs / 2 * 3; + uint32_t alufetchdwords = exec.address * 3; + info->sizedwords = cfdwords + alufetchdwords; + uint32_t *dwords = malloc(info->sizedwords * 4); + assert(dwords); + memcpy(dwords, cfs, cfdwords * 4); + memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4); + return dwords; } - -struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type) +struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, + int instr_type) { struct ir2_instruction *instr = - ir2_alloc(cf->shader, sizeof(struct ir2_instruction)); + ir2_alloc(shader, sizeof(struct ir2_instruction)); DEBUG_MSG("%d", instr_type); - instr->shader = cf->shader; - instr->pred = cf->shader->pred; + instr->shader = shader; + instr->idx = shader->instr_count; + instr->pred = shader->pred; instr->instr_type = instr_type; - assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); - cf->exec.instrs[cf->exec.instrs_count++] = instr; + shader->instr[shader->instr_count++] = instr; return instr; } @@ -279,15 +430,11 @@ static int instr_emit_fetch(struct ir2_instruction *instr, struct ir2_shader_info *info) { instr_fetch_t *fetch = (instr_fetch_t *)dwords; - int reg = 0; - struct ir2_register *dst_reg = instr->regs[reg++]; - struct ir2_register *src_reg = instr->regs[reg++]; + struct ir2_dst_register *dst_reg = &instr->dst_reg; + struct ir2_src_register *src_reg = &instr->src_reg[0]; memset(fetch, 0, sizeof(*fetch)); - reg_update_stats(dst_reg, info, true); - reg_update_stats(src_reg, info, false); - fetch->opc = instr->fetch.opc; if (instr->fetch.opc == VTX_FETCH) { @@ -298,9 +445,9 @@ static int instr_emit_fetch(struct ir2_instruction *instr, assert(instr->fetch.const_idx <= 0x1f); assert(instr->fetch.const_idx_sel <= 0x3); - vtx->src_reg = src_reg->num; + 
vtx->src_reg = src_to_reg(instr, src_reg); vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); - vtx->dst_reg = dst_reg->num; + vtx->dst_reg = dst_to_reg(instr, dst_reg); vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); vtx->must_be_one = 1; vtx->const_index = instr->fetch.const_idx; @@ -326,9 +473,9 @@ static int instr_emit_fetch(struct ir2_instruction *instr, assert(instr->fetch.const_idx <= 0x1f); - tex->src_reg = src_reg->num; + tex->src_reg = src_to_reg(instr, src_reg); tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); - tex->dst_reg = dst_reg->num; + tex->dst_reg = dst_to_reg(instr, dst_reg); tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); tex->const_idx = instr->fetch.const_idx; tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; @@ -360,95 +507,62 @@ static int instr_emit_fetch(struct ir2_instruction *instr, * ALU instructions: */ -static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, +static int instr_emit_alu(struct ir2_instruction *instr_v, + struct ir2_instruction *instr_s, uint32_t *dwords, struct ir2_shader_info *info) { - int reg = 0; instr_alu_t *alu = (instr_alu_t *)dwords; - struct ir2_register *dst_reg = instr->regs[reg++]; - struct ir2_register *src1_reg; - struct ir2_register *src2_reg; - struct ir2_register *src3_reg; + struct ir2_dst_register *vdst_reg, *sdst_reg; + struct ir2_src_register *src1_reg, *src2_reg, *src3_reg; + struct ir2_shader *shader = instr_v ? instr_v->shader : instr_s->shader; + enum ir2_pred pred = IR2_PRED_NONE; memset(alu, 0, sizeof(*alu)); - /* handle instructions w/ 3 src operands: */ - switch (instr->alu.vector_opc) { - case MULADDv: - case CNDEv: - case CNDGTEv: - case CNDGTv: - case DOT2ADDv: - /* note: disassembler lists 3rd src first, ie: - * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) - * which is the reason for this strange ordering. 
- */ - src3_reg = instr->regs[reg++]; - break; - default: - src3_reg = NULL; - break; + vdst_reg = NULL; + sdst_reg = NULL; + src1_reg = NULL; + src2_reg = NULL; + src3_reg = NULL; + + if (instr_v) { + vdst_reg = &instr_v->dst_reg; + assert(instr_v->src_reg_count >= 2); + src1_reg = &instr_v->src_reg[0]; + src2_reg = &instr_v->src_reg[1]; + if (instr_v->src_reg_count > 2) + src3_reg = &instr_v->src_reg[2]; + pred = instr_v->pred; } - src1_reg = instr->regs[reg++]; - src2_reg = instr->regs[reg++]; - - reg_update_stats(dst_reg, info, true); - reg_update_stats(src1_reg, info, false); - reg_update_stats(src2_reg, info, false); - - assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0); - assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); - assert((src1_reg->flags & IR2_REG_EXPORT) == 0); - assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); - assert((src2_reg->flags & IR2_REG_EXPORT) == 0); - assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); + if (instr_s) { + sdst_reg = &instr_s->dst_reg; + assert(instr_s->src_reg_count == 1); + assert(!instr_v || vdst_reg->flags == sdst_reg->flags); + assert(!instr_v || pred == instr_s->pred); + if (src3_reg) { + assert(src3_reg->flags == instr_s->src_reg[0].flags); + assert(src3_reg->num == instr_s->src_reg[0].num); + assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle)); + } + src3_reg = &instr_s->src_reg[0]; + pred = instr_s->pred; + } - if (instr->alu.vector_opc == (instr_vector_opc_t)~0) { - alu->vector_opc = MAXv; - alu->vector_write_mask = 0; + if (vdst_reg) { + assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0); + assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4)); + alu->vector_opc = instr_v->alu_vector.opc; + alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg); + alu->vector_dest = dst_to_reg(instr_v, vdst_reg); } else { - alu->vector_opc = instr->alu.vector_opc; - alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); + alu->vector_opc = MAXv; } - alu->vector_dest = 
dst_reg->num; - alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT); - - // TODO predicate case/condition.. need to add to parser - - alu->src2_reg = src2_reg->num; - alu->src2_swiz = reg_alu_src_swiz(src2_reg); - alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); - alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); - alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); - - alu->src1_reg = src1_reg->num; - alu->src1_swiz = reg_alu_src_swiz(src1_reg); - alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); - alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); - alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); - - alu->vector_clamp = instr->alu.vector_clamp; - alu->scalar_clamp = instr->alu.scalar_clamp; - - if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) { - struct ir2_register *sdst_reg = instr->regs[reg++]; - - reg_update_stats(sdst_reg, info, true); - - assert(sdst_reg->flags == dst_reg->flags); - - if (src3_reg) { - assert(src3_reg == instr->regs[reg]); - reg++; - } else { - src3_reg = instr->regs[reg++]; - } - - alu->scalar_dest = sdst_reg->num; + if (sdst_reg) { + alu->scalar_opc = instr_s->alu_scalar.opc; alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); - alu->scalar_opc = instr->alu.scalar_opc; + alu->scalar_dest = dst_to_reg(instr_s, sdst_reg); } else { /* not sure if this is required, but adreno compiler seems * to always set scalar opc to MAXs if it is not used: @@ -456,13 +570,58 @@ static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, alu->scalar_opc = MAXs; } - if (src3_reg) { - reg_update_stats(src3_reg, info, false); + alu->export_data = + !!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT); - alu->src3_reg = src3_reg->num; + /* export32 has this bit set.. 
it seems to do more than just set + * the base address of the constants used to zero + * TODO make this less of a hack + */ + if (alu->export_data && alu->vector_dest == 32) { + assert(!instr_s); + alu->relative_addr = 1; + } + + if (src1_reg) { + if (src1_reg->flags & IR2_REG_CONST) { + assert(!(src1_reg->flags & IR2_REG_ABS)); + alu->src1_reg_const = src1_reg->num; + } else { + alu->src1_reg = shader->reg[src1_reg->num].reg; + alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); + } + alu->src1_swiz = reg_alu_src_swiz(src1_reg); + alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); + alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); + } else { + alu->src1_sel = 1; + } + + if (src2_reg) { + if (src2_reg->flags & IR2_REG_CONST) { + assert(!(src2_reg->flags & IR2_REG_ABS)); + alu->src2_reg_const = src2_reg->num; + } else { + alu->src2_reg = shader->reg[src2_reg->num].reg; + alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); + } + alu->src2_swiz = reg_alu_src_swiz(src2_reg); + alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); + alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); + } else { + alu->src2_sel = 1; + } + + if (src3_reg) { + if (src3_reg->flags & IR2_REG_CONST) { + assert(!(src3_reg->flags & IR2_REG_ABS)); + alu->src3_reg_const = src3_reg->num; + } else { + alu->src3_reg = shader->reg[src3_reg->num].reg; + alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); + } alu->src3_swiz = reg_alu_src_swiz(src3_reg); alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); - alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); } else { /* not sure if this is required, but adreno compiler seems @@ -471,9 +630,11 @@ static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, alu->src3_sel = 1; } - if (instr->pred != IR2_PRED_NONE) { - alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2; - } + alu->vector_clamp = instr_v ? 
instr_v->alu_vector.clamp : 0; + alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0; + + if (pred != IR2_PRED_NONE) + alu->pred_select = (pred == IR2_PRED_EQ) ? 3 : 2; return 0; } @@ -483,51 +644,63 @@ static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, { switch (instr->instr_type) { case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); - case IR2_ALU: return instr_emit_alu(instr, dwords, info); + case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info); + case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info); } return -1; } - -struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, +struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, int num, const char *swizzle, int flags) { - struct ir2_register *reg = - ir2_alloc(instr->shader, sizeof(struct ir2_register)); - DEBUG_MSG("%x, %d, %s", flags, num, swizzle); - assert(num <= REG_MASK); + if (!(flags & IR2_REG_EXPORT)) { + struct ir2_register *reg = &instr->shader->reg[num]; + + unsigned i; + for (i = instr->shader->max_reg + 1; i <= num; i++) + instr->shader->reg[i].write_idx = -1; + instr->shader->max_reg = i - 1; + + if (reg->write_idx < 0) + reg->write_idx = instr->idx; + reg->write_idx2 = instr->idx; + } + + struct ir2_dst_register *reg = &instr->dst_reg; reg->flags = flags; reg->num = num; reg->swizzle = ir2_strdup(instr->shader, swizzle); - assert(instr->regs_count < ARRAY_SIZE(instr->regs)); - instr->regs[instr->regs_count++] = reg; return reg; } -static void reg_update_stats(struct ir2_register *reg, - struct ir2_shader_info *info, bool dest) +struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, + int num, const char *swizzle, int flags) { - if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) { - info->max_reg = MAX2(info->max_reg, reg->num); - - if (dest) { - info->regs_written |= (1 << reg->num); - } else if (!(info->regs_written & (1 << reg->num))) { - /* for registers that haven't been 
written, they must be an - * input register that the thread scheduler (presumably?) - * needs to know about: - */ - info->max_input_reg = MAX2(info->max_input_reg, reg->num); - } + assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg)); + if (!(flags & IR2_REG_CONST)) { + struct ir2_register *reg = &instr->shader->reg[num]; + + reg->read_idx = instr->idx; + + unsigned i; + for (i = instr->shader->max_reg + 1; i <= num; i++) + instr->shader->reg[i].write_idx = -1; + instr->shader->max_reg = i - 1; } + + struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++]; + reg->flags = flags; + reg->num = num; + reg->swizzle = ir2_strdup(instr->shader, swizzle); + return reg; } -static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n) +static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n) { uint32_t swiz = 0; int i; - assert(reg->flags == 0); + assert((reg->flags & ~IR2_REG_INPUT) == 0); assert(reg->swizzle); DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); @@ -547,7 +720,7 @@ static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n) return swiz; } -static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg) +static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg) { uint32_t swiz = 0; int i; @@ -580,7 +753,7 @@ static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg) } /* actually, a write-mask */ -static uint32_t reg_alu_dst_swiz(struct ir2_register *reg) +static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg) { uint32_t swiz = 0; int i; @@ -607,12 +780,11 @@ static uint32_t reg_alu_dst_swiz(struct ir2_register *reg) return swiz; } -static uint32_t reg_alu_src_swiz(struct ir2_register *reg) +static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg) { uint32_t swiz = 0; int i; - assert((reg->flags & IR2_REG_EXPORT) == 0); assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); diff --git 
a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h index c4b6c18e24c..ac2931266d4 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h +++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h @@ -33,17 +33,25 @@ struct ir2_shader; +#define REG_MASK 0xff + struct ir2_shader_info { uint16_t sizedwords; int8_t max_reg; /* highest GPR # used by shader */ - uint8_t max_input_reg; - uint64_t regs_written; }; struct ir2_register { + int16_t write_idx, write_idx2, read_idx, reg; + /* bitmask of variables on which this one depends + * XXX: use bitmask util? + */ + uint32_t regmask[REG_MASK/32+1]; +}; + +struct ir2_src_register { enum { - IR2_REG_CONST = 0x1, - IR2_REG_EXPORT = 0x2, + IR2_REG_INPUT = 0x1, + IR2_REG_CONST = 0x2, IR2_REG_NEGATE = 0x4, IR2_REG_ABS = 0x8, } flags; @@ -51,6 +59,14 @@ struct ir2_register { char *swizzle; }; +struct ir2_dst_register { + enum { + IR2_REG_EXPORT = 0x1, + } flags; + int num; + char *swizzle; +}; + enum ir2_pred { IR2_PRED_NONE = 0, IR2_PRED_EQ = 1, @@ -59,14 +75,17 @@ enum ir2_pred { struct ir2_instruction { struct ir2_shader *shader; + unsigned idx; enum { IR2_FETCH, - IR2_ALU, + IR2_ALU_VECTOR, + IR2_ALU_SCALAR, } instr_type; enum ir2_pred pred; int sync; - unsigned regs_count; - struct ir2_register *regs[5]; + unsigned src_reg_count; + struct ir2_dst_register dst_reg; + struct ir2_src_register src_reg[3]; union { /* FETCH specific: */ struct { @@ -83,38 +102,25 @@ struct ir2_instruction { uint32_t stride; uint32_t offset; } fetch; - /* ALU specific: */ + /* ALU-Vector specific: */ struct { - instr_vector_opc_t vector_opc; - instr_scalar_opc_t scalar_opc; - bool vector_clamp : 1; - bool scalar_clamp : 1; - } alu; - }; -}; - -struct ir2_cf { - struct ir2_shader *shader; - instr_cf_opc_t cf_type; - - union { - /* EXEC/EXEC_END specific: */ - struct { - unsigned instrs_count; - struct ir2_instruction *instrs[6]; - uint32_t addr, cnt, sequence; - } exec; - /* ALLOC specific: */ + 
instr_vector_opc_t opc; + bool clamp; + } alu_vector; + /* ALU-Scalar specific: */ struct { - instr_alloc_type_t type; /* SQ_POSITION or SQ_PARAMETER_PIXEL */ - int size; - } alloc; + instr_scalar_opc_t opc; + bool clamp; + } alu_scalar; }; }; struct ir2_shader { - unsigned cfs_count; - struct ir2_cf *cfs[0x56]; + unsigned instr_count; + int max_reg; + struct ir2_register reg[REG_MASK+1]; + + struct ir2_instruction *instr[0x200]; uint32_t heap[100 * 4096]; unsigned heap_idx; @@ -126,40 +132,41 @@ void ir2_shader_destroy(struct ir2_shader *shader); void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info); -struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type); - -struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type); +struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, + int instr_type); -struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, +struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, + int num, const char *swizzle, int flags); +struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, int num, const char *swizzle, int flags); /* some helper fxns: */ -static inline struct ir2_cf * -ir2_cf_create_alloc(struct ir2_shader *shader, instr_alloc_type_t type, int size) +static inline struct ir2_instruction * +ir2_instr_create_alu_v(struct ir2_shader *shader, instr_vector_opc_t vop) { - struct ir2_cf *cf = ir2_cf_create(shader, ALLOC); - if (!cf) - return cf; - cf->alloc.type = type; - cf->alloc.size = size; - return cf; + struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_VECTOR); + if (!instr) + return instr; + instr->alu_vector.opc = vop; + return instr; } + static inline struct ir2_instruction * -ir2_instr_create_alu(struct ir2_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop) +ir2_instr_create_alu_s(struct ir2_shader *shader, instr_scalar_opc_t sop) { - struct ir2_instruction *instr = 
ir2_instr_create(cf, IR2_ALU); + struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_SCALAR); if (!instr) return instr; - instr->alu.vector_opc = vop; - instr->alu.scalar_opc = sop; + instr->alu_scalar.opc = sop; return instr; } + static inline struct ir2_instruction * -ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int cis, +ir2_instr_create_vtx_fetch(struct ir2_shader *shader, int ci, int cis, enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride) { - struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH); + struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); instr->fetch.opc = VTX_FETCH; instr->fetch.const_idx = ci; instr->fetch.const_idx_sel = cis; @@ -169,9 +176,9 @@ ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int cis, return instr; } static inline struct ir2_instruction * -ir2_instr_create_tex_fetch(struct ir2_cf *cf, int ci) +ir2_instr_create_tex_fetch(struct ir2_shader *shader, int ci) { - struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH); + struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); instr->fetch.opc = TEX_FETCH; instr->fetch.const_idx = ci; return instr; -- cgit v1.2.3