diff options
author | Rob Clark <[email protected]> | 2019-02-23 11:14:32 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2019-02-26 13:19:44 -0500 |
commit | cb884d8ab210b4793eb55852b4f07642c71a99a5 (patch) | |
tree | 44c89a11ff1b83dc3c6b6b1b3e3f323dcd6ddf0b /src/freedreno/ir3 | |
parent | 04c2520d91920725d91a7be6e415d6ab56aeeaab (diff) |
freedreno/ir3: use nopN encoding when possible
Use the (nopN) encoding for slightly denser shaders. This lets us fold
nop instructions into the previous alu instruction in certain cases.
This shouldn't change the number of cycles a shader takes to execute, but it
reduces the size (e.g. glmark2 refract goes from 168 to 116 instructions).
Currently only enabled for a6xx, but I think we could enable this for
a5xx and possibly a4xx.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r-- | src/freedreno/ir3/ir3.c | 28 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3.h | 3 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 10 |
3 files changed, 35 insertions, 6 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index a4de1d3ab49..ed14c343faa 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -211,6 +211,18 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr, iassert((instr->regs_count == 2) || (instr->regs_count == 3)); + if (instr->nop) { + iassert(!instr->repeat); + iassert(instr->nop <= 3); + + cat2->src1_r = instr->nop & 0x1; + cat2->src2_r = (instr->nop >> 1) & 0x1; + } else { + cat2->src1_r = !!(src1->flags & IR3_REG_R); + if (src2) + cat2->src2_r = !!(src2->flags & IR3_REG_R); + } + if (src1->flags & IR3_REG_RELATIV) { iassert(src1->array.offset < (1 << 10)); cat2->rel1.src1 = reg(src1, info, instr->repeat, @@ -232,7 +244,6 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr, cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src1_r = !!(src1->flags & IR3_REG_R); if (src2) { iassert((src2->flags & IR3_REG_IMMED) || @@ -260,7 +271,6 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr, cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src2_r = !!(src2->flags & IR3_REG_R); } cat2->dst = reg(dst, info, instr->repeat, @@ -312,6 +322,17 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr, iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); + if (instr->nop) { + iassert(!instr->repeat); + iassert(instr->nop <= 3); + + cat3->src1_r = instr->nop & 0x1; + cat3->src2_r = (instr->nop >> 1) & 0x1; + } else { + cat3->src1_r = !!(src1->flags & IR3_REG_R); + cat3->src2_r = !!(src2->flags & IR3_REG_R); + } + if (src1->flags & IR3_REG_RELATIV) { iassert(src1->array.offset < (1 << 10)); 
cat3->rel1.src1 = reg(src1, info, instr->repeat, @@ -331,14 +352,11 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr, } cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src1_r = !!(src1->flags & IR3_REG_R); cat3->src2 = reg(src2, info, instr->repeat, IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); cat3->src2_c = !!(src2->flags & IR3_REG_CONST); cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src2_r = !!(src2->flags & IR3_REG_R); - if (src3->flags & IR3_REG_RELATIV) { iassert(src3->array.offset < (1 << 10)); diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 36d645f0b50..27a52c54ad4 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -212,7 +212,8 @@ struct ir3_instruction { IR3_INSTR_MARK = 0x1000, IR3_INSTR_UNUSED= 0x2000, } flags; - int repeat; + uint8_t repeat; + uint8_t nop; #ifdef DEBUG unsigned regs_max; #endif diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 00f0c9c9b8d..b14a789efb2 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -200,6 +200,16 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) last->flags |= n->flags; continue; } + + /* NOTE: I think the nopN encoding works for a5xx and + * probably a4xx, but not a3xx. So far only tested on + * a6xx. + */ + if ((ctx->compiler->gpu_id >= 600) && !n->flags && (last->nop < 3) && + ((opc_cat(last->opc) == 2) || (opc_cat(last->opc) == 3))) { + last->nop++; + continue; + } } list_addtail(&n->node, &block->instr_list); |