From 4fa429c876806d5ad614de469dec76a54a55bb74 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 27 Dec 2010 20:59:53 +0100 Subject: nvc0: reenable some shader optimizations CSE and constants folding. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 96 ++++++++++++++++------------- src/gallium/drivers/nvc0/nvc0_pc_print.c | 18 +++--- 2 files changed, 63 insertions(+), 51 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index e9e387632b9..8aff875fc37 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -276,7 +276,7 @@ nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) for (s = 0; s < 3 && nvi->src[s]; ++s) { ld = nvi->src[s]->value->insn; - if (!ld || ld->opcode != NV_OP_LD) + if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV)) continue; if (!nvc0_insn_can_load(nvi, s, ld)) continue; @@ -383,9 +383,8 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) -/* static void -modifiers_apply(uint32_t *val, ubyte type, ubyte mod) +apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) { if (mod & NV_MOD_ABS) { if (type == NV_TYPE_F32) @@ -400,10 +399,28 @@ modifiers_apply(uint32_t *val, ubyte type, ubyte mod) else *val = ~(*val) + 1; } + if (mod & NV_MOD_SAT) { + union { + float f; + uint32_t u; + int32_t i; + } u; + u.u = *val; + if (type == NV_TYPE_F32) { + u.f = CLAMP(u.f, -1.0f, 1.0f); + } else + if (type == NV_TYPE_U16) { + u.u = MIN2(u.u, 0xffff); + } else + if (type == NV_TYPE_S16) { + u.i = CLAMP(u.i, -32768, 32767); + } + *val = u.u; + } + if (mod & NV_MOD_NOT) + *val = ~*val; } -*/ -#if 0 static void constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, struct nv_value *src0, struct nv_value *src1) @@ -424,8 +441,8 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, u0.u32 = src0->reg.imm.u32; u1.u32 = src1->reg.imm.u32; - modifiers_apply(&u0.u32, type, nvi->src[0]->mod); - modifiers_apply(&u1.u32, type, nvi->src[1]->mod); + apply_modifiers(&u0.u32, type, nvi->src[0]->mod); + apply_modifiers(&u1.u32, type, nvi->src[1]->mod); switch (nvi->opcode) { case NV_OP_MAD: @@ -468,14 +485,14 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, nvi->opcode = NV_OP_MOV; - val = new_value(pc, NV_FILE_IMM, type); - + val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type)); val->reg.imm.u32 = u.u32; nv_reference(pc, nvi, 1, NULL); nv_reference(pc, nvi, 0, val); - if (nvi->src[2]) { /* from MAD */ + if (nvi->src[2]) { + /* from MAD */ nvi->src[1] = nvi->src[0]; nvi->src[0] = nvi->src[2]; nvi->src[2] = NULL; @@ -506,7 +523,7 @@ constant_operand(struct nv_pc *pc, type = NV_OPTYPE(nvi->opcode); u.u32 = val->reg.imm.u32; - modifiers_apply(&u.u32, type, nvi->src[s]->mod); + apply_modifiers(&u.u32, type, nvi->src[s]->mod); switch (NV_BASEOP(nvi->opcode)) { case NV_OP_MUL: @@ -576,23 +593,22 @@ constant_operand(struct nv_pc *pc, break; } } -#endif static int nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) { -#if 0 struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { struct nv_value *src0, *src1, *src; - int mod; + int s; + uint8_t mod[4]; next = nvi->next; - src0 = nvcg_find_immediate(nvi->src[0]); - src1 = nvcg_find_immediate(nvi->src[1]); + src0 = nvc0_pc_find_immediate(nvi->src[0]); + src1 = nvc0_pc_find_immediate(nvi->src[1]); if (src0 && src1) constant_expression(ctx->pc, nvi, src0, src1); @@ -604,7 +620,7 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) constant_operand(ctx->pc, nvi, src1, 1); } - /* try to combine MUL, ADD into MAD */ + /* check if we can MUL + ADD -> MAD/FMA */ if (nvi->opcode != NV_OP_ADD) continue; @@ -622,20 +638,27 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) /* could have an immediate from above constant_* */ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) continue; + s = (src == src0) ? 0 : 1; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[0]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + continue; nvi->opcode = NV_OP_MAD; - mod = nvi->src[(src == src0) ? 0 : 1]->mod; - nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); - nvi->src[2] = nvi->src[(src == src0) ? 1 : 0]; + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; - assert(!(mod & ~NV_MOD_NEG)); nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = src->insn->src[0]->mod ^ mod; - nvi->src[1]->mod = src->insn->src[1]->mod; + nvi->src[0]->mod = mod[2] ^ mod[s]; + nvi->src[1]->mod = mod[3]; } DESCEND_ARBITRARY(j, nv_pass_lower_arith); -#endif + return 0; } @@ -1016,7 +1039,6 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { -#if 0 struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ? b->phi : b->entry; int s; @@ -1030,23 +1052,13 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (ir->opcode != ik->opcode || ir->fixed) continue; - if (!ir->def[0] || !ik->def[0] || - ik->opcode == NV_OP_LDA || - ik->opcode == NV_OP_STA || - ik->opcode == NV_OP_MOV || - nv_is_vector_op(ik->opcode)) - continue; /* ignore loads, stores & moves */ - - if (ik->src[4] || ir->src[4]) - continue; /* don't mess with address registers */ + if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + continue; - if (ik->flags_src || ir->flags_src || - ik->flags_def || ir->flags_def) - continue; /* and also not with flags, for now */ + if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) + continue; - if (ik->def[0]->reg.file == NV_FILE_OUT || - ir->def[0]->reg.file == NV_FILE_OUT || - !values_equal(ik->def[0], ir->def[0])) + if (!values_equal(ik->def[0], ir->def[0])) continue; for (s = 0; s < 3; ++s) { @@ -1071,7 +1083,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (s == 3) { nvc0_insn_delete(ir); ++reps; - nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]); + nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } @@ -1079,7 +1091,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) } while(reps); DESCEND_ARBITRARY(s, nv_pass_cse); -#endif + return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 6249f1fd1cd..b03826484e4 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -283,21 +283,21 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, - { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 }, + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, - { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 }, - { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 }, - { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, - { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 }, + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, -- cgit v1.2.3