diff options
author | Karol Herbst <[email protected]> | 2018-12-13 20:19:45 +0100 |
---|---|---|
committer | Karol Herbst <[email protected]> | 2019-02-05 20:35:38 +0100 |
commit | a61c388d077edf78321ee31c84b24c6cce24ccbc (patch) | |
tree | 85a10806d64e639c013f5291518879fbc8e71117 /src/gallium/drivers/nouveau/codegen | |
parent | a203eaa4f4fb672b95426289b8dad3a8998f92d7 (diff) |
nvc0/ir: replace cvt instructions with add to improve shader performance
gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
gp107.
reduces the number of generated convert instructions by roughly 30% in
shader-db.
v2: only for 32 bit operations
move some common code out of the switch
handle OP_SAT with modifiers
v3: only for registers and const memory
rework if clauses
merge isCvt into this patch
v4: merge isCvt into its use
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 63 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 |
2 files changed, 64 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 295497be2f9..0d9df71d0d3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -719,6 +719,66 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) bb->remove(bb->getEntry()); } +// replaces instructions which would end up as f2f or i2i with faster +// alternatives: +// - fabs(a) -> fadd(0, abs a) +// - fneg(a) -> fadd(neg 0, neg a) +// - ineg(a) -> iadd(0, neg a) +// - fneg(abs a) -> fadd(neg 0, neg abs a) +// - sat(a) -> sat add(0, a) +void +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) +{ + if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4) + return; + if (cvt->sType != cvt->dType) + return; + // we could make it work, but in this case we have optimizations disabled + // and we don't really care either way. + if (cvt->src(0).getFile() != FILE_GPR && + cvt->src(0).getFile() != FILE_MEMORY_CONST) + return; + + Modifier mod0, mod1; + + switch (cvt->op) { + case OP_ABS: + if (cvt->src(0).mod) + return; + if (!isFloatType(cvt->sType)) + return; + mod0 = 0; + mod1 = NV50_IR_MOD_ABS; + break; + case OP_NEG: + if (!isFloatType(cvt->sType) && cvt->src(0).mod) + return; + if (isFloatType(cvt->sType) && + (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS))) + return; + + mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0; + mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS) ? 
+ NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG; + break; + case OP_SAT: + if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs()) + return; + mod0 = 0; + mod1 = cvt->src(0).mod; + cvt->saturate = true; + break; + default: + return; + } + + cvt->op = OP_ADD; + cvt->moveSources(0, 1); + cvt->setSrc(0, rZero); + cvt->src(0).mod = mod0; + cvt->src(1).mod = mod1; +} + bool NVC0LegalizePostRA::visit(BasicBlock *bb) { @@ -758,6 +818,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) next = hi; } + if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS) + replaceCvt(i); + if (i->op != OP_MOV && i->op != OP_PFETCH) replaceZero(i); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index e0f50ab0904..4679c56471b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -81,6 +81,7 @@ private: virtual bool visit(Function *); virtual bool visit(BasicBlock *); + void replaceCvt(Instruction *); void replaceZero(Instruction *); bool tryReplaceContWithBra(BasicBlock *); void propagateJoin(BasicBlock *); |