From ab70ea1353ac9859ee51d236482fe92a0493362d Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 10 Dec 2015 21:20:32 -0500 Subject: nv50/ir: add short imad support Support emission of the short imad, but also include it in the various logic that tries to make it possible to emit. Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 55 ++++++++++++++-------- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 6 ++- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 1 - 3 files changed, 40 insertions(+), 22 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 216e119af63..00be37769af 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1190,29 +1190,45 @@ CodeEmitterNV50::emitDMUL(const Instruction *i) void CodeEmitterNV50::emitIMAD(const Instruction *i) { + int mode; code[0] = 0x60000000; - if (isSignedType(i->sType)) - code[1] = i->saturate ? 0x40000000 : 0x20000000; - else - code[1] = 0x00000000; - - int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); - int neg2 = i->src(2).mod.neg(); - assert(!(neg1 & neg2)); - code[1] |= neg1 << 27; - code[1] |= neg2 << 26; + assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod); + if (!isSignedType(i->sType)) + mode = 0; + else if (i->saturate) + mode = 2; + else + mode = 1; - if (i->src(1).getFile() == FILE_IMMEDIATE) + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; emitForm_IMM(i); - else + code[0] |= (mode & 1) << 8 | (mode & 2) << 14; + if (i->flagsSrc >= 0) { + assert(!(code[0] & 0x10400000)); + assert(SDATA(i->src(i->flagsSrc)).id == 0); + code[0] |= 0x10400000; + } + } else + if (i->encSize == 4) { + emitForm_MUL(i); + code[0] |= (mode & 1) << 8 | (mode & 2) << 14; + if (i->flagsSrc >= 0) { + assert(!(code[0] & 0x10400000)); + assert(SDATA(i->src(i->flagsSrc)).id == 0); + code[0] |= 0x10400000; + } + } else { + code[1] = mode << 29; emitForm_MAD(i); - if (i->flagsSrc >= 0) { - // add with carry from $cX - assert(!(code[1] & 0x0c000000) && !i->getPredicate()); - code[1] |= 0xc << 24; - srcId(i->src(i->flagsSrc), 32 + 12); + if (i->flagsSrc >= 0) { + // add with carry from $cX + assert(!(code[1] & 0x0c000000) && !i->getPredicate()); + code[1] |= 0xc << 24; + srcId(i->src(i->flagsSrc), 32 + 12); + } } } @@ -2054,8 +2070,9 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const // check constraints on short MAD if (info.srcNr >= 2 && i->srcExists(2)) { - if (!i->defExists(0) || !isFloatType(i->dType) || - i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) + if (!i->defExists(0) || + (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) || + DDATA(i->def(0)).id != SDATA(i->src(2)).id) return 8; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 23454f30e09..7f4752c0558 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2804,14 +2804,16 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb) i->src(0).getFile() != FILE_GPR || i->src(1).getFile() != FILE_GPR || i->src(2).getFile() != FILE_GPR || - i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id || - !isFloatType(i->dType)) + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) break; if (i->getDef(0)->reg.data.id >= 64 || i->getSrc(0)->reg.data.id >= 64) break; + if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0) + break; + if (i->getPredicate()) break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index b32bc13f755..cd8c42ced5e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1473,7 +1473,6 @@ GCRA::allocateRegisters(ArrayList& insns) // Short encoding only possible if they're all GPRs, no need to // affect them otherwise. if (insn->flagsDef < 0 && - isFloatType(insn->dType) && insn->src(0).getFile() == FILE_GPR && insn->src(1).getFile() == FILE_GPR && insn->src(2).getFile() == FILE_GPR) -- cgit v1.2.3